1 /*------------------------------------------------------------------------- 2 ParserGen.cs -- Generation of the Recursive Descent Parser 3 Compiler Generator Coco/R, 4 Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz 5 extended by M. Loeberbauer & A. Woess, Univ. of Linz 6 with improvements by Pat Terry, Rhodes University 7 8 This program is free software; you can redistribute it and/or modify it 9 under the terms of the GNU General Public License as published by the 10 Free Software Foundation; either version 2, or (at your option) any 11 later version. 12 13 This program is distributed in the hope that it will be useful, but 14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 16 for more details. 17 18 You should have received a copy of the GNU General Public License along 19 with this program; if not, write to the Free Software Foundation, Inc., 20 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 21 22 As an exception, it is allowed to write an extension of Coco/R that is 23 used as a plugin in non-free software. 24 25 If not otherwise stated, any source code generated by Coco/R (other than 26 Coco/R itself) does not fall under the GNU General Public License. 27 -------------------------------------------------------------------------*/ 28 using System; 29 using System.IO; 30 using System.Collections; 31 using System.Text; 32 33 namespace at.jku.ssw.Coco { 34 35 public class ParserGen { 36 37 const int maxTerm = 3; // sets of size < maxTerm are enumerated 38 const char CR = '\r'; 39 const char LF = '\n'; 40 const int EOF = -1; 41 42 const int tErr = 0; // error codes 43 const int altErr = 1; 44 const int syncErr = 2; 45 46 public Position usingPos; // "using" definitions from the attributed grammar 47 48 int errorNr; // highest parser error number 49 Symbol curSy; // symbol whose production is currently generated 50 FileStream fram; // parser frame file 51 StreamWriter gen; // generated parser source file 52 StringWriter err; // generated parser error messages 53 ArrayList symSet = new ArrayList(); 54 55 Tab tab; // other Coco objects 56 TextWriter trace; 57 Errors errors; 58 Buffer buffer; 59 60 void Indent (int n) { 61 for (int i = 1; i <= n; i++) gen.Write('\t'); 62 } 63 64 65 bool Overlaps(BitArray s1, BitArray s2) { 66 int len = s1.Count; 67 for (int i = 0; i < len; ++i) { 68 if (s1[i] && s2[i]) { 69 return true; 70 } 71 } 72 return false; 73 } 74 75 // use a switch if more than 5 alternatives and none starts with a resolver, and no LL1 warning 76 bool UseSwitch (Node p) { 77 BitArray s1, s2; 78 if (p.typ != Node.alt) return false; 79 int nAlts = 0; 80 s1 = new BitArray(tab.terminals.Count); 81 while (p != null) { 82 s2 = tab.Expected0(p.sub, curSy); 83 // must not optimize with switch statement, if there are ll1 warnings 84 if (Overlaps(s1, s2)) { return false; } 85 s1.Or(s2); 86 ++nAlts; 87 // must not optimize with switch-statement, if alt uses a resolver expression 88 if (p.sub.typ == Node.rslv) return false; 89 p = p.down; 90 } 91 return nAlts > 5; 92 } 93 94 void CopySourcePart (Position pos, int indent) { 95 // Copy text described by pos from atg to gen 96 int ch, i; 97 if (pos != null) { 98 buffer.Pos = pos.beg; ch = buffer.Read(); 99 if (tab.emitLines) {100 gen.WriteLine();101 gen.WriteLine("#line {0} \"{1}\"", pos.line, tab.srcName);102 }103 Indent(indent);104 while (buffer.Pos <= pos.end) {105 while (ch == CR || ch == LF) { // eol is either CR or CRLF or LF106 gen.WriteLine(); Indent(indent);107 if (ch == CR) ch = buffer.Read(); // skip CR108 if (ch == LF) ch = buffer.Read(); // skip LF109 for (i = 1; i <= pos.col && (ch == ' ' || ch == '\t'); i++) { 110 // skip blanks at beginning of line111 ch = buffer.Read();112 }113 if (buffer.Pos > pos.end) goto done;114 }115 gen.Write((char)ch);116 ch = buffer.Read();117 }118 done:119 if (indent > 0) gen.WriteLine();120 }121 }122 123 void GenErrorMsg (int errTyp, Symbol sym) {124 errorNr++;125 err.Write("\t\t\tcase " + errorNr + ": s = \"");126 switch (errTyp) {127 case tErr: 128 if (sym.name[0] == '"') err.Write(tab.Escape(sym.name) + " expected");129 else err.Write(sym.name + " expected"); 130 break;131 case altErr: err.Write("invalid " + sym.name); break;132 case syncErr: err.Write("this symbol not expected in " + sym.name); break;133 }134 err.WriteLine("\"; break;");135 }136 137 int NewCondSet (BitArray s) {138 for (int i = 1; i < symSet.Count; i++) // skip symSet[0] (reserved for union of SYNC sets)139 if (Sets.Equals(s, (BitArray)symSet[i])) return i;140 symSet.Add(s.Clone());141 return symSet.Count - 1;142 }143 144 void GenCond (BitArray s, Node p) {145 if (p.typ == Node.rslv) CopySourcePart(p.pos, 0);146 else {147 int n = Sets.Elements(s);148 if (n == 0) gen.Write("false"); // happens if an ANY set matches no symbol149 else if (n <= maxTerm)150 foreach (Symbol sym in tab.terminals) {151 if (s[sym.n]) {152 gen.Write("la.kind == {0}", sym.n);153 --n;154 if (n > 0) gen.Write(" || ");155 }156 }157 else158 gen.Write("StartOf({0})", NewCondSet(s));159 }160 }161 162 void PutCaseLabels (BitArray s) {163 foreach (Symbol sym in tab.terminals)164 if (s[sym.n]) gen.Write("case {0}: ", sym.n);165 }166 167 void GenCode (Node p, int indent, BitArray isChecked) {168 Node p2;169 BitArray s1, s2;170 while (p != null) {171 switch (p.typ) {172 case Node.nt: {173 Indent(indent);174 gen.Write(p.sym.name + "(");175 CopySourcePart(p.pos, 0);176 gen.WriteLine(");");177 break;178 }179 case Node.t: {180 Indent(indent);181 // assert: if isChecked[p.sym.n] is true, then isChecked contains only p.sym.n182 if (isChecked[p.sym.n]) gen.WriteLine("Get();");183 else gen.WriteLine("Expect({0});", p.sym.n);184 break;185 }186 case Node.wt: {187 Indent(indent);188 s1 = tab.Expected(p.next, curSy);189 s1.Or(tab.allSyncSets);190 gen.WriteLine("ExpectWeak({0}, {1});", p.sym.n, NewCondSet(s1));191 break;192 }193 case Node.any: {194 Indent(indent);195 int acc = Sets.Elements(p.set);196 if (tab.terminals.Count == (acc + 1) || (acc > 0 && Sets.Equals(p.set, isChecked))) {197 // either this ANY accepts any terminal (the + 1 = end of file), or exactly what's allowed here198 gen.WriteLine("Get();");199 } else {200 GenErrorMsg(altErr, curSy);201 if (acc > 0) {202 gen.Write("if ("); GenCond(p.set, p); gen.WriteLine(") Get(); else SynErr({0});", errorNr);203 } else gen.WriteLine("SynErr({0}); // ANY node that matches no symbol", errorNr);204 }205 break;206 }207 case Node.eps: break; // nothing208 case Node.rslv: break; // nothing209 case Node.sem: {210 CopySourcePart(p.pos, indent);211 break;212 }213 case Node.sync: {214 Indent(indent);215 GenErrorMsg(syncErr, curSy);216 s1 = (BitArray)p.set.Clone();217 gen.Write("while (!("); GenCond(s1, p); gen.Write(")) { ");218 gen.Write("SynErr({0}); Get();", errorNr); gen.WriteLine("}");219 break;220 }221 case Node.alt: {222 s1 = tab.First(p);223 bool equal = Sets.Equals(s1, isChecked);224 bool useSwitch = UseSwitch(p);225 if (useSwitch) { Indent(indent); gen.WriteLine("switch (la.kind) { "); }226 p2 = p;227 while (p2 != null) {228 s1 = tab.Expected(p2.sub, curSy);229 Indent(indent);230 if (useSwitch) { 231 PutCaseLabels(s1); gen.WriteLine("{ ");232 } else if (p2 == p) { 233 gen.Write("if ("); GenCond(s1, p2.sub); gen.WriteLine(") { "); 234 } else if (p2.down == null && equal) { gen.WriteLine("} else { ");235 } else { 236 gen.Write("} else if ("); GenCond(s1, p2.sub); gen.WriteLine(") { "); 237 }238 GenCode(p2.sub, indent + 1, s1);239 if (useSwitch) {240 Indent(indent); gen.WriteLine("\tbreak;");241 Indent(indent); gen.WriteLine("}");242 }243 p2 = p2.down;244 }245 Indent(indent);246 if (equal) {247 gen.WriteLine("}");248 } else {249 GenErrorMsg(altErr, curSy);250 if (useSwitch) {251 gen.WriteLine("default: SynErr({0}); break;", errorNr);252 Indent(indent); gen.WriteLine("}");253 } else {254 gen.Write("} "); gen.WriteLine("else SynErr({0});", errorNr);255 }256 }257 break;258 }259 case Node.iter: {260 Indent(indent);261 p2 = p.sub;262 gen.Write("while (");263 if (p2.typ == Node.wt) {264 s1 = tab.Expected(p2.next, curSy);265 s2 = tab.Expected(p.next, curSy);266 gen.Write("WeakSeparator({0},{1},{2}) ", p2.sym.n, NewCondSet(s1), NewCondSet(s2));267 s1 = new BitArray(tab.terminals.Count); // for inner structure268 if (p2.up || p2.next == null) p2 = null; else p2 = p2.next;269 } else {270 s1 = tab.First(p2);271 GenCond(s1, p2);272 }273 gen.WriteLine(") { ");274 GenCode(p2, indent + 1, s1);275 Indent(indent); gen.WriteLine("}");276 break;277 }278 case Node.opt:279 s1 = tab.First(p.sub);280 Indent(indent);281 gen.Write("if ("); GenCond(s1, p.sub); gen.WriteLine(") { ");282 GenCode(p.sub, indent + 1, s1);283 Indent(indent); gen.WriteLine("}");284 break;285 }286 if (p.typ != Node.eps && p.typ != Node.sem && p.typ != Node.sync) 287 isChecked.SetAll(false); // = new BitArray(tab.terminals.Count);288 if (p.up) break;289 p = p.next;290 }291 }292 293 void GenTokens() {294 foreach (Symbol sym in tab.terminals) {295 if (Char.IsLetter(sym.name[0]))296 gen.WriteLine("\tpublic const int _{0} = {1};", sym.name, sym.n);297 }298 }299 300 void GenPragmas() {301 foreach (Symbol sym in tab.pragmas) {302 gen.WriteLine("\tpublic const int _{0} = {1};", sym.name, sym.n);303 }304 }305 306 void GenCodePragmas() {307 foreach (Symbol sym in tab.pragmas) {308 gen.WriteLine("\t\t\t\tif (la.kind == {0}) { { ", sym.n);309 CopySourcePart(sym.semPos, 4);310 gen.WriteLine("\t\t\t\t}");311 }312 }313 314 void GenProductions() {315 foreach (Symbol sym in tab.nonterminals) {316 curSy = sym;317 gen.Write("\tvoid {0}(", sym.name);318 CopySourcePart(sym.attrPos, 0);319 gen.WriteLine(") { ");320 CopySourcePart(sym.semPos, 2);321 GenCode(sym.graph, 2, new BitArray(tab.terminals.Count));322 gen.WriteLine("\t}"); gen.WriteLine();323 }324 }325 326 void InitSets() {327 for (int i = 0; i < symSet.Count; i++) {328 BitArray s = (BitArray)symSet[i];329 gen.Write("\t\t{ ");330 int j = 0;331 foreach (Symbol sym in tab.terminals) {332 if (s[sym.n]) gen.Write("_T,"); else gen.Write("_x,");333 ++j;334 if (j%4 == 0) gen.Write(" ");335 }336 if (i == symSet.Count-1) gen.WriteLine("_x}"); else gen.WriteLine("_x},");337 }338 }339 340 public void WriteParser () {341 Generator g = new Generator(tab);342 int oldPos = buffer.Pos; // Pos is modified by CopySourcePart343 symSet.Add(tab.allSyncSets);344 345 fram = g.OpenFrame("Parser.frame");346 gen = g.OpenGen("Parser.cs");347 err = new StringWriter();348 foreach (Symbol sym in tab.terminals) GenErrorMsg(tErr, sym);349 350 g.GenCopyright();351 g.SkipFramePart("-->begin");352 353 if (usingPos != null) { CopySourcePart(usingPos, 0); gen.WriteLine(); }354 g.CopyFramePart("-->namespace");355 /* AW open namespace, if it exists */356 if (tab.nsName != null && tab.nsName.Length > 0) {357 gen.WriteLine("namespace {0} { { ", tab.nsName);358 gen.WriteLine();359 }360 g.CopyFramePart("-->constants");361 GenTokens(); /* ML 2002/09/07 write the token kinds */362 gen.WriteLine("\tpublic const int maxT = {0};", tab.terminals.Count-1);363 GenPragmas(); /* ML 2005/09/23 write the pragma kinds */364 g.CopyFramePart("-->declarations"); CopySourcePart(tab.semDeclPos, 0);365 g.CopyFramePart("-->pragmas"); GenCodePragmas();366 g.CopyFramePart("-->productions"); GenProductions();367 g.CopyFramePart("-->parseRoot"); gen.WriteLine("\t\t{0}();", tab.gramSy.name); if (tab.checkEOF) gen.WriteLine("\t\tExpect(0);");368 g.CopyFramePart("-->initialization"); InitSets();369 g.CopyFramePart("-->errors"); gen.Write(err.ToString());370 g.CopyFramePart(null);371 /* AW 2002-12-20 close namespace, if it exists */372 if (tab.nsName != null && tab.nsName.Length > 0) gen.Write("}");373 gen.Close();374 buffer.Pos = oldPos;375 }376 377 public void WriteStatistics () {378 trace.WriteLine();379 trace.WriteLine("{0} terminals", tab.terminals.Count);380 trace.WriteLine("{0} symbols", tab.terminals.Count + tab.pragmas.Count +381 tab.nonterminals.Count);382 trace.WriteLine("{0} nodes", tab.nodes.Count);383 trace.WriteLine("{0} sets", symSet.Count);384 }385 386 public ParserGen (Parser parser) {387 tab = parser.tab;388 errors = parser.errors;389 trace = parser.trace;390 buffer = parser.scanner.buffer;391 errorNr = -1;392 usingPos = null;393 }394 395 } // end ParserGen396 397 } // end namespace