Paradox Game Engine  v1.0.0 beta06
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Events Macros Pages
GrammarDataBuilder.cs
Go to the documentation of this file.
1 #region License
2 /* **********************************************************************************
3  * Copyright (c) Roman Ivantsov
4  * This source code is subject to terms and conditions of the MIT License
5  * for Irony. A copy of the license can be found in the License.txt file
6  * at the root of this distribution.
7  * By using this source code in any fashion, you are agreeing to be bound by the terms of the
8  * MIT License.
9  * You must not remove this notice from this software.
10  * **********************************************************************************/
11 #endregion
12 
13 using System;
14 using System.Collections.Generic;
15 using System.Linq;
16 using System.Text;
17 
18 namespace Irony.Parsing.Construction {
19 
20  internal class GrammarDataBuilder {
21  LanguageData _language;
22  Grammar _grammar;
23  GrammarData _grammarData;
24  int _unnamedCount; //internal counter for generating names for unnamed non-terminals
25  internal int _lastItemId; //each LR0Item gets its unique ID, last assigned (max) Id is kept in this field
26 
27  internal GrammarDataBuilder(LanguageData language) {
28  _language = language;
29  _grammar = _language.Grammar;
30  }
31 
32  internal void Build() {
33  _grammarData = _language.GrammarData;
34  CreateAugmentedRoots();
35  CollectTermsFromGrammar();
36  AssignWhitespaceAndDelimiters();
37  InitTermLists();
38  FillOperatorReportGroup();
39  FindClosingBraces();
40  CreateProductions();
41  ComputeNonTerminalsNullability(_grammarData);
42  ComputeTailsNullability(_grammarData);
43  ValidateGrammar();
44  }
45 
46  private void CreateAugmentedRoots() {
47  _grammarData.AugmentedRoot = CreateAugmentedRoot(_grammar.Root);
48  foreach(var snippetRoot in _grammar.SnippetRoots)
49  _grammarData.AugmentedSnippetRoots.Add(CreateAugmentedRoot(snippetRoot));
50  }
51 
52  private NonTerminal CreateAugmentedRoot(NonTerminal root) {
53  var result = new NonTerminal(root.Name + "'", root + _grammar.Eof);
54  result.SetFlag(TermFlags.NoAstNode); //mark that we don't need AST node here
55  return result;
56  }
57 
58  private void CollectTermsFromGrammar() {
59  _unnamedCount = 0;
60  _grammarData.AllTerms.Clear();
61  //Start with NonGrammarTerminals, and set IsNonGrammar flag
62  foreach (Terminal t in _grammarData.Grammar.NonGrammarTerminals) {
63  t.SetFlag(TermFlags.IsNonGrammar);
64  _grammarData.AllTerms.Add(t);
65  }
66  //Add main root
67  CollectTermsRecursive(_grammarData.AugmentedRoot);
68  foreach(var augmRoot in _grammarData.AugmentedSnippetRoots)
69  CollectTermsRecursive(augmRoot);
70  //Add syntax error explicitly
71  _grammarData.AllTerms.Add(_grammar.SyntaxError);
72  }
73 
74  private void CollectTermsRecursive(BnfTerm term) {
75  if (_grammarData.AllTerms.Contains(term)) return;
76  _grammarData.AllTerms.Add(term);
77  NonTerminal nt = term as NonTerminal;
78  if (nt == null) return;
79 
80  if (string.IsNullOrEmpty(nt.Name)) {
81  if (nt.Rule != null && !string.IsNullOrEmpty(nt.Rule.Name))
82  nt.Name = nt.Rule.Name;
83  else
84  nt.Name = "Unnamed" + (_unnamedCount++);
85  }
86  if (nt.Rule == null)
87  _language.Errors.AddAndThrow(GrammarErrorLevel.Error, null, Resources.ErrNtRuleIsNull, nt.Name);
88  //check all child elements
89  foreach (BnfTermList elemList in nt.Rule.Data)
90  for (int i = 0; i < elemList.Count; i++) {
91  BnfTerm child = elemList[i];
92  if (child == null) {
93  _language.Errors.Add(GrammarErrorLevel.Error, null, Resources.ErrRuleContainsNull, nt.Name, i);
94  continue; //for i loop
95  }
96  //Check for nested expression - convert to non-terminal
97  BnfExpression expr = child as BnfExpression;
98  if (expr != null) {
99  child = new NonTerminal(null, expr);
100  elemList[i] = child;
101  }
102  CollectTermsRecursive(child);
103  }//for i
104  }//method
105 
106  private void FillOperatorReportGroup() {
107  foreach(var group in _grammar.TermReportGroups)
108  if(group.GroupType == TermReportGroupType.Operator) {
109  foreach(var term in _grammarData.Terminals)
110  if (term.FlagIsSet(TermFlags.IsOperator))
111  group.Terminals.Add(term);
112  return;
113  }
114  }
115 
116  private void FindClosingBraces() {
117  foreach(var term in _grammar.KeyTerms.Values) {
118  if (term.FlagIsSet(TermFlags.IsCloseBrace))
119  _grammarData.ClosingBraces.Add(term.Text);
120  }
121  }
122 
123  private void AssignWhitespaceAndDelimiters() {
124  var delims = _grammar.Delimiters;
125  //if it was not assigned by language creator, let's guess them
126  if(delims == null) {
127  delims = string.Empty;
128  var commonDelims = ",;[](){}"; //chars usually used as delimiters in programming languages
129  foreach(var delim in commonDelims)
130  if (_grammar.KeyTerms.ContainsKey(delim.ToString())) //if language uses this char as a Term, then include it
131  delims += delim;
132  }//if
133  _grammarData.WhitespaceAndDelimiters = _grammar.WhitespaceChars + delims
134  + "\n" //in case if it is removed from whitespace chars by NewLineTerminal
135  + "\0"; //EOF: SourceStream returns this char when we reach end of file
136  }
137 
138 
139  private void InitTermLists() {
140  //Collect terminals and NonTerminals
141  foreach (BnfTerm term in _grammarData.AllTerms) { //remember - we may have hints, so it's not only terminals and non-terminals
142  if (term is NonTerminal) _grammarData.NonTerminals.Add((NonTerminal)term);
143  if (term is Terminal) _grammarData.Terminals.Add((Terminal)term);
144  }
145  _grammarData.Terminals.Sort(Terminal.ByName);
146  //Mark keywords - any "word" symbol directly mentioned in the grammar
147  foreach (var term in _grammarData.Terminals) {
148  var symTerm = term as KeyTerm;
149  if (symTerm == null) continue;
150  if (!string.IsNullOrEmpty(symTerm.Text) && char.IsLetter(symTerm.Text[0]))
151  symTerm.SetFlag(TermFlags.IsKeyword);
152  }//foreach term
153  //Init all terms
154  foreach (var term in _grammarData.AllTerms)
155  term.Init(_grammarData);
156  }//method
157 
158  private void CreateProductions() {
159  _lastItemId = 0;
160  //CheckWrapTailHints() method may add non-terminals on the fly, so we have to use for loop here (not foreach)
161  for (int i = 0; i < _grammarData.NonTerminals.Count; i++) {
162  var nt = _grammarData.NonTerminals[i];
163  nt.Productions.Clear();
164  //Get data (sequences) from both Rule and ErrorRule
165  BnfExpressionData allData = new BnfExpressionData();
166  allData.AddRange(nt.Rule.Data);
167  if (nt.ErrorRule != null)
168  allData.AddRange(nt.ErrorRule.Data);
169  //actually create productions for each sequence
170  foreach (BnfTermList prodOperands in allData) {
171  Production prod = CreateProduction(nt, prodOperands);
172  nt.Productions.Add(prod);
173  } //foreach prodOperands
174  // insert pending custom hints in all productions
175  nt.InsertCustomHints();
176  }
177  }
178 
179  private Production CreateProduction(NonTerminal lvalue, BnfTermList operands) {
180  Production prod = new Production(lvalue);
181  GrammarHintList hints = null;
182  //create RValues list skipping Empty terminal and collecting grammar hints
183  foreach (BnfTerm operand in operands) {
184  if (operand == _grammar.Empty)
185  continue;
186  //Collect hints as we go - they will be added to the next non-hint element
187  GrammarHint hint = operand as GrammarHint;
188  if (hint != null) {
189  if (hints == null) hints = new GrammarHintList();
190  hints.Add(hint);
191  continue;
192  }
193  //Add the operand and create LR0 Item
194  prod.RValues.Add(operand);
195  prod.LR0Items.Add(new LR0Item(_lastItemId++, prod, prod.RValues.Count - 1, hints));
196  hints = null;
197  }//foreach operand
198  //set the flags
199  if (prod.RValues.Count == 0)
200  prod.Flags |= ProductionFlags.IsEmpty;
201  //Add final LRItem
202  ComputeProductionFlags(prod);
203  prod.LR0Items.Add(new LR0Item(_lastItemId++, prod, prod.RValues.Count, hints));
204  return prod;
205  }
206 
207  private void ComputeProductionFlags(Production production) {
208  production.Flags = ProductionFlags.None;
209  foreach (var rv in production.RValues) {
210  //Check if it is a Terminal or Error element
211  var t = rv as Terminal;
212  if (t != null) {
213  production.Flags |= ProductionFlags.HasTerminals;
214  if (t.Category == TokenCategory.Error) production.Flags |= ProductionFlags.IsError;
215  }
216  if(rv.FlagIsSet(TermFlags.IsPunctuation)) continue;
217  }//foreach
218  var lvalue = production.LValue;
219  //Set IsListBuilder flag
220  if (production.RValues.Count > 0 && production.RValues[0] == production.LValue
221  && lvalue.FlagIsSet(TermFlags.IsList))
222  production.Flags |= ProductionFlags.IsListBuilder;
223  }//method
224 
225  private static void ComputeNonTerminalsNullability(GrammarData data) {
226  NonTerminalList undecided = data.NonTerminals;
227  while (undecided.Count > 0) {
228  NonTerminalList newUndecided = new NonTerminalList();
229  foreach (NonTerminal nt in undecided)
230  if (!ComputeNullability(nt))
231  newUndecided.Add(nt);
232  if (undecided.Count == newUndecided.Count) return; //we didn't decide on any new, so we're done
233  undecided = newUndecided;
234  }//while
235  }
236 
237  private static bool ComputeNullability(NonTerminal nonTerminal) {
238  foreach (Production prod in nonTerminal.Productions) {
239  if (prod.RValues.Count == 0) {
240  nonTerminal.SetFlag(TermFlags.IsNullable);
241  return true; //decided, Nullable
242  }//if
243  //If production has terminals, it is not nullable and cannot contribute to nullability
244  if (prod.IsSet(ProductionFlags.HasTerminals)) continue;
245  //Go thru all elements of production and check nullability
246  bool allNullable = true;
247  foreach (BnfTerm child in prod.RValues) {
248  allNullable &= child.FlagIsSet(TermFlags.IsNullable);
249  }//foreach child
250  if (allNullable) {
251  nonTerminal.SetFlag(TermFlags.IsNullable);
252  return true;
253  }
254  }//foreach prod
255  return false; //cannot decide
256  }
257 
258  private static void ComputeTailsNullability(GrammarData data) {
259  foreach (var nt in data.NonTerminals) {
260  foreach (var prod in nt.Productions) {
261  var count = prod.LR0Items.Count;
262  for (int i = count - 1; i >= 0; i--) {
263  var item = prod.LR0Items[i];
264  item.TailIsNullable = true;
265  if (item.Current == null) continue;
266  if (!item.Current.FlagIsSet(TermFlags.IsNullable))
267  break; //for i
268  }//for i
269  }//foreach prod
270  }
271  }
272 
273  #region Grammar Validation
274  private void ValidateGrammar() {
275  var createAst = _grammar.FlagIsSet(LanguageFlags.CreateAst);
276  var missingAstTypeSet = new NonTerminalSet();
277  var invalidTransSet = new NonTerminalSet();
278  foreach(var nt in _grammarData.NonTerminals) {
279  //Check that if CreateAst flag is set then AstNodeType or AstNodeCreator is assigned on all non-transient nodes.
280  if(createAst && nt.AstNodeCreator == null && nt.AstNodeType == null && !nt.FlagIsSet(TermFlags.NoAstNode))
281  missingAstTypeSet.Add(nt);
282  if(nt.FlagIsSet(TermFlags.IsTransient)) {
283  //List non-terminals cannot be marked transient - otherwise there may be some ambiguities and inconsistencies
284  if (nt.FlagIsSet(TermFlags.IsList))
285  _language.Errors.Add(GrammarErrorLevel.Error, null, Resources.ErrListCannotBeTransient, nt.Name);
286  //Count number of non-punctuation child nodes in each production
287  foreach(var prod in nt.Productions)
288  if (CountNonPunctuationTerms(prod) > 1) invalidTransSet.Add(nt);
289  }//if transient
290  //Validate error productions
291  foreach(var prod in nt.Productions)
292  if(prod.IsSet(ProductionFlags.IsError)) {
293  var lastTerm = prod.RValues[prod.RValues.Count -1];
294  if (!(lastTerm is Terminal) || lastTerm == _grammar.SyntaxError)
295  _language.Errors.Add(GrammarErrorLevel.Warning, null, Resources.ErrLastTermOfErrorProd, nt.Name);
296  // "The last term of error production must be a terminal. NonTerminal: {0}"
297  }//foreach prod
298  }//foreac nt
299 
300  if (missingAstTypeSet.Count > 0)
301  _language.Errors.Add(GrammarErrorLevel.Warning, null, Resources.ErrNodeTypeNotSetOn, missingAstTypeSet.ToString());
302  if (invalidTransSet.Count > 0)
303  _language.Errors.Add(GrammarErrorLevel.Error, null, Resources.ErrTransientNtMustHaveOneTerm,invalidTransSet.ToString());
304  }//method
305 
306  private int CountNonPunctuationTerms(Production production) {
307  int count = 0;
308  foreach(var rvalue in production.RValues)
309  if (!rvalue.FlagIsSet(TermFlags.IsPunctuation)) count++;
310  return count;
311  }
312  #endregion
313 
314  }//class
315 }
TokenCategory
Token category.
Definition: Token.cs:29
readonly ProductionList Productions
Definition: NonTerminal.cs:67
readonly NonTerminal LValue
Definition: ParserData.cs:137
A strongly-typed resource class, for looking up localized strings, etc.
readonly TerminalSet NonGrammarTerminals
Definition: Grammar.cs:52
static string ErrLastTermOfErrorProd
Looks up a localized string similar to The last term of production containing SyntaxError must be a t...
static string ErrRuleContainsNull
Looks up a localized string similar to Rule for NonTerminal {0} contains null as an operand in positi...
static string ErrTransientNtMustHaveOneTerm
Looks up a localized string similar to Transient non-terminal must have zero or one non-punctuation c...
_In_ size_t count
Definition: DirectXTexP.h:174
static string ErrListCannotBeTransient
Looks up a localized string similar to List non-terminals cannot be marked transient; list: ({0})...
bool FlagIsSet(TermFlags flag)
Definition: BnfTerm.cs:109
Describes a language.
Definition: LanguageData.cs:23
readonly NonTerminalList NonTerminals
Definition: GrammarData.cs:29
bool IsSet(ProductionFlags flag)
Definition: ParserData.cs:145
readonly BnfTermList RValues
Definition: ParserData.cs:138
ProductionFlags Flags
Definition: ParserData.cs:136
static string ErrNodeTypeNotSetOn
Looks up a localized string similar to Warning: AstNodeType or AstNodeCreator is not set on non-termi...
static string ErrNtRuleIsNull
Looks up a localized string similar to Non-terminal {0} has uninitialized Rule property..