Paradox Game Engine  v1.0.0 beta06
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Events Macros Pages
Grammar.cs
Go to the documentation of this file.
1 // Copyright (c) 2014 Silicon Studio Corp. (http://siliconstudio.co.jp)
2 // This file is distributed under GPL v3. See LICENSE.md for details.
3 //
4 // ----------------------------------------------------------------------
5 // Gold Parser engine.
6 // See more details on http://www.devincook.com/goldparser/
7 //
8 // Original code is written in VB by Devin Cook (GOLDParser@DevinCook.com)
9 //
10 // This translation is done by Vladimir Morozov (vmoroz@hotmail.com)
11 //
12 // The translation is based on the other engine translations:
13 // Delphi engine by Alexandre Rai (riccio@gmx.at)
14 // C# engine by Marcus Klimstra (klimstra@home.nl)
15 // ----------------------------------------------------------------------
16 #region Using directives
17 
18 using System;
19 using System.IO;
20 using System.Text;
21 using System.Collections;
22 
23 #endregion
24 
25 namespace GoldParser
26 {
27  /// <summary>
28  /// Contains grammar tables required for parsing.
29  /// </summary>
30  internal class Grammar
31  {
32  #region Fields and constants
33 
34  /// <summary>
35  /// Identifies Gold parser grammar file.
36  /// </summary>
37  public const string FileHeader = "GOLD Parser Tables/v1.0";
38 
39  // Grammar header information
40  private string m_name; // Name of the grammar
41  private string m_version; // Version of the grammar
42  private string m_author; // Author of the grammar
43  private string m_about; // Grammar description
44  private int m_startSymbolIndex; // Start symbol index
45  private bool m_caseSensitive; // Grammar is case sensitive or not
46 
47  // Tables read from the binary grammar file
48  private Symbol[] m_symbolTable; // Symbol table
49  private String[] m_charSetTable; // Charset table
50  internal Rule[] m_ruleTable; // Rule table
51  internal DfaState[] m_dfaStateTable; // DFA state table
52  internal LRState[] m_lrStateTable; // LR state table
53 
54  // Initial states
55  internal int m_dfaInitialStateIndex; // DFA initial state index
56  internal DfaState m_dfaInitialState; // DFA initial state
57  internal int m_lrInitialState; // LR initial state
58 
59  // Internal state of grammar parser
60  private BinaryReader m_reader; // Source of the grammar
61  private int m_entryCount; // Number of entries left
62 
63  internal Symbol m_errorSymbol;
64  internal Symbol m_endSymbol;
65 
66  #endregion
67 
68  #region Constructors
69 
70  /// <summary>
71  /// Creates a new instance of <c>Grammar</c> class
72  /// </summary>
73  /// <param name="reader"></param>
74  public Grammar(BinaryReader reader)
75  {
76  if (reader == null)
77  {
78  throw new ArgumentNullException("reader");
79  }
80 
81  m_reader = reader;
82  Load();
83  }
84 
85  #endregion
86 
87  #region Public members
88 
89  /// <summary>
90  /// Gets the symbol table.
91  /// </summary>
92  public Symbol[] SymbolTable
93  {
94  get
95  {
96  return m_symbolTable;
97  }
98  }
99 
100  /// <summary>
101  /// Gets grammar name.
102  /// </summary>
103  public string Name
104  {
105  get { return m_name; }
106  }
107 
108  /// <summary>
109  /// Gets grammar version.
110  /// </summary>
111  public string Version
112  {
113  get { return m_version; }
114  }
115 
116  /// <summary>
117  /// Gets grammar author.
118  /// </summary>
119  public string Author
120  {
121  get { return m_author; }
122  }
123 
124  /// <summary>
125  /// Gets grammar description.
126  /// </summary>
127  public string About
128  {
129  get { return m_about; }
130  }
131 
132  /// <summary>
133  /// Gets the start symbol for the grammar.
134  /// </summary>
135  public Symbol StartSymbol
136  {
137  get { return m_symbolTable[m_startSymbolIndex]; }
138  }
139 
140  /// <summary>
141  /// Gets the value indicating if the grammar is case sensitive.
142  /// </summary>
143  public bool CaseSensitive
144  {
145  get { return m_caseSensitive; }
146  }
147 
148  /// <summary>
149  /// Gets initial DFA state.
150  /// </summary>
151  public DfaState DfaInitialState
152  {
153  get { return m_dfaInitialState; }
154  }
155 
156  /// <summary>
157  /// Gets initial LR state.
158  /// </summary>
159  public LRState InitialLRState
160  {
161  get { return m_lrStateTable[m_lrInitialState]; }
162  }
163 
164  /// <summary>
165  /// Gets a special symbol to designate last token in the input stream.
166  /// </summary>
167  public Symbol EndSymbol
168  {
169  get { return m_endSymbol; }
170  }
171 
172  #endregion
173 
174  #region Private members
175 
176  /// <summary>
177  /// Loads grammar from the binary reader.
178  /// </summary>
179  private void Load()
180  {
181  if (FileHeader != ReadString())
182  {
183  throw new FileLoadException(SR.GetString(SR.Grammar_WrongFileHeader));
184  }
185  while (m_reader.PeekChar() != -1)
186  {
187  RecordType recordType = ReadNextRecord();
188  switch (recordType)
189  {
190  case RecordType.Parameters:
191  ReadHeader();
192  break;
193 
194  case RecordType.TableCounts:
195  ReadTableCounts();
196  break;
197 
198  case RecordType.Initial:
199  ReadInitialStates();
200  break;
201 
202  case RecordType.Symbols:
203  ReadSymbols();
204  break;
205 
206  case RecordType.CharSets:
207  ReadCharSets();
208  break;
209 
210  case RecordType.Rules:
211  ReadRules();
212  break;
213 
214  case RecordType.DfaStates:
215  ReadDfaStates();
216  break;
217 
218  case RecordType.LRStates:
219  ReadLRStates();
220  break;
221 
222  default:
223  throw new FileLoadException(SR.GetString(SR.Grammar_InvalidRecordType));
224  }
225  }
226  m_dfaInitialState = m_dfaStateTable[m_dfaInitialStateIndex];
227  OptimizeDfaTransitionVectors();
228  }
229 
230  /// <summary>
231  /// Reads the next record in the binary grammar file.
232  /// </summary>
233  /// <returns>Read record type.</returns>
234  private RecordType ReadNextRecord()
235  {
236  char recordType = (char) ReadByte();
237  //Structure below is ready for future expansion
238  switch (recordType)
239  {
240  case 'M':
241  //Read the number of entry's
242  m_entryCount = ReadInt16();
243  return (RecordType) ReadByteEntry();
244 
245  default:
246  throw new FileLoadException(SR.GetString(SR.Grammar_InvalidRecordHeader));
247  }
248  }
249 
250  /// <summary>
251  /// Reads grammar header information.
252  /// </summary>
253  private void ReadHeader()
254  {
255  m_name = ReadStringEntry();
256  m_version = ReadStringEntry();
257  m_author = ReadStringEntry();
258  m_about = ReadStringEntry();
259  m_caseSensitive = ReadBoolEntry();
260  m_startSymbolIndex = ReadInt16Entry();
261  }
262 
263  /// <summary>
264  /// Reads table record counts and initializes tables.
265  /// </summary>
266  private void ReadTableCounts()
267  {
268  // Initialize tables
269  m_symbolTable = new Symbol [ReadInt16Entry()];
270  m_charSetTable = new String [ReadInt16Entry()];
271  m_ruleTable = new Rule [ReadInt16Entry()];
272  m_dfaStateTable = new DfaState [ReadInt16Entry()];
273  m_lrStateTable = new LRState [ReadInt16Entry()];
274  }
275 
276  /// <summary>
277  /// Read initial DFA and LR states.
278  /// </summary>
279  private void ReadInitialStates()
280  {
281  m_dfaInitialStateIndex = ReadInt16Entry();
282  m_lrInitialState = ReadInt16Entry();
283  }
284 
285  /// <summary>
286  /// Read symbol information.
287  /// </summary>
288  private void ReadSymbols()
289  {
290  int index = ReadInt16Entry();
291  string name = ReadStringEntry();
292  SymbolType symbolType = (SymbolType) ReadInt16Entry();
293 
294  Symbol symbol = new Symbol(index, name, symbolType);
295  switch (symbolType)
296  {
297  case SymbolType.Error:
298  m_errorSymbol = symbol;
299  break;
300 
301  case SymbolType.End:
302  m_endSymbol = symbol;
303  break;
304  }
305  m_symbolTable[index] = symbol;
306  }
307 
308  /// <summary>
309  /// Read char set information.
310  /// </summary>
311  private void ReadCharSets()
312  {
313  m_charSetTable[ReadInt16Entry()] = ReadStringEntry();
314  }
315 
316  /// <summary>
317  /// Read rule information.
318  /// </summary>
319  private void ReadRules()
320  {
321  int index = ReadInt16Entry();
322  Symbol nonTerminal = m_symbolTable[ReadInt16Entry()];
323  ReadEmptyEntry();
324  Symbol[] symbols = new Symbol[m_entryCount];
325  for (int i = 0 ; i < symbols.Length; i++)
326  {
327  symbols[i] = m_symbolTable[ReadInt16Entry()];
328  }
329  Rule rule = new Rule(index, nonTerminal, symbols);
330  m_ruleTable[index] = rule;
331  }
332 
333  /// <summary>
334  /// Read DFA state information.
335  /// </summary>
336  private void ReadDfaStates()
337  {
338  int index = ReadInt16Entry();
339  Symbol acceptSymbol = null;
340  bool acceptState = ReadBoolEntry();
341  if (acceptState)
342  {
343  acceptSymbol = m_symbolTable[ReadInt16Entry()];
344  }
345  else
346  {
347  ReadInt16Entry(); // Skip the entry.
348  }
349  ReadEmptyEntry();
350 
351  // Read DFA edges
352  DfaEdge[] edges = new DfaEdge[m_entryCount / 3];
353  for (int i = 0; i < edges.Length; i++)
354  {
355  edges[i].CharSetIndex = ReadInt16Entry();
356  edges[i].TargetIndex = ReadInt16Entry();
357  ReadEmptyEntry();
358  }
359 
360  // Create DFA state and store it in DFA state table
361  ObjectMap transitionVector = CreateDfaTransitionVector(edges);
362  DfaState dfaState = new DfaState(index, acceptSymbol, transitionVector);
363  m_dfaStateTable[index] = dfaState;
364  }
365 
366  /// <summary>
367  /// Read LR state information.
368  /// </summary>
369  private void ReadLRStates()
370  {
371  int index = ReadInt16Entry();
372  ReadEmptyEntry();
373  LRStateAction[] stateTable = new LRStateAction[m_entryCount / 4];
374  for (int i = 0; i < stateTable.Length; i++)
375  {
376  Symbol symbol = m_symbolTable[ReadInt16Entry()];
377  LRAction action = (LRAction) ReadInt16Entry();
378  int targetIndex = ReadInt16Entry();
379  ReadEmptyEntry();
380  stateTable[i] = new LRStateAction(i, symbol, action, targetIndex);
381  }
382 
383  // Create the transition vector
384  LRStateAction[] transitionVector = new LRStateAction[m_symbolTable.Length];
385  for (int i = 0; i < transitionVector.Length; i++)
386  {
387  transitionVector[i] = null;
388  }
389  for (int i = 0; i < stateTable.Length; i++)
390  {
391  transitionVector[stateTable[i].Symbol.Index] = stateTable[i];
392  }
393 
394  LRState lrState = new LRState(index, stateTable, transitionVector);
395  m_lrStateTable[index] = lrState;
396  }
397 
398  /// <summary>
399  /// Creates the DFA state transition vector.
400  /// </summary>
401  /// <param name="edges">Array of automata edges.</param>
402  /// <returns>Hashtable with the transition information.</returns>
403  private ObjectMap CreateDfaTransitionVector(DfaEdge[] edges)
404  {
405  ObjectMap transitionVector = new ObjectMap();
406  for (int i = edges.Length; --i >= 0;)
407  {
408  string charSet = m_charSetTable[edges[i].CharSetIndex];
409  for (int j = 0; j < charSet.Length; j++)
410  {
411  transitionVector[charSet[j]] = edges[i].TargetIndex;
412  }
413  }
414  return transitionVector;
415  }
416 
417  /// <summary>
418  /// Reads empty entry from the grammar file.
419  /// </summary>
420  private void ReadEmptyEntry()
421  {
422  if (ReadEntryType() != EntryType.Empty)
423  {
424  throw new FileLoadException(SR.GetString(SR.Grammar_EmptyEntryExpected));
425  }
426  m_entryCount--;
427  }
428 
429  /// <summary>
430  /// Reads string entry from the grammar file.
431  /// </summary>
432  /// <returns>String entry content.</returns>
433  private string ReadStringEntry()
434  {
435  if (ReadEntryType() != EntryType.String)
436  {
437  throw new FileLoadException(SR.GetString(SR.Grammar_StringEntryExpected));
438  }
439  m_entryCount--;
440  return ReadString();
441  }
442 
443  /// <summary>
444  /// Reads Int16 entry from the grammar file.
445  /// </summary>
446  /// <returns>Int16 entry content.</returns>
447  private int ReadInt16Entry()
448  {
449  if (ReadEntryType() != EntryType.Integer)
450  {
451  throw new FileLoadException(SR.GetString(SR.Grammar_IntegerEntryExpected));
452  }
453  m_entryCount--;
454  return ReadInt16();
455  }
456 
457  /// <summary>
458  /// Reads byte entry from the grammar file.
459  /// </summary>
460  /// <returns>Byte entry content.</returns>
461  private byte ReadByteEntry()
462  {
463  if (ReadEntryType() != EntryType.Byte)
464  {
465  throw new FileLoadException(SR.GetString(SR.Grammar_ByteEntryExpected));
466  }
467  m_entryCount--;
468  return ReadByte();
469  }
470 
471  /// <summary>
472  /// Reads boolean entry from the grammar file.
473  /// </summary>
474  /// <returns>Boolean entry content.</returns>
475  private bool ReadBoolEntry()
476  {
477  if (ReadEntryType() != EntryType.Boolean)
478  {
479  throw new FileLoadException(SR.GetString(SR.Grammar_BooleanEntryExpected));
480  }
481  m_entryCount--;
482  return ReadBool();
483  }
484 
485  /// <summary>
486  /// Reads entry type.
487  /// </summary>
488  /// <returns>Entry type.</returns>
489  private EntryType ReadEntryType()
490  {
491  if (m_entryCount == 0)
492  {
493  throw new FileLoadException(SR.GetString(SR.Grammar_NoEntry));
494  }
495  return (EntryType) ReadByte();
496  }
497 
498  /// <summary>
499  /// Reads string from the grammar file.
500  /// </summary>
501  /// <returns>String value.</returns>
502  private string ReadString()
503  {
504  StringBuilder result = new StringBuilder();
505  char unicodeChar = (char) ReadInt16();
506  while (unicodeChar != (char) 0)
507  {
508  result.Append(unicodeChar);
509  unicodeChar = (char) ReadInt16();
510  }
511  return result.ToString();
512  }
513 
514  /// <summary>
515  /// Reads two byte integer Int16 from the grammar file.
516  /// </summary>
517  /// <returns>Int16 value.</returns>
518  private int ReadInt16()
519  {
520  return m_reader.ReadUInt16();
521  }
522 
523  /// <summary>
524  /// Reads byte from the grammar file.
525  /// </summary>
526  /// <returns>Byte value.</returns>
527  private byte ReadByte()
528  {
529  return m_reader.ReadByte();
530  }
531 
532  /// <summary>
533  /// Reads boolean from the grammar file.
534  /// </summary>
535  /// <returns>Boolean value.</returns>
536  private bool ReadBool()
537  {
538  return (ReadByte() == 1);
539  }
540 
541  private void OptimizeDfaTransitionVectors()
542  {
543  DfaState[] dfaStates = m_dfaStateTable;
544  foreach (DfaState state in dfaStates)
545  {
546  ObjectMap transitions = state.m_transitionVector;
547  for (int i = transitions.Count; --i >= 0;)
548  {
549  int key = transitions.GetKey(i);
550  object transition = transitions[key];
551  if (transition != null)
552  {
553  int transitionIndex = (int) transition;
554  if (transitionIndex >= 0)
555  {
556  transitions[key] = dfaStates[transitionIndex];
557  }
558  else
559  {
560  transitions[key] = null;
561  }
562  }
563  }
564  transitions.ReadOnly = true;
565  }
566  }
567 
568  #endregion
569 
570  #region Private type definitions
571 
572  /// <summary>
573  /// Record type byte in the binary grammar file.
574  /// </summary>
575  private enum RecordType
576  {
577  Parameters = (int) 'P', // 80
578  TableCounts = (int) 'T', // 84
579  Initial = (int) 'I', // 73
580  Symbols = (int) 'S', // 83
581  CharSets = (int) 'C', // 67
582  Rules = (int) 'R', // 82
583  DfaStates = (int) 'D', // 68
584  LRStates = (int) 'L', // 76
585  Comment = (int) '!' // 33
586  }
587 
588  /// <summary>
589  /// Entry type byte in the binary grammar file.
590  /// </summary>
591  private enum EntryType
592  {
593  Empty = (int) 'E', // 69
594  Integer = (int) 'I', // 73
595  String = (int) 'S', // 83
596  Boolean = (int) 'B', // 66
597  Byte = (int) 'b' // 98
598  }
599 
600  /// <summary>
601  /// Edge between DFA states.
602  /// </summary>
603  private struct DfaEdge
604  {
605  public int CharSetIndex;
606  public int TargetIndex;
607  }
608 
609  #endregion
610  }
611 }
Comment category.