Paradox Game Engine  v1.0.0 beta06
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Events Macros Pages
DefaultScanner.cs
Go to the documentation of this file.
1 #region License
2 /* **********************************************************************************
3  * Copyright (c) Roman Ivantsov
4  * This source code is subject to terms and conditions of the MIT License
5  * for Irony. A copy of the license can be found in the License.txt file
6  * at the root of this distribution.
7  * By using this source code in any fashion, you are agreeing to be bound by the terms of the
8  * MIT License.
9  * You must not remove this notice from this software.
10  * **********************************************************************************/
11 #endregion
12 
13 using System;
14 using System.Collections.Generic;
15 using System.Globalization;
16 using System.Text;
17 
18 namespace Irony.Parsing {
19 
20  //Scanner class. The Scanner's function is to transform a stream of characters into aggregates/words or lexemes,
21  // like identifier, number, literal, etc.
22 
23  public class DefaultScanner : Scanner
24  {
25  #region Properties and Fields: Data, _source
26 
27  //buffered tokens can come from expanding a multi-token, when Terminal.TryMatch() returns several tokens packed into one token
28 
29  #endregion
30 
31  private SourceStream SourceStream;
32 
33 
// Creates a fresh SourceStream over the scanner data, using the tab width configured on the parsing context.
protected override void PrepareInput() {
  var scannerData = Data;
  SourceStream = new SourceStream(scannerData, Context.TabWidth);
}
38 
39  #region Scanning tokens
// Produces the next token and stores it in Context.CurrentToken.
// Sources are tried in this order: tokens already buffered in the context, then (after skipping
// whitespace) the EOF token, and finally a token scanned from the source text.
protected override void NextToken() {
  //1. Check if there are buffered tokens
  if (Context.BufferedTokens.Count > 0) {
    Context.CurrentToken = Context.BufferedTokens.Pop();
    return;
  }
  //2. Skip whitespace. We don't need to check for EOF: at EOF we start getting 0-char, so we'll get out automatically.
  //   The loop must advance the preview position itself; without a body it would never terminate on whitespace.
  while (Grammar.WhitespaceChars.IndexOf(SourceStream.PreviewChar) >= 0) {
    SourceStream.PreviewPosition++;
  }
  //3. That's the token start, calc location (line and column)
  SourceStream.MoveLocationToPreviewPosition();
  //4. Check for EOF
  if (SourceStream.EOF()) {
    Context.CurrentToken = new Token(Grammar.Eof, SourceStream.Location, string.Empty, Grammar.Eof.Name);
    return;
  }
  //5. Actually scan the source text and construct a new token
  ScanToken();
}//method
59 
60  //Scans the source text and constructs a new token
// Scans the source text at the current location and leaves the outcome in Context.CurrentToken.
// On success the stream is moved past the token; otherwise an error token is ensured and recovery runs.
private void ScanToken() {
  // Non-grammar terminals get the first chance, regular terminals the second.
  bool matched = MatchNonGrammarTerminals() || MatchRegularTerminals();
  if (!matched) {
    // We are in error already; try to match ANY terminal and let the parser report the error.
    MatchAllTerminals();
  }

  var scanned = Context.CurrentToken;
  if (scanned == null) {
    // Nothing matched at all: report the offending character, then skip ahead.
    Context.CurrentToken = SourceStream.CreateErrorToken(Resources.ErrInvalidChar, SourceStream.PreviewChar);
    Recover();
    return;
  }
  if (scanned.IsError()) {
    // A terminal produced an error token; keep it and skip ahead.
    Recover();
    return;
  }

  // Normal token: set the position to point right after it.
  SourceStream.PreviewPosition = SourceStream.Location.Position + scanned.Length;
  SourceStream.MoveLocationToPreviewPosition();
}
79 
// Tries the non-grammar terminals registered for the current preview character.
// Returns true when one of them produced a token (stored in Context.CurrentToken); false otherwise.
private bool MatchNonGrammarTerminals() {
  TerminalList candidates;
  bool hasCandidates = Data.NonGrammarTerminalsLookup.TryGetValue(SourceStream.PreviewChar, out candidates);
  if (!hasCandidates) {
    return false;
  }

  foreach (var candidate in candidates) {
    SourceStream.ResetPreviewPosition();
    Context.CurrentToken = candidate.TryMatch(Context, SourceStream);
    if (Context.CurrentToken == null) {
      continue; // this terminal did not match
    }
    candidate.InvokeValidateToken(Context);
    if (Context.CurrentToken == null) {
      continue; // no token is left after the validation hook ran
    }

    // Check whether a LineStart token must be fired before this token. This is done only for
    // content tokens; comments should be ignored by the outline logic.
    var matched = Context.CurrentToken;
    bool emitLineStart = matched.Category == TokenCategory.Content && NeedLineStartToken(matched.Location);
    if (emitLineStart) {
      Context.BufferedTokens.Push(matched);                 // buffer the real token; LineStart is ejected instead
      SourceStream.Location = matched.Location;             // go back to the start of the token
      Context.CurrentToken = SourceStream.CreateToken(Grammar.LineStartTerminal);
      Context.PreviousLineStart = SourceStream.Location;    // remember the new LineStart
    }
    return true;
  }

  SourceStream.ResetPreviewPosition();
  return false;
}
105 
// Returns true when the grammar has EmitLineStartToken set and forLocation lies on a later line
// than the previously recorded line start.
private bool NeedLineStartToken(SourceLocation forLocation) {
  if (!Grammar.FlagIsSet(LanguageFlags.EmitLineStartToken)) {
    return false;
  }
  var previousLine = Context.PreviousLineStart.Line;
  return forLocation.Line > previousLine;
}
109 
// Tries to produce a token from the regular (grammar) terminals.
// Returns true when Context.CurrentToken holds a token afterwards.
private bool MatchRegularTerminals() {
  // LineStart has to be ejected BEFORE a real token is produced: it must reach the parser and change
  // its state, so that the expected-terminal set is correct when the real token is finally scanned.
  if (NeedLineStartToken(SourceStream.Location)) {
    Context.CurrentToken = SourceStream.CreateToken(Grammar.LineStartTerminal);
    Context.PreviousLineStart = SourceStream.Location;
    return true;
  }

  // Collect candidate terminals, selected by the current char in the source.
  ComputeCurrentTerminals();
  bool severalCandidates = Context.CurrentTerminals.Count > 1;
  if (severalCandidates) {
    // More than one candidate: let the grammar method select.
    Grammar.OnScannerSelectTerminal(Context);
  }

  MatchTerminals();
  if (Context.CurrentToken == null) {
    // No token from the terminals; try the grammar's own matching method.
    Context.CurrentToken = Grammar.TryMatch(Context, SourceStream);
  }
  if (Context.CurrentToken is MultiToken) {
    UnpackMultiToken();
  }
  return Context.CurrentToken != null;
}//method
134 
135  // This method is a last attempt by scanner to match ANY terminal, after regular matching (by input char) had failed.
136  // Likely this will produce some token which is invalid for current parser state (for ex, identifier where a number
137  // is expected); in this case the parser will report an error as "Error: expected number".
138  // if this matching fails, the scanner will produce an error as "unexpected character."
// Last-resort matching: loads every terminal of the language into the candidate list and runs the matcher.
// Returns true when Context.CurrentToken holds a token afterwards.
private bool MatchAllTerminals() {
  var candidates = Context.CurrentTerminals;
  candidates.Clear();
  candidates.AddRange(Data.Language.GrammarData.Terminals);

  MatchTerminals();
  if (Context.CurrentToken is MultiToken) {
    UnpackMultiToken();
  }

  bool produced = Context.CurrentToken != null;
  return produced;
}
147 
148  //If token is MultiToken then push all its child tokens into Context.BufferedTokens and return the first token in buffer
// When the current token is a MultiToken, pushes its children onto Context.BufferedTokens in reverse
// order (so the first child ends up on top) and makes the popped top element the current token.
// Does nothing for any other kind of token.
private void UnpackMultiToken() {
  var multi = Context.CurrentToken as MultiToken;
  if (multi == null) {
    return;
  }
  var buffer = Context.BufferedTokens;
  int index = multi.ChildTokens.Count;
  while (--index >= 0) {
    buffer.Push(multi.ChildTokens[index]);
  }
  Context.CurrentToken = buffer.Pop();
}
156 
// Rebuilds Context.CurrentTerminals for the current preview character: starts from the terminals
// registered for that character (or the fallback list) and, when possible, keeps only those the
// parser currently expects plus the non-grammar terminals.
private void ComputeCurrentTerminals() {
  Context.CurrentTerminals.Clear();

  TerminalList candidates;
  if (!Data.TerminalsLookup.TryGetValue(SourceStream.PreviewChar, out candidates)) {
    candidates = Data.FallbackTerminals;
  }

  // If we are recovering, previewing, there's no parser state, the scanner-parser link is disabled,
  // or we are in VS line-scan mode, then the list is taken as is.
  bool skipFiltering = Context.Status == ParserStatus.Recovering
    || Context.Status == ParserStatus.Previewing
    || Context.CurrentParserState == null
    || Grammar.FlagIsSet(LanguageFlags.DisableScannerParserLink)
    || Context.Mode == ParseMode.VsLineScan;
  if (skipFiltering) {
    Context.CurrentTerminals.AddRange(candidates);
    return;
  }

  // Filter the candidates by asking the parser state which terminals it expects.
  var state = Context.CurrentParserState;
  foreach (var candidate in candidates) {
    // The OutputTerminal is checked with the parser, not the terminal itself;
    // in most cases they are the same, but not always.
    bool expectedByParser = state.ExpectedTerminals.Contains(candidate.OutputTerminal);
    if (expectedByParser || Grammar.NonGrammarTerminals.Contains(candidate)) {
      Context.CurrentTerminals.Add(candidate);
    }
  }
}//method
179 
// Runs every terminal in Context.CurrentTerminals against the source, in list order, and leaves the
// outcome in Context.CurrentToken. Stops early once an accepted token outranks the next terminal.
private void MatchTerminals() {
  Token accepted = null;
  foreach (Terminal candidate in Context.CurrentTerminals) {
    // Terminals in the list are sorted in descending priority order, so once the accepted token's
    // terminal has a higher priority than this one there is nothing left to check: higher priority wins.
    bool outranked = accepted != null && accepted.Terminal.Priority > candidate.Priority;
    if (outranked) {
      return;
    }

    // Reset the source position and try to match.
    SourceStream.ResetPreviewPosition();
    var attempt = candidate.TryMatch(Context, SourceStream);
    if (attempt == null) {
      continue;
    }

    // Skip the new token if it is shorter than the (non-error) token accepted so far.
    bool shorterThanAccepted = accepted != null && !accepted.IsError() && attempt.Length < accepted.Length;
    if (shorterThanAccepted) {
      continue;
    }

    Context.CurrentToken = attempt;           // now it becomes the current token
    candidate.InvokeValidateToken(Context);   // validate it
    var validated = Context.CurrentToken;
    if (validated != null) {
      accepted = validated;
    }
  }
}//method
201 
202  #endregion
203 
204  #region VS Integration methods
205  //Use this method for VS integration; VS language package requires scanner that returns tokens one-by-one.
206  // Start and End positions required by this scanner may be derived from Token :
207  // start=token.Location.Position; end=start + token.Length;
// Reads one token for Visual Studio integration.
//   state - line-scan state value; it is loaded into Context.VsLineScanState before scanning and
//           written back from it afterwards.
// Returns the scanned token, or null when the source is at EOF or the scanned token is the EOF token.
public Token VsReadToken(ref int state) {
  Context.VsLineScanState.Value = state;
  if (SourceStream.EOF()) {
    return null;
  }

  if (state != 0) {
    // Non-zero state: resume matching with the multiline terminal selected by the scan state.
    // NOTE(review): TerminalIndex looks 1-based (hence the -1) - confirm against VsScannerStateMap.
    int terminalIndex = Context.VsLineScanState.TerminalIndex - 1;
    Terminal pending = Data.MultilineTerminals[terminalIndex];
    Context.CurrentToken = pending.TryMatch(Context, SourceStream);
  } else {
    NextToken();
  }

  // Set the state value from the context.
  state = Context.VsLineScanState.Value;

  var result = Context.CurrentToken;
  bool isEofToken = result != null && result.Terminal == Grammar.Eof;
  return isEofToken ? null : result;
}
// Hands new text and a starting offset to the underlying source stream (VS integration entry point).
public void VsSetSource(string text, int offset)
{
  // NOTE(review): the meaning of the third argument is defined by SourceStream.SetText, which is not visible here.
  SourceStream.SetText(text, offset, true);
}
226  #endregion
227 
228  #region Error recovery
// Error recovery: steps over at least one character, then advances until a whitespace or delimiter
// character is found. Returns true (with the location moved there) if one is found, false if EOF is hit first.
private bool Recover() {
  SourceStream.PreviewPosition++;
  var stopChars = Data.Language.GrammarData.WhitespaceAndDelimiters;
  for (; !SourceStream.EOF(); SourceStream.PreviewPosition++) {
    bool isStopChar = stopChars.IndexOf(SourceStream.PreviewChar) >= 0;
    if (!isStopChar) {
      continue;
    }
    SourceStream.MoveLocationToPreviewPosition();
    return true;
  }
  return false;
}
241  #endregion
242 
243  #region TokenPreview
244  //Preview mode allows custom code in grammar to help parser decide on appropriate action in case of conflict
245  // Preview process is simply searching for particular tokens in "preview set", and finding out which of the
246  // tokens will come first.
247  // In preview mode, tokens returned by FetchToken are collected in _previewTokens list; after finishing preview
248  // the scanner "rolls back" to original position - either by directly restoring the position, or moving the preview
249  // tokens into _bufferedTokens list, so that they will be read again by the parser in normal mode.
250  // See c# grammar sample for an example of using preview methods
251  SourceLocation _previewStartLocation;
252 
253  //Switches Scanner into preview mode
// Switches the scanner into preview mode and records the current source location.
public override void BeginPreview()
{
  base.BeginPreview();
  // Captured after the base call; EndPreview restores this location when preview tokens are discarded.
  this._previewStartLocation = SourceStream.Location;
}
258 
259  //Ends preview mode
// Ends preview mode. When the preview tokens are not kept, the source location recorded by
// BeginPreview is restored on the context.
public override void EndPreview(bool keepPreviewTokens)
{
  base.EndPreview(keepPreviewTokens);
  if (keepPreviewTokens) {
    return;
  }
  Context.SetSourceLocation(_previewStartLocation);
}
266  #endregion
267 
268 
269  }//class
270 
271 }//namespace
TokenCategory
Token category.
Definition: Token.cs:29
Scanner base class. The Scanner's function is to transform a stream of characters into aggregates/wor...
Definition: Scanner.cs:22
override void NextToken()
Retrieves the next token.
override void EndPreview(bool keepPreviewTokens)
Ends the preview.
void VsSetSource(string text, int offset)
Token VsReadToken(ref int state)
readonly Terminal Eof
Definition: Grammar.cs:425
static SourceLocation Empty
override void PrepareInput()
Prepares the input.
string WhitespaceChars
Definition: Grammar.cs:34
Tokens are produced by scanner and fed to parser, optionally passing through Token filters in between...
Definition: Token.cs:74
static string ErrInvalidChar
Looks up a localized string similar to Invalid character: '{0}'..
override void BeginPreview()
Begins the preview.