Paradox Game Engine  v1.0.0 beta06
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Events Macros Pages
IdentifierTerminal.cs
Go to the documentation of this file.
1 #region License
2 /* **********************************************************************************
3  * Copyright (c) Roman Ivantsov
4  * This source code is subject to terms and conditions of the MIT License
5  * for Irony. A copy of the license can be found in the License.txt file
6  * at the root of this distribution.
7  * By using this source code in any fashion, you are agreeing to be bound by the terms of the
8  * MIT License.
9  * You must not remove this notice from this software.
10  * **********************************************************************************/
11 #endregion
12 
13 using System;
14 using System.Collections.Generic;
15 using System.Text;
16 using System.Globalization;
17 
18 namespace Irony.Parsing {
19  #region notes
20  //Identifier terminal. Matches alpha-numeric sequences that usually represent identifiers and keywords.
21  // c#: @ prefix signals to not interpret as a keyword; allows \u escapes
22  //
23 
24  #endregion
25 
// Option flags for identifier scanning. Values are combined bitwise and are cast to short
// when handed to the compound-terminal machinery (see AddPrefix and InitDetails).
[Flags]
public enum IdOptions : short {
    None = 0,
    // bit 0: ReadBody decodes \u / \U escapes when this is set
    AllowsEscapes = 1 << 0,
    // bits 0 and 1 together
    CanStartWithEscape = (1 << 0) | (1 << 1),

    // CreateToken skips the keyword lookup when this is set
    IsNotKeyword = 1 << 4,
    // ConvertValue prepends the prefix to the body when this is set
    NameIncludesPrefix = 1 << 5,
}
35 
// Casing rule an identifier must satisfy; CheckCaseRestriction evaluates it against the scanned text.
public enum CaseRestriction {
    None = 0,        // no casing check
    FirstUpper = 1,  // first character must be upper-case
    FirstLower = 2,  // first character must be lower-case
    AllUpper = 3,    // upper-casing the text must leave it unchanged
    AllLower = 4     // lower-casing the text must leave it unchanged
}
43 
// Named list of Unicode categories; adds nothing to List<UnicodeCategory> beyond the type name.
public class UnicodeCategoryList : List<UnicodeCategory>
{
}
45 
47 
48  //Id flags for internal use
// Internal-only flag bits, kept above the bits used by IdOptions.
internal enum IdFlagsInternal : short {
    // bit 8: set by ReadUnicodeEscape once an escape sequence has been decoded
    HasEscapes = 1 << 8,
}
52 
53 
54  //Note that extraChars, extraFirstChars are used to form AllFirstChars and AllChars fields, which in turn
55  // are used in QuickParse. Only if QuickParse fails, the process switches to full version with checking every
56  // char's category
57  #region constructors and initialization
// Creates an identifier terminal with no options; forwards to the (name, options) overload.
public IdentifierTerminal(string name)
    : this(name, IdOptions.None)
{
}
// Creates an identifier terminal that accepts "_" both as an extra first char and as an extra
// body char, then records the supplied options.
public IdentifierTerminal(string name, IdOptions options)
    : this(name, "_", "_")
{
    this.Options = options;
}
// Creates an identifier terminal and builds the two character tables:
//   AllFirstChars = Latin letters + extraFirstChars
//   AllChars      = Latin letters + decimal digits + extraChars
public IdentifierTerminal(string name, string extraChars, string extraFirstChars)
    : base(name)
{
    string letters = Strings.AllLatinLetters;
    this.AllFirstChars = letters + extraFirstChars;
    this.AllChars = letters + Strings.DecimalDigits + extraChars;
}
67 
// Registers a prefix together with the options that apply when that prefix is present.
// The options are passed to the base class as a raw short bit mask.
public void AddPrefix(string prefix, IdOptions options)
{
    short optionBits = (short)options;
    base.AddPrefixFlag(prefix, optionBits);
}
71  #endregion
72 
73  #region properties: AllChars, AllFirstChars
74  //Character tables built by the constructor. QuickParse reads them directly, and CharOk (called from ReadBody) also checks them before falling back to Unicode categories.
// Characters accepted at any position after the first one.
75  public string AllChars;
// Characters accepted at the first position; Init re-cases this table according to CaseRestriction, and GetFirsts enumerates it.
76  public string AllFirstChars;
// Editor info initialised with keyword token type and keyword color. NOTE(review): not referenced elsewhere in the visible code.
77  public TokenEditorInfo KeywordEditorInfo = new TokenEditorInfo(TokenType.Keyword, TokenColor.Keyword, TokenTriggers.None);
// Copied into details.Flags by InitDetails and consulted by GetFirsts.
78  public IdOptions Options; //flags for the case when there are no prefixes
80 
// CharOk accepts a first char whose Unicode category is listed here; Init adds this terminal to Grammar.FallbackTerminals when the list is non-empty.
81  public readonly UnicodeCategoryList StartCharCategories = new UnicodeCategoryList(); //categories of first char
// CharOk accepts a non-first char whose Unicode category is listed here.
82  public readonly UnicodeCategoryList CharCategories = new UnicodeCategoryList(); //categories of all other chars
// ReadBody consumes chars of these categories but does not add them to the identifier text.
83  public readonly UnicodeCategoryList CharsToRemoveCategories = new UnicodeCategoryList(); //categories of chars to remove from final id, usually formatting category
84  #endregion
84  #endregion
85 
86  #region overrides
// Prepares the terminal for scanning:
//  - guarantees that both character tables are non-null;
//  - re-cases AllFirstChars to match CaseRestriction;
//  - registers the terminal as a fallback terminal when start chars are defined by Unicode category;
//  - supplies default editor info and, when the grammar creates an AST, a default AST node type.
public override void Init(GrammarData grammarData) {
    base.Init(grammarData);
    AllChars = AllChars ?? string.Empty;
    AllFirstChars = AllFirstChars ?? string.Empty;
    //Adjust case restriction. Only the first chars are adjusted; if the first char is ok, the rest is scanned
    // without restriction and the casing of the entire identifier is checked afterwards.
    //Invariant casing is used on purpose: the table must not depend on the current culture
    // (culture-specific rules, e.g. Turkish 'I' -> dotless 'i', would add unintended characters to it).
    switch (CaseRestriction) {
        case CaseRestriction.AllLower:
        case CaseRestriction.FirstLower:
            AllFirstChars = AllFirstChars.ToLowerInvariant();
            break;
        case CaseRestriction.AllUpper:
        case CaseRestriction.FirstUpper:
            AllFirstChars = AllFirstChars.ToUpperInvariant();
            break;
    }
    //if there are "first" chars defined by categories, add the terminal to FallbackTerminals
    if (this.StartCharCategories.Count > 0)
        Grammar.FallbackTerminals.Add(this);
    if (this.EditorInfo == null)
        this.EditorInfo = new TokenEditorInfo(TokenType.Identifier, TokenColor.Identifier, TokenTriggers.None);
    if (this.AstNodeType == null && this.AstNodeCreator == null && grammarData.Grammar.FlagIsSet(LanguageFlags.CreateAst))
        this.AstNodeType = typeof(Irony.Interpreter.Ast.IdentifierNode);
}
111 
112  //TODO: take into account non-ASCII alphabets specified by means of Unicode categories!
// Returns the strings that can start this terminal: every registered prefix, every character of
// AllFirstChars and, when the options allow an identifier to begin with an escape, the escape char.
// When AllFirstChars is empty only the prefixes are returned.
// Characters admitted solely through StartCharCategories are not represented in the result.
public override IList<string> GetFirsts() {
    StringList list = new StringList();
    list.AddRange(Prefixes);
    if (string.IsNullOrEmpty(AllFirstChars))
        return list;
    foreach (char ch in AllFirstChars)
        list.Add(ch.ToString());
    //CanStartWithEscape is a composite flag (it includes the AllowsEscapes bit), so every one of its bits
    // must be set; a plain "!= 0" test would also succeed when only AllowsEscapes is set.
    if ((Options & IdOptions.CanStartWithEscape) == IdOptions.CanStartWithEscape)
        list.Add(this.EscapeChar.ToString());
    return list;
}
125 
// Re-cases the character tables according to CaseRestriction: the First* restrictions touch only
// AllFirstChars, the All* restrictions touch both AllFirstChars and AllChars.
// Invariant casing keeps the tables independent of the current culture.
// NOTE(review): no caller is visible in this part of the file; Init performs its own first-char adjustment.
private void AdjustCasing() {
    switch (CaseRestriction) {
        case CaseRestriction.None:
            break;
        case CaseRestriction.FirstLower:
            AllFirstChars = AllFirstChars.ToLowerInvariant();
            break;
        case CaseRestriction.FirstUpper:
            AllFirstChars = AllFirstChars.ToUpperInvariant();
            break;
        case CaseRestriction.AllLower:
            AllFirstChars = AllFirstChars.ToLowerInvariant();
            AllChars = AllChars.ToLowerInvariant();
            break;
        case CaseRestriction.AllUpper:
            AllFirstChars = AllFirstChars.ToUpperInvariant();
            AllChars = AllChars.ToUpperInvariant();
            break;
    }//switch
}//method
145 
// Lets the base class initialise the details, then seeds details.Flags with this terminal's Options.
protected override void InitDetails(ParsingContext context, CompoundTokenDetails details)
{
    base.InitDetails(context, details);
    short optionBits = (short)Options;
    details.Flags = optionBits;
}
150 
151  //Override to assign IsKeyword flag to keyword tokens
// Creates the token through the base class and, unless the IsNotKeyword flag is set in the details,
// runs the keyword lookup on it.
protected override Token CreateToken(ParsingContext context, ISourceStream source, CompoundTokenDetails details)
{
    Token result = base.CreateToken(context, source, details);
    bool keywordLookupSuppressed = details.IsSet((short)IdOptions.IsNotKeyword);
    if (!keywordLookupSuppressed)
    {
        CheckReservedWord(result);
    }
    return result;
}
// Looks the token text up in the grammar's key terms. On a hit the key term is attached to the token;
// if that key term is flagged as a reserved word it also replaces the token's terminal.
private void CheckReservedWord(Token token)
{
    KeyTerm match;
    bool found = Grammar.KeyTerms.TryGetValue(token.Text, out match);
    if (!found)
        return;
    token.KeyTerm = match;
    if (match.FlagIsSet(TermFlags.IsReservedWord))
    {
        token.SetTerminal(match);
    }
}
169 
// Fast path that relies only on the AllFirstChars / AllChars tables.
// Returns null (so that the full algorithm can take over) when the first char is not in AllFirstChars,
// when the run of AllChars characters is not followed by a whitespace/delimiter char, or when the
// case restriction is violated. Otherwise returns the token after the keyword lookup.
protected override Token QuickParse(ParsingContext context, ISourceStream source)
{
    if (AllFirstChars.IndexOf(source.PreviewChar) < 0)
    {
        return null;
    }
    // Step over the first char, then over every following char that is in AllChars.
    do
    {
        source.PreviewPosition++;
    } while (AllChars.IndexOf(source.PreviewChar) >= 0 && !source.EOF());

    // The run must end at a terminator; anything else needs the full algorithm.
    bool endsAtTerminator = GrammarData.WhitespaceAndDelimiters.IndexOf(source.PreviewChar) >= 0;
    if (!endsAtTerminator)
    {
        return null;
    }

    var quickToken = source.CreateToken(this.OutputTerminal);
    if (CaseRestriction != CaseRestriction.None && !CheckCaseRestriction(quickToken.ValueString))
    {
        return null;
    }
    // The value is deliberately NOT converted to a common case for case-insensitive grammars:
    // identifiers stay as written, and case-insensitive lookup is the interpreter's responsibility.
    CheckReservedWord(quickToken);
    return quickToken;
}
188 
// Full scanning path. Collects identifier characters until end of input, a whitespace/delimiter char,
// or a char rejected by CharOk. Escape sequences are decoded when AllowsEscapes is set in the details.
// Chars whose category is in CharsToRemoveCategories are consumed but left out of the result.
// Returns false when an escape is invalid, when nothing was collected, or when the case restriction fails;
// on success details.Body holds the identifier text.
protected override bool ReadBody(ISourceStream source, CompoundTokenDetails details)
{
    int bodyStart = source.PreviewPosition;
    bool escapesAllowed = details.IsSet((short)IdOptions.AllowsEscapes);
    CharList collected = new CharList();
    while (!source.EOF())
    {
        char ch = source.PreviewChar;
        bool isTerminator = GrammarData.WhitespaceAndDelimiters.IndexOf(ch) >= 0;
        if (isTerminator)
        {
            break;
        }
        if (escapesAllowed && ch == this.EscapeChar)
        {
            ch = ReadUnicodeEscape(source, details);
            // ReadUnicodeEscape leaves the position on the symbol right after the escape digits, which is
            // the char to process in the next iteration. Step back one char so that the decoded char
            // appears to sit on the last digit of the escape sequence.
            source.PreviewPosition--;
            if (details.Error != null)
            {
                return false;
            }
        }
        // Validate the char; the "first char" rules apply only at the starting position.
        bool isFirst = source.PreviewPosition == bodyStart;
        if (!CharOk(ch, isFirst))
        {
            break;
        }
        // Keep the char unless its category is marked for removal (the category is computed a second time here).
        UnicodeCategory category = char.GetUnicodeCategory(ch);
        bool strip = this.CharsToRemoveCategories.Contains(category);
        if (!strip)
        {
            collected.Add(ch);
        }
        source.PreviewPosition++;
    }
    if (collected.Count == 0)
    {
        return false;
    }
    // Turn the collected chars into the body string and validate its casing.
    details.Body = new string(collected.ToArray());
    return CheckCaseRestriction(details.Body) && !string.IsNullOrEmpty(details.Body);
}
222 
// Tells whether ch is acceptable at the given position. The explicit character table is consulted
// first (AllFirstChars for the first char, AllChars otherwise); only if the char is not listed there
// is its Unicode category checked against StartCharCategories / CharCategories.
private bool CharOk(char ch, bool first)
{
    string table;
    UnicodeCategoryList categories;
    if (first)
    {
        table = AllFirstChars;
        categories = StartCharCategories;
    }
    else
    {
        table = AllChars;
        categories = CharCategories;
    }
    return table.IndexOf(ch) >= 0 || categories.Contains(char.GetUnicodeCategory(ch));
}
233 
// Checks body against the current CaseRestriction:
//   FirstLower / FirstUpper - tests the char at index 0 only;
//   AllLower / AllUpper     - converting the whole text to that case must not change it;
//   anything else           - always passes.
private bool CheckCaseRestriction(string body)
{
    CaseRestriction rule = CaseRestriction;
    if (rule == CaseRestriction.FirstLower)
        return Char.IsLower(body, 0);
    if (rule == CaseRestriction.FirstUpper)
        return Char.IsUpper(body, 0);
    if (rule == CaseRestriction.AllLower)
        return body.ToLower() == body;
    if (rule == CaseRestriction.AllUpper)
        return body.ToUpper() == body;
    return true;
}//method
243 
244 
// Decodes a \uXXXX (4 hex digits) or \UXXXXXXXX (8 hex digits) escape sequence.
// On entry the preview position is on the escape char. On success the position is left on the char
// right after the last digit, the HasEscapes flag is set in details.Flags and the decoded char is returned.
// On failure details.Error is set, '\0' is returned and the position is left on the char that follows
// the escape char (the caller steps back by one afterwards).
// Failures: the symbol after the escape char is neither 'u' nor 'U'; the input ends before all digits
// are available; the digits are not valid hexadecimal; the value does not fit into a single char.
private char ReadUnicodeEscape(ISourceStream source, CompoundTokenDetails details) {
    //Position is currently at "\" symbol
    source.PreviewPosition++; //move to U/u char
    int len;
    switch (source.PreviewChar) {
        case 'u': len = 4; break;
        case 'U': len = 8; break;
        default:
            details.Error = Resources.ErrInvEscSymbol; // "Invalid escape symbol, expected 'u' or 'U' only."
            return '\0';
    }
    //The digits start right AFTER the u/U char, so that offset must be included in the bounds check;
    // otherwise Substring throws when the input ends exactly one char short.
    int digitsStart = source.PreviewPosition + 1;
    if (digitsStart + len > source.Text.Length) {
        details.Error = Resources.ErrInvEscSeq; // "Invalid escape sequence";
        return '\0';
    }
    string digits = source.Text.Substring(digitsStart, len);
    uint code;
    //Report malformed digits as a scan error rather than letting a FormatException escape, and reject
    // values above 0xFFFF, which a single char cannot hold (a cast would silently truncate them).
    bool parsed = uint.TryParse(digits, NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture, out code);
    if (!parsed || code > char.MaxValue) {
        details.Error = Resources.ErrInvEscSeq; // "Invalid escape sequence";
        return '\0';
    }
    source.PreviewPosition = digitsStart + len; //move to the symbol right after the digits
    details.Flags |= (int) IdFlagsInternal.HasEscapes;
    return (char)code;
}
267 
// Sets details.Value to the identifier text: prefix + body when NameIncludesPrefix is set in the
// details, the body alone otherwise. Always returns true.
protected override bool ConvertValue(CompoundTokenDetails details)
{
    bool includePrefix = details.IsSet((short)IdOptions.NameIncludesPrefix);
    details.Value = includePrefix ? details.Prefix + details.Body : details.Body;
    return true;
}
275 
276  #endregion
277 
278  }//class
279 
280 
281 } //namespace
void SetTerminal(Terminal terminal)
Sets the terminal.
Definition: Token.cs:208
override IList< string > GetFirsts()
char PreviewChar
Gets a char at preview position
bool FlagIsSet(LanguageFlags flag)
Definition: Grammar.cs:42
override void Init(GrammarData grammarData)
static string ErrInvEscSymbol
Looks up a localized string similar to Invalid escape symbol, expected 'u' or 'U' only...
override bool ReadBody(ISourceStream source, CompoundTokenDetails details)
override void InitDetails(ParsingContext context, CompoundTokenDetails details)
string Text
Gets the text associated with this token.
Definition: Token.cs:145
KeyTermTable KeyTerms
Definition: Grammar.cs:458
Interface for Terminals to access the source stream and produce tokens.
int PreviewPosition
Gets or sets the current preview position in the source file. Must be greater or equal to Location...
Flags
Enumeration of the new Assimp's flags.
delegate void AstNodeCreator(ParsingContext context, ParseTreeNode parseNode)
IdentifierTerminal(string name, IdOptions options)
static string ErrInvEscSeq
Looks up a localized string similar to Invalid escape sequence..
readonly Grammar Grammar
Definition: GrammarData.cs:24
override Token QuickParse(ParsingContext context, ISourceStream source)
void AddPrefix(string prefix, IdOptions options)
HRESULT Convert(_In_ const Image &srcImage, _In_ DXGI_FORMAT format, _In_ DWORD filter, _In_ float threshold, _Out_ ScratchImage &image)
override bool ConvertValue(CompoundTokenDetails details)
override Token CreateToken(ParsingContext context, ISourceStream source, CompoundTokenDetails details)
Tokens are produced by scanner and fed to parser, optionally passing through Token filters in between...
Definition: Token.cs:74
IdentifierTerminal(string name, string extraChars, string extraFirstChars)