Paradox Game Engine  v1.0.0 beta06
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Events Macros Pages
StringLiteral.cs
Go to the documentation of this file.
1 #region License
2 /* **********************************************************************************
3  * Copyright (c) Roman Ivantsov
4  * This source code is subject to terms and conditions of the MIT License
5  * for Irony. A copy of the license can be found in the License.txt file
6  * at the root of this distribution.
7  * By using this source code in any fashion, you are agreeing to be bound by the terms of the
8  * MIT License.
9  * You must not remove this notice from this software.
10  * **********************************************************************************/
11 #endregion
12 
13 using System;
14 using System.Collections.Generic;
15 using System.Text;
16 using System.Diagnostics;
17 
18 namespace Irony.Parsing {
19 
// Option flags for a string literal sub-type. The numeric values are cast to short and stored in
// the token details flags, so every member keeps its original bit value.
[Flags]
public enum StringOptions : short {
  None = 0,
  // The literal is a single character; ConvertValue then requires a body of length 1.
  IsChar = 1 << 0,
  // Convert doubled start/end symbol to a single symbol; for ex. in SQL, '' -> '
  AllowsDoubledQuote = 1 << 1,
  // The literal may span several lines; Init marks the terminal as multiline when any sub-type sets this.
  AllowsLineBreak = 1 << 2,
  // Can include embedded expressions that should be evaluated on the fly; ex in Ruby: "hello #{name}"
  IsTemplate = 1 << 3,
  // Escape sequences are not interpreted in the body.
  NoEscapes = 1 << 4,
  // \udddd and \Udddddddd escapes are accepted.
  AllowsUEscapes = 1 << 5,
  // \x escapes (one to four hex digits) are accepted.
  AllowsXEscapes = 1 << 6,
  // Octal escapes (one to three octal digits) are accepted.
  AllowsOctalEscapes = 1 << 7,

}
34 
35  //Container for settings of the template string parser, used to interpret strings having embedded values or expressions
36  // like in Ruby:
37  // "Hello, #{name}"
38  // Default values match settings for Ruby strings
public class StringTemplateSettings {
  // Tag that opens an embedded expression inside a template string (Ruby-style default).
  public string StartTag = "#{";

  // Tag that closes an embedded expression inside a template string (Ruby-style default).
  public string EndTag = "}";

  // NOTE(review): StringLiteral.Init reads templateSettings.ExpressionRoot, but no such member is
  // visible in this listing (a listing line is skipped here) - confirm against the original file.
}
44 
46 
// Flags set by the string terminal itself; the value sits above the bits used by StringOptions.
public enum StringFlagsInternal : short {
  // Set by ConvertValue when escapes are enabled and the body contains the escape char.
  HasEscapes = 1 << 8,
}
50 
51  #region StringSubType
// One start/end symbol pair registered on a string literal, together with its option flags
// and the ordinal it had when it was added to the sub-type list.
class StringSubType {
  internal readonly string Start, End;
  internal readonly StringOptions Flags;
  // Position of this sub-type in the list at the moment it was added (see StringSubTypeList.Add).
  internal readonly byte Index;

  internal StringSubType(string start, string end, StringOptions flags, byte index) {
    Start = start;
    End = end;
    Flags = flags;
    Index = index;
  }

  // Comparison that orders sub-types with longer start symbols first.
  // A null sub-type or null Start is treated as length 0 and therefore sorts last.
  // The result is a consistent ordering: negative, zero or positive, and
  // Compare(x, y) always has the opposite sign of Compare(y, x), which List<T>.Sort requires.
  internal static int LongerStartFirst(StringSubType x, StringSubType y) {
    int xLength = (x == null || x.Start == null) ? 0 : x.Start.Length;
    int yLength = (y == null || y.Start == null) ? 0 : y.Start.Length;
    return yLength.CompareTo(xLength);
  }
}
// List of string sub-types; the Add overload stamps each new entry with its ordinal.
class StringSubTypeList : List<StringSubType> {
  // Creates a sub-type whose Index is the current element count (truncated to byte) and appends it.
  internal void Add(string start, string end, StringOptions flags) {
    byte ordinal = (byte) this.Count;
    StringSubType entry = new StringSubType(start, end, flags, ordinal);
    base.Add(entry);
  }
}
75  #endregion
76 
77  #region constructors and initialization
// Creates a string literal terminal with no start/end symbols yet; at least one must be
// registered (see AddStartEnd) before Init, otherwise Init reports a grammar error.
public StringLiteral(string name)
  : base(name) {
  base.SetFlag(TermFlags.IsLiteral);
  base.AstNodeType = typeof(Irony.Interpreter.Ast.LiteralValueNode);
}

// Creates a string literal that uses the same symbol to open and close the string.
public StringLiteral(string name, string startEndSymbol, StringOptions options)
  : this(name) {
  AddStartEnd(startEndSymbol, startEndSymbol, options);
}

// Same as above, with StringOptions.None.
public StringLiteral(string name, string startEndSymbol)
  : this(name, startEndSymbol, StringOptions.None) {
}

// Creates a string literal and overrides the AST node type assigned by the basic constructor.
public StringLiteral(string name, string startEndSymbol, StringOptions options, Type astNodeType)
  : this(name, startEndSymbol, options) {
  base.AstNodeType = astNodeType;
}

// Creates a string literal and assigns a custom AST node creator delegate.
public StringLiteral(string name, string startEndSymbol, StringOptions options, AstNodeCreator astNodeCreator)
  : this(name, startEndSymbol, options) {
  base.AstNodeCreator = astNodeCreator;
}
97 
// Registers a sub-type whose start and end symbols are the same string.
public void AddStartEnd(string startEndSymbol, StringOptions stringOptions) {
  _subtypes.Add(startEndSymbol, startEndSymbol, stringOptions);
}

// Registers a sub-type with distinct start and end symbols.
public void AddStartEnd(string startSymbol, string endSymbol, StringOptions stringOptions) {
  _subtypes.Add(startSymbol, endSymbol, stringOptions);
}
// Registers a prefix and the option flags it implies; the flags are passed to the base class as a short.
public void AddPrefix(string prefix, StringOptions flags) {
  short flagBits = (short) flags;
  base.AddPrefixFlag(prefix, flagBits);
}
107 
108  #endregion
109 
110  #region Properties/Fields
// All start/end symbol pairs registered for this literal; sorted by Init (longer start symbols first).
private readonly StringSubTypeList _subtypes = new StringSubTypeList();

// First chars of all start symbols, built by Init; used as a quick pre-check in ReadStartSymbol.
private string _startSymbolsFirsts;
113  #endregion
114 
115  #region overrides: Init, GetFirsts, ReadBody, etc...
// Validates the literal's configuration and prepares scanning data:
//  - reports a grammar error when no start/end symbols are registered or a start symbol is duplicated;
//  - sorts sub-types and collects the first chars of their start symbols;
//  - sets the IsMultiline flag when any sub-type allows line breaks;
//  - for template strings, checks the template settings object;
//  - creates a default editor info when none is assigned.
public override void Init(GrammarData grammarData) {
  base.Init(grammarData);
  _startSymbolsFirsts = string.Empty;
  if (_subtypes.Count == 0) {
    //"Error in string literal [{0}]: No start/end symbols specified."
    grammarData.Language.Errors.Add(GrammarErrorLevel.Error, null, Resources.ErrInvStrDef, this.Name);
    return;
  }

  //Walk all sub-types once, gathering first chars and the flags we care about
  var seenStartSymbols = new StringSet(); //to detect duplicate start symbols
  _subtypes.Sort(StringSubType.LongerStartFirst);
  bool hasTemplate = false;
  bool hasMultiline = false;
  foreach (StringSubType subType in _subtypes) {
    if (seenStartSymbols.Contains(subType.Start)) {
      //"Duplicate start symbol {0} in string literal [{1}]."
      grammarData.Language.Errors.Add(GrammarErrorLevel.Error, null,
        Resources.ErrDupStartSymbolStr, subType.Start, this.Name);
    }
    seenStartSymbols.Add(subType.Start);
    _startSymbolsFirsts += subType.Start[0].ToString();
    if ((subType.Flags & StringOptions.IsTemplate) != 0) {
      hasTemplate = true;
    }
    if ((subType.Flags & StringOptions.AllowsLineBreak) != 0) {
      hasMultiline = true;
    }
  }

  //For case-insensitive terminals accept both casings of every first char
  if (!CaseSensitive) {
    _startSymbolsFirsts = _startSymbolsFirsts.ToLower() + _startSymbolsFirsts.ToUpper();
  }

  //Set multiline flag
  if (hasMultiline) {
    SetFlag(TermFlags.IsMultiline);
  }

  //For templates only: check that a usable template settings object is provided
  if (hasTemplate) {
    var templateSettings = this.AstNodeConfig as StringTemplateSettings;
    if (templateSettings == null) {
      //"Error in string literal [{0}]: IsTemplate flag is set, but TemplateSettings is not provided."
      grammarData.Language.Errors.Add(GrammarErrorLevel.Error, null, Resources.ErrTemplNoSettings, this.Name);
    } else if (templateSettings.ExpressionRoot == null) {
      grammarData.Language.Errors.Add(GrammarErrorLevel.Error, null, Resources.ErrTemplMissingExprRoot, this.Name);
    } else if (!Grammar.SnippetRoots.Contains(templateSettings.ExpressionRoot)) {
      grammarData.Language.Errors.Add(GrammarErrorLevel.Error, null, Resources.ErrTemplExprNotRoot, this.Name);
    }
  }

  //Create editor info
  if (this.EditorInfo == null) {
    this.EditorInfo = new TokenEditorInfo(TokenType.String, TokenColor.String, TokenTriggers.None);
  }
}
159 
// Returns the strings a token of this terminal can start with: every registered prefix,
// followed by each first char of the start symbols (as one-char strings).
public override IList<string> GetFirsts() {
  var firsts = new StringList();
  firsts.AddRange(Prefixes);
  //we assume that prefix is always optional, so string can start with start-end symbol
  for (int i = 0; i < _startSymbolsFirsts.Length; i++) {
    firsts.Add(_startSymbolsFirsts[i].ToString());
  }
  return firsts;
}
168 
// Reads the string body. Unless we are continuing a partial token, the start symbol must be
// matched first; when it does not match, false is returned and the body is not read.
protected override bool ReadBody(ISourceStream source, CompoundTokenDetails details) {
  bool startConsumed = details.PartialContinues || ReadStartSymbol(source, details);
  return startConsumed && CompleteReadBody(source, details);
}
175 
// Scans from the current preview position to the terminating end symbol and stores the text
// in between in details.Body. Escaped and (when allowed) doubled end symbols are skipped.
// Always returns true: once the start symbol was seen the token is a string, possibly a
// malformed one (details.Error set) or a partial one (details.IsPartial set).
// The end symbol is searched with ordinal comparison: it is a lexical delimiter, and
// culture-sensitive matching can skip ignorable characters or behave differently per platform.
private bool CompleteReadBody(ISourceStream source, CompoundTokenDetails details) {
  bool escapeEnabled = !details.IsSet((short) StringOptions.NoEscapes);
  int start = source.PreviewPosition;
  string endQuoteSymbol = details.EndSymbol;
  string endQuoteDoubled = endQuoteSymbol + endQuoteSymbol; //doubled quote symbol
  bool lineBreakAllowed = details.IsSet((short) StringOptions.AllowsLineBreak);
  //1. Find the string end
  // first get the position of the next line break; we are interested in it to detect malformed string,
  //  therefore do it only if linebreak is NOT allowed; if linebreak is allowed, set it to -1 (we don't care).
  int nlPos = lineBreakAllowed ? -1 : source.Text.IndexOf('\n', source.PreviewPosition);
  while (true) {
    int endPos = source.Text.IndexOf(endQuoteSymbol, source.PreviewPosition, StringComparison.Ordinal);
    //Check for partial token in line-scanning mode
    if (endPos < 0 && details.PartialOk && lineBreakAllowed) {
      ProcessPartialBody(source, details);
      return true;
    }
    //Check for malformed string: either EndSymbol not found, or LineBreak is found before EndSymbol
    bool malformed = endPos < 0 || nlPos >= 0 && nlPos < endPos;
    if (malformed) {
      //Set source position for recovery: move to the next line if linebreak is not allowed.
      if (nlPos > 0) endPos = nlPos;
      if (endPos > 0) source.PreviewPosition = endPos + 1;
      details.Error = Resources.ErrBadStrLiteral;// "Mal-formed string literal - cannot find termination symbol.";
      return true; //we did find start symbol, so it is definitely string, only malformed
    }//if malformed

    //Stop when the stream reports EOF (original note: "fix by ashmind for EOF right after opening symbol")
    if (source.EOF())
      return true;

    //We found EndSymbol - check if it is escaped; if yes, skip it and continue search
    if (escapeEnabled && IsEndQuoteEscaped(source.Text, endPos)) {
      source.PreviewPosition = endPos + endQuoteSymbol.Length;
      continue; //searching for end symbol
    }

    //Check if it is doubled end symbol
    source.PreviewPosition = endPos;
    if (details.IsSet((short)StringOptions.AllowsDoubledQuote) && source.MatchSymbol(endQuoteDoubled, !CaseSensitive)) {
      source.PreviewPosition = endPos + endQuoteDoubled.Length;
      continue;
    }//checking for doubled end symbol

    //Ok, this is normal endSymbol that terminates the string.
    // Advance source position and get out from the loop
    details.Body = source.Text.Substring(start, endPos - start);
    source.PreviewPosition = endPos + endQuoteSymbol.Length;
    return true; //if we come here it means we're done - we found string end.
  }  //end of loop to find string end;
}
// Consumes the rest of the source text as a partial string body: moves the preview position
// to the end of the text, stores the consumed text in details.Body and marks the token as partial.
private void ProcessPartialBody(ISourceStream source, CompoundTokenDetails details) {
  int bodyStart = source.PreviewPosition;
  source.PreviewPosition = source.Text.Length;
  int bodyLength = source.PreviewPosition - bodyStart;
  details.Body = source.Text.Substring(bodyStart, bodyLength);
  details.IsPartial = true;
}
233 
// Initializes token details. When the line-scan state is non-zero we are continuing a partial
// string on the next line, so flags, sub-type index and start/end symbols are restored from it.
protected override void InitDetails(ParsingContext context, CompoundTerminalBase.CompoundTokenDetails details) {
  base.InitDetails(context, details);
  if (context.VsLineScanState.Value == 0) return;

  //we are continuing partial string on the next line
  details.Flags = context.VsLineScanState.TerminalFlags;
  details.SubTypeIndex = context.VsLineScanState.TokenSubType;
  var subTypeIndex = context.VsLineScanState.TokenSubType;
  // The saved value is StringSubType.Index (see ReadStartSymbol), which is assigned when the
  // sub-type is added. Init later sorts _subtypes, so list position and Index may differ;
  // look the sub-type up by Index and fall back to the positional lookup only if none matches.
  StringSubType stringInfo = _subtypes.Find(st => st.Index == subTypeIndex) ?? _subtypes[subTypeIndex];
  details.StartSymbol = stringInfo.Start;
  details.EndSymbol = stringInfo.End;
}
245 
// Reads the suffix via the base class, then keeps the IsChar flag and the Char type code in sync.
protected override void ReadSuffix(ISourceStream source, CompoundTerminalBase.CompoundTokenDetails details) {
  base.ReadSuffix(source, details);

  //"char" type can be identified by suffix (like VB where c suffix identifies char)
  // in this case we have details.TypeCodes[0] == char and we need to set the IsChar flag
  bool suffixSaysChar = details.TypeCodes != null && details.TypeCodes[0] == TypeCode.Char;
  if (suffixSaysChar) {
    details.Flags |= (int)StringOptions.IsChar;
    return;
  }

  //we may have IsChar flag set (from startEndSymbol, like in c# single quote identifies char)
  // in this case set type code
  if (details.IsSet((short) StringOptions.IsChar)) {
    details.TypeCodes = new TypeCode[] { TypeCode.Char };
  }
}
258 
// Returns true when the end symbol at quotePosition is preceded by an odd number of
// consecutive escape chars, i.e. the symbol itself is escaped.
// The scan goes back to index 0 inclusive, so an escape char that is the very first
// character of the text is counted too.
private bool IsEndQuoteEscaped(string text, int quotePosition) {
  bool escaped = false;
  int p = quotePosition - 1;
  while (p >= 0 && text[p] == EscapeChar) {
    escaped = !escaped; //each additional escape char flips the state: \" is escaped, \\" is not
    p--;
  }
  return escaped;
}
268 
// Tries to match one of the registered start symbols at the current preview position.
// On success copies the sub-type's symbols, flags and index into details, advances the
// preview position past the start symbol and returns true; otherwise returns false.
private bool ReadStartSymbol(ISourceStream source, CompoundTokenDetails details) {
  //Quick rejection: the current char is not a first char of any start symbol
  bool couldStart = _startSymbolsFirsts.IndexOf(source.PreviewChar) >= 0;
  if (!couldStart) {
    return false;
  }
  foreach (StringSubType candidate in _subtypes) {
    if (source.MatchSymbol(candidate.Start, !CaseSensitive)) {
      //We found start symbol
      details.StartSymbol = candidate.Start;
      details.EndSymbol = candidate.End;
      details.Flags |= (short) candidate.Flags;
      details.SubTypeIndex = candidate.Index;
      source.PreviewPosition += candidate.Start.Length;
      return true;
    }
  }
  return false;
}
285 
286 
// Extracts the string content from the token body, resolving escape sequences and doubled
// end symbols, and stores the result in details.Value (a char for IsChar literals, a string otherwise).
// Returns false only for a char literal whose converted value is not exactly one character;
// in that case details.Error is set.
protected override bool ConvertValue(CompoundTokenDetails details) {
  string value = details.Body;
  bool escapeEnabled = !details.IsSet((short)StringOptions.NoEscapes);
  //Fix all escapes
  if (escapeEnabled && value.IndexOf(EscapeChar) >= 0) {
    details.Flags |= (int) StringFlagsInternal.HasEscapes;
    //Each element after the first starts right after an escape char
    string[] arr = value.Split(EscapeChar);
    bool ignoreNext = false;
    //we skip the 0 element as it is not preceded by the escape char
    for (int i = 1; i < arr.Length; i++) {
      if (ignoreNext) {
        ignoreNext = false;
        continue;
      }
      string s = arr[i];
      if (string.IsNullOrEmpty(s)) {
        //two escape chars in a row - an escaped escape symbol; emit the escape char itself
        // (not a hard-coded backslash, so a customized EscapeChar round-trips correctly)
        // and leave the following segment untouched.
        arr[i] = EscapeChar.ToString();
        ignoreNext = true;
        continue;
      }
      //The char is being escaped is the first one; replace it with char in Escapes table
      char first = s[0];
      string newFirst;
      if (Escapes.TryGetValue(first, out newFirst))
        arr[i] = newFirst + s.Substring(1);
      else {
        arr[i] = HandleSpecialEscape(arr[i], details);
      }//else
    }//for i
    value = string.Join(string.Empty, arr);
  }// if EscapeEnabled

  //Check for doubled end symbol; ordinal search to agree with string.Replace, which is ordinal
  string endSymbol = details.EndSymbol;
  if (details.IsSet((short)StringOptions.AllowsDoubledQuote) && value.IndexOf(endSymbol, StringComparison.Ordinal) >= 0)
    value = value.Replace(endSymbol + endSymbol, endSymbol);

  if (details.IsSet((short)StringOptions.IsChar)) {
    if (value.Length != 1) {
      details.Error = Resources.ErrBadChar;  //"Invalid length of char literal - should be a single character.";
      return false;
    }
    details.Value = value[0];
  } else {
    details.TypeCodes = new TypeCode[] { TypeCode.String };
    details.Value = value;
  }
  return true;
}
338 
// Converts an escape sequence that is not in the Escapes table. "segment" is the text that
// follows an escape char, up to the next escape char; its first char selects the escape kind.
// Handled here: \udddd and \Udddddddd (AllowsUEscapes), \x with 1-4 hex digits (AllowsXEscapes),
// octal with 1-3 digits (AllowsOctalEscapes). \N{name} is not implemented.
// Returns the segment with the escape replaced by the decoded character(s). On any failure
// sets details.Error and returns the segment unchanged; it does not throw for bad input.
protected virtual string HandleSpecialEscape(string segment, CompoundTokenDetails details) {
  if (string.IsNullOrEmpty(segment)) return string.Empty;
  int len, p; string digits; char ch; string result;
  char first = segment[0];
  switch (first) {
    case 'u':
    case 'U':
      if (details.IsSet((short)StringOptions.AllowsUEscapes)) {
        len = (first == 'u' ? 4 : 8);
        //The escape needs exactly len hex digits after the u/U; verify before converting so that
        // short or non-hex input is reported as an error instead of raising an exception.
        bool wellFormed = segment.Length >= len + 1;
        for (p = 1; wellFormed && p <= len; p++)
          wellFormed = Uri.IsHexDigit(segment[p]);
        if (!wellFormed) {
          details.Error = string.Format(Resources.ErrBadUnEscape, segment, len);// "Invalid unicode escape ({0}), expected {1} hex digits."
          return segment;
        }
        digits = segment.Substring(1, len);
        uint codePoint = Convert.ToUInt32(digits, 16);
        string decoded;
        if (codePoint <= 0xFFFF)
          decoded = ((char) codePoint).ToString();
        else if (codePoint <= 0x10FFFF)
          decoded = char.ConvertFromUtf32((int) codePoint); //outside the BMP: needs a surrogate pair
        else {
          //not a valid Unicode code point
          details.Error = string.Format(Resources.ErrBadUnEscape, segment, len);
          return segment;
        }
        result = decoded + segment.Substring(len + 1);
        return result;
      }//if
      break;
    case 'x':
      if (details.IsSet((short)StringOptions.AllowsXEscapes)) {
        //x-escape allows variable number of digits, from one to 4; let's count them
        p = 1; //current position
        while (p < 5 && p < segment.Length) {
          if (Strings.HexDigits.IndexOf(segment[p]) < 0) break;
          p++;
        }
        //p now point to char right after the last digit
        if (p <= 1) {
          details.Error = Resources.ErrBadXEscape; // @"Invalid \x escape, at least one digit expected.";
          return segment;
        }
        digits = segment.Substring(1, p - 1);
        ch = (char) Convert.ToUInt32(digits, 16);
        result = ch + segment.Substring(p);
        return result;
      }//if
      break;
    case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
      if (details.IsSet((short)StringOptions.AllowsOctalEscapes)) {
        //octal escape allows variable number of digits, from one to 3; let's count them
        p = 0; //current position
        while (p < 3 && p < segment.Length) {
          if (Strings.OctalDigits.IndexOf(segment[p]) < 0) break;
          p++;
        }
        //p now point to char right after the last digit
        digits = segment.Substring(0, p);
        ch = (char)Convert.ToUInt32(digits, 8);
        result = ch + segment.Substring(p);
        return result;
      }//if
      break;
  }//switch
  //Unknown escape, or the matching Allows* option is not set
  details.Error = string.Format(Resources.ErrInvEscape, segment); //"Invalid escape sequence: \{0}"
  return segment;
}//method
397  #endregion
398 
399  }//class
400 
401 }//namespace
override void Init(GrammarData grammarData)
static string ErrInvStrDef
Looks up a localized string similar to Error in string literal [{0}]: No start/end symbols specified...
_In_ size_t _In_ DXGI_FORMAT _In_ size_t _In_ DXGI_FORMAT _In_ DWORD flags
Definition: DirectXTexP.h:170
A strongly-typed resource class, for looking up localized strings, etc.
override void ReadSuffix(ISourceStream source, CompoundTerminalBase.CompoundTokenDetails details)
_In_ size_t _In_ DXGI_FORMAT _In_ size_t _In_ float size_t y
Definition: DirectXTexP.h:191
StringLiteral(string name, string startEndSymbol, StringOptions options)
static string ErrTemplMissingExprRoot
Looks up a localized string similar to Expression root is not specified in template settings (AstNode...
Interface for Terminals to access the source stream and produce tokens.
static string ErrBadUnEscape
Looks up a localized string similar to Invalid unicode escape ({0}), expected {1} hex digits...
void AddPrefix(string prefix, StringOptions flags)
Flags
Enumeration of the new Assimp's flags.
static string ErrInvEscape
Looks up a localized string similar to Invalid escape sequence: \{0}.
delegate void AstNodeCreator(ParsingContext context, ParseTreeNode parseNode)
virtual string HandleSpecialEscape(string segment, CompoundTokenDetails details)
const string OctalDigits
Definition: StringUtils.cs:22
void AddStartEnd(string startSymbol, string endSymbol, StringOptions stringOptions)
StringLiteral(string name, string startEndSymbol, StringOptions options, AstNodeCreator astNodeCreator)
static string ErrTemplExprNotRoot
Looks up a localized string similar to Expression root non-terminal in template settings (AstNodeConf...
function s(a)
static string ErrBadStrLiteral
Looks up a localized string similar to Mal-formed string literal - cannot find termination symbol...
const string HexDigits
Definition: StringUtils.cs:23
HRESULT Convert(_In_ const Image &srcImage, _In_ DXGI_FORMAT format, _In_ DWORD filter, _In_ float threshold, _Out_ ScratchImage &image)
StringLiteral(string name, string startEndSymbol)
override bool ReadBody(ISourceStream source, CompoundTokenDetails details)
static string ErrBadXEscape
Looks up a localized string similar to Invalid \x escape, at least one digit expected.
StringLiteral(string name, string startEndSymbol, StringOptions options, Type astNodeType)
override bool ConvertValue(CompoundTokenDetails details)
static string ErrTemplNoSettings
Looks up a localized string similar to Error in string literal [{0}]: IsTemplate flag is set...
override IList< string > GetFirsts()
static string ErrDupStartSymbolStr
Looks up a localized string similar to Duplicate start symbol {0} in string literal [{1}]...
override void InitDetails(ParsingContext context, CompoundTerminalBase.CompoundTokenDetails details)
NonTerminalSet SnippetRoots
Definition: Grammar.cs:75
void AddStartEnd(string startEndSymbol, StringOptions stringOptions)
static string ErrBadChar
Looks up a localized string similar to Invalid length of char literal - should be a single character...