Paradox Game Engine  v1.0.0 beta06
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Events Macros Pages
RegExLiteral.cs
Go to the documentation of this file.
1 #region License
2 /* **********************************************************************************
3  * Copyright (c) Roman Ivantsov
4  * This source code is subject to terms and conditions of the MIT License
5  * for Irony. A copy of the license can be found in the License.txt file
6  * at the root of this distribution.
7  * By using this source code in any fashion, you are agreeing to be bound by the terms of the
8  * MIT License.
9  * You must not remove this notice from this software.
10  * **********************************************************************************/
11 #endregion
12 
13 using System;
14 using System.Collections.Generic;
15 using System.Linq;
16 using System.Text;
17 using System.Text.RegularExpressions;
18 
19 namespace Irony.Parsing {
20  // Regular expression literal, like javascript literal: /abc?/i
21  // Allows optional switches
22  // example:
23  // regex = /abc\\\/de/
24  // matches fragments like "abc\/de"
25  // Note: switches are returned in the token.Details field. Unlike in StringLiteral, we don't need to unescape the escaped chars
26  // (this is the job of the regex engine); we only need to correctly recognize the end of the expression.
27 
/// <summary>
/// Flags that control how <c>RegExLiteral</c> validates a scanned literal and what it stores in the token value.
/// </summary>
[Flags]
public enum RegexTermOptions {
  /// <summary>No options set.</summary>
  None = 0,
  /// <summary>If not set, any letter following the legal switches is reported as an invalid switch.</summary>
  AllowLetterAfter = 0x01,
  /// <summary>If set, token.Value contains a Regex object; otherwise it contains the pattern string.</summary>
  CreateRegExObject = 0x02,
  /// <summary>If set, each switch may appear at most once after the literal.</summary>
  UniqueSwitches = 0x04,

  /// <summary>
  /// Option set used to initialize <c>RegExLiteral.Options</c>.
  /// </summary>
  // NOTE(review): this member was missing from the listing although RegExLiteral.Options is initialized with it;
  // the value is restored from the upstream Irony source - confirm against the repository.
  Default = CreateRegExObject | UniqueSwitches,
}
37 
/// <summary>
/// Terminal that scans a regular expression literal delimited by <see cref="StartSymbol"/> and
/// <see cref="EndSymbol"/>, optionally followed by switch characters registered in <see cref="Switches"/>.
/// The literal must end on the line it starts on; the pattern text is not unescaped.
/// </summary>
public class RegExLiteral : Terminal {
  /// <summary>Maps a switch character (for example 'i') to the RegexOptions it enables.</summary>
  public class RegexSwitchTable : Dictionary<char, RegexOptions> { }

  /// <summary>Character that opens the literal.</summary>
  public Char StartSymbol = '/';
  /// <summary>Character that closes the literal (unless it is escaped).</summary>
  public Char EndSymbol = '/';
  /// <summary>Character that escapes an <see cref="EndSymbol"/> inside the pattern.</summary>
  public Char EscapeSymbol = '\\';
  /// <summary>Switch characters accepted after the closing symbol.</summary>
  public RegexSwitchTable Switches = new RegexSwitchTable();
  /// <summary>Options every scanned literal starts with; switches found in the source are OR-ed on top.</summary>
  public RegexOptions DefaultOptions = RegexOptions.None;
  /// <summary>Behaviour flags of this terminal.</summary>
  public RegexTermOptions Options = RegexTermOptions.Default;

  // Characters that stop the forward scan: the end symbol, or a line break (which is an error).
  private char[] _stopChars;

  /// <summary>
  /// Creates a literal using the default '/' delimiters and '\' escape, registers the
  /// 'i', 'g' and 'm' switches and flags the terminal as a literal.
  /// </summary>
  /// <param name="name">Terminal name passed to the base class.</param>
  public RegExLiteral(string name) : base(name) {
    Switches.Add('i', RegexOptions.IgnoreCase);
    // 'g' is accepted but mapped to no option; the original author was unsure how to handle it.
    Switches.Add('g', RegexOptions.None);
    Switches.Add('m', RegexOptions.Multiline);
    base.SetFlag(TermFlags.IsLiteral);
  }

  /// <summary>
  /// Creates a literal with a custom delimiter (used for both start and end) and escape symbol.
  /// </summary>
  /// <param name="name">Terminal name passed to the base class.</param>
  /// <param name="startEndSymbol">Character used as both start and end symbol.</param>
  /// <param name="escapeSymbol">Character that escapes the end symbol.</param>
  // NOTE(review): unlike the single-argument constructor, this one registers no switches and does not set
  // TermFlags.IsLiteral. Left unchanged because callers may already add the same switch keys themselves
  // (Dictionary.Add would then throw) - confirm whether this difference is intended.
  public RegExLiteral(string name, char startEndSymbol, char escapeSymbol) : base(name) {
    StartSymbol = startEndSymbol;
    EndSymbol = startEndSymbol;
    EscapeSymbol = escapeSymbol;
  }//constructor

  /// <summary>Initializes the terminal and caches the set of characters that stop the scan.</summary>
  /// <param name="grammarData">Grammar data forwarded to the base implementation.</param>
  public override void Init(GrammarData grammarData) {
    base.Init(grammarData);
    _stopChars = new char[] { EndSymbol, '\r', '\n' };
  }

  /// <summary>Returns the prefixes this terminal can start with: only the start symbol.</summary>
  public override IList<string> GetFirsts() {
    var result = new StringList();
    result.Add(StartSymbol.ToString());
    return result;
  }

  /// <summary>
  /// Scans a regex literal at the current source location.
  /// </summary>
  /// <param name="context">Parsing context (not used by this terminal).</param>
  /// <param name="source">Source stream positioned at the start symbol.</param>
  /// <returns>
  /// A token whose Value is the pattern string, or a Regex when
  /// <see cref="RegexTermOptions.CreateRegExObject"/> is set, and whose Details holds the switch characters;
  /// or an error token when the literal is unterminated, hits a line break, repeats a switch
  /// (with <see cref="RegexTermOptions.UniqueSwitches"/>) or is followed by an unknown letter.
  /// </returns>
  public override Token TryMatch(ParsingContext context, ISourceStream source) {
    while (true) {
      //Find next candidate for the end of the literal
      var newPos = source.Text.IndexOfAny(_stopChars, source.PreviewPosition + 1);
      //nothing found up to the end of text
      if (newPos == -1)
        return source.CreateErrorToken(Resources.ErrNoEndForRegex);// "No end symbol for regex literal."
      source.PreviewPosition = newPos;
      if (source.PreviewChar != EndSymbol)
        //we hit CR or LF, this is an error
        return source.CreateErrorToken(Resources.ErrNoEndForRegex);
      //an escaped end symbol belongs to the pattern; keep scanning
      if (!CheckEscaped(source))
        break;
    }
    source.PreviewPosition++; //move after end symbol
    //save pattern length now, before the switches are consumed
    var patternLen = source.PreviewPosition - source.Location.Position - 2; //exclude start and end symbol
    //read switches and turn them into options, starting from the configured defaults
    //(previously DefaultOptions was declared but silently ignored)
    RegexOptions options = DefaultOptions;
    var switches = string.Empty;
    while (ReadSwitch(source, ref options)) {
      if (IsSet(RegexTermOptions.UniqueSwitches) && switches.Contains(source.PreviewChar))
        return source.CreateErrorToken(Resources.ErrDupRegexSwitch, source.PreviewChar); // "Duplicate switch '{0}' for regular expression"
      switches += source.PreviewChar.ToString();
      source.PreviewPosition++;
    }
    //check following symbol
    if (!IsSet(RegexTermOptions.AllowLetterAfter)) {
      var currChar = source.PreviewChar;
      if (char.IsLetter(currChar) || currChar == '_')
        return source.CreateErrorToken(Resources.ErrInvRegexSwitch, currChar); // "Invalid switch '{0}' for regular expression"
    }
    var token = source.CreateToken(this.OutputTerminal);
    //we have token, now what's left is to set its Value field. It is either pattern itself, or Regex instance
    string pattern = token.Text.Substring(1, patternLen); //exclude start and end symbol
    object value = pattern;
    if (IsSet(RegexTermOptions.CreateRegExObject)) {
      // NOTE(review): the Regex constructor throws ArgumentException for an invalid pattern; that exception
      // is not converted to an error token here - confirm callers handle it.
      value = new Regex(pattern, options);
    }
    token.Value = value;
    token.Details = switches; //save switches in token.Details
    return token;
  }

  /// <summary>
  /// Tells whether the character at the preview position is escaped, i.e. preceded by an odd number of
  /// consecutive escape symbols. The preview position is restored before returning.
  /// </summary>
  private bool CheckEscaped(ISourceStream source) {
    var savePos = source.PreviewPosition;
    var tokenStart = source.Location.Position;
    bool escaped = false;
    source.PreviewPosition--;
    //never walk back past the start symbol; this matters only when StartSymbol equals EscapeSymbol,
    //otherwise the start symbol itself already terminates the loop
    while (source.PreviewPosition > tokenStart && source.PreviewChar == EscapeSymbol) {
      escaped = !escaped;
      source.PreviewPosition--;
    }
    source.PreviewPosition = savePos;
    return escaped;
  }

  /// <summary>
  /// Checks whether the preview char is a registered switch; if so, ORs its option into
  /// <paramref name="options"/>. Does not advance the preview position.
  /// </summary>
  /// <returns>True if the preview char is a registered switch.</returns>
  private bool ReadSwitch(ISourceStream source, ref RegexOptions options) {
    RegexOptions option;
    var result = Switches.TryGetValue(source.PreviewChar, out option);
    if (result)
      options |= option;
    return result;
  }

  /// <summary>Returns true if any bit of <paramref name="option"/> is set in <see cref="Options"/>.</summary>
  public bool IsSet(RegexTermOptions option) {
    return (Options & option) != 0;
  }

}//class
141 
142 }//namespace
bool IsSet(RegexTermOptions option)
override void Init(GrammarData grammarData)
Definition: RegExLiteral.cs:63
char PreviewChar
Gets a char at preview position
A strongly-typed resource class, for looking up localized strings, etc.
static string ErrNoEndForRegex
Looks up a localized string similar to No end symbol for regex literal..
Interface for Terminals to access the source stream and produce tokens.
Flags
Enumeration of the new Assimp's flags.
static string ErrInvRegexSwitch
Looks up a localized string similar to Invalid switch '{0}' for regular expression.
override IList< string > GetFirsts()
Definition: RegExLiteral.cs:67
static string ErrDupRegexSwitch
Looks up a localized string similar to Duplicate switch '{0}' for regular expression..
Tokens are produced by scanner and fed to parser, optionally passing through Token filters in between...
Definition: Token.cs:74
override Token TryMatch(ParsingContext context, ISourceStream source)
Definition: RegExLiteral.cs:73
RegExLiteral(string name, char startEndSymbol, char escapeSymbol)
Definition: RegExLiteral.cs:57
Token CreateErrorToken(string message, params object[] args)
Creates error token with custom error message as its Value.