| PLEX86 | ||
|
Regex Pattern Matching algorithm in monoc
JeffRelf I'm looking at the source code for mono's Regex implementation right now. You can download that source here ( use the class libraries download ). One of the files ( quicksearch.cs -- it's all written in mono as well ) says it uses "simplified Boyer-Moore" for fast substring matching. That is the method I learned in CS ( using the SNOBOL compiler ). Here's a page that demonstrates, step-by-step, text matching algorithms, including Boyer-Moore. Here's that source for quicksearch within the mono regex... interesting stuff... - assembly: System; namespace: System.Text.RegularExpressions; file: quicksearch.cs -- (c) 2002 Dan Lewis, (c) 2003 Juraj Skripsky - Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
using System; using System.Collections; namespace System.Text.RegularExpressions { internal class QuickSearch { simplified boyer-moore for fast substring matching (for short strings, we use simple scans) public QuickSearch (string str, bool ignore) : this(str, ignore, false) { } public QuickSearch (string str, bool ignore, bool reverse) { this.str = str; this.len = str.Length; this.ignore = ignore; this.reverse = reverse; if (ignore) str = str.ToLower (); What is it's scope In comp.os.linux.advocacy, JeffRelf wrote on 05 Apr 2005 12:25:34 GMT For g++-x86 it's also left-to-right. However, that's probably because the LALR Debt Reduction for left() to... create the shift table only for "long" search strings SetupShiftTable (); } public string String { get { return str; } } public int Length { get { return len; } } public bool IgnoreCase { get { return ignore; } } public int Search (string text, int start, int end) { int ptr = start; if ( reverse ) { return -1; { ptr = text.Length; } use simple scan for a single-character search string if (len == 1) { { if(str0 == GetChar(textptr)) return ptr ; } return -1; } end = len - 1 ; Quiz time for Bonzo Just to keep in practice... :-) I think I'm living in the twilight zone, Early this... 
ptr--; { int i = len -1 ; while (stri == GetChar(textptr - len +1 + i)) { return ptr - len + 1; } { ptr -= GetShiftDistance (textptr - len ); } else break; } } else { use simple scan for a single-character search string if (len == 1) { { if(str0 == GetChar(textptr)) return ptr; else ptr++; } return -1; } end = text.Length - len; { int i = len - 1; while (stri == GetChar(textptr + i)) { return ptr; } ptr += GetShiftDistance (textptr + len); else break; } } return -1; } private private void SetupShiftTable () { shift = new Hashtable (); if (reverse) { { char c = stri -1; shiftGetChar(c) = i; } } else { { char c = stri; shiftGetChar(c) = len - i; } } } private int GetShiftDistance (char c) { if(shift == null) return 1; object s = shift GetChar (c); return (s != null ? (int)s : len + 1); } private char GetChar(char c) { return (!ignore ? c : Char.ToLower(c)); } private string str; private int len; private bool ignore; private bool reverse; private Hashtable shift; private readonly static int THRESHOLD = 5; } }
|
||||||||