#include <cmn_FastSearch.h>


Base on ASCII table. Best for searc long pattern with no repeated characters in it.
Originaly written by
http://www-igm.univ-mlv.fr/~lecroq/string/node18.html#SECTION00180
Definition at line 131 of file cmn_FastSearch.h.
Public Member Functions | |
| cmn_FastPattSearch (cmn_SkipTbl4FS *a_skipTbl_p) | |
| cmn_FastPattSearch (const char *const a_buffer_p, const int a_bufLen, cmn_SkipTbl4FS *a_skipTbl_p) | |
| virtual | ~cmn_FastPattSearch () |
| void | SetPattern (const string &a_pattern) |
| set new search pattern | |
| Int32_t | Find () |
| NOTE! if return value is grater than buffer then string was not found the return value is willing handled in this way, because of using same algorithem with merging buffers. | |
Public Attributes | |
| log_CLASSID_m | |
| Macro to add class name member s_className. | |
Private Attributes | |
| cmn_SkipTbl4FS * | m_skipTbl_p |
| bad-character skip table instance, it is possible that is preconfigured in case of searching already known string | |
| cmn_FastPattSearch::cmn_FastPattSearch | ( | cmn_SkipTbl4FS * | a_skipTbl_p | ) |
Definition at line 61 of file cmn_FastSearch.cpp.
References log_FUNC_m.
00063 : 00064 cmn_FastSearch(NULL, 0), 00065 m_skipTbl_p(a_skipTbl_p) 00066 { 00067 log_FUNC_m(cmn_FastPattSearch); 00068 // Empty 00069 } //============================================================================//
| cmn_FastPattSearch::cmn_FastPattSearch | ( | const char *const | a_buffer_p, | |
| const int | a_bufLen, | |||
| cmn_SkipTbl4FS * | a_skipTbl_p | |||
| ) |
Definition at line 73 of file cmn_FastSearch.cpp.
References log_FUNC_m.
00077 : 00078 cmn_FastSearch(a_buffer_p, a_bufLen), 00079 m_skipTbl_p(a_skipTbl_p) 00080 { 00081 log_FUNC_m(cmn_FastPattSearch); 00082 //Empty 00083 } //============================================================================//
| cmn_FastPattSearch::~cmn_FastPattSearch | ( | ) | [virtual] |
Definition at line 87 of file cmn_FastSearch.cpp.
References log_FUNC_m.
00087 { 00088 log_FUNC_m(~cmn_FastPattSearch); 00089 00090 /*DEL if (m_skipTblConstructedInternaly) { 00091 //DEL delete [] m_bmBc_p; 00092 delete m_skipTbl_p; 00093 } 00094 */ 00095 }
| void cmn_FastPattSearch::SetPattern | ( | const string & | a_pattern | ) | [inline, virtual] |
set new search pattern
Implements cmn_FastSearch.
Definition at line 157 of file cmn_FastSearch.h.
00157 {m_skipTbl_p->SetPattern(a_pattern);};
| Int32_t cmn_FastPattSearch::Find | ( | ) | [virtual] |
NOTE! if return value is grater than buffer then string was not found the return value is willing handled in this way, because of using same algorithem with merging buffers.
It's known how much buffer is need to be merged at front of next buffer.
Implements cmn_FastSearch.
Definition at line 123 of file cmn_FastSearch.cpp.
References cmn_SkipTbl4FS::GetPattern(), cmn_SkipTbl4FS::GetPatternLen(), cmn_SkipTbl4FS::GettSkipTbl(), ie_INVALID_ARG, log_FUNC_m, cmn_FastSearch::m_buffer_p, cmn_FastSearch::m_bufLen, m_skipTbl_p, and NULL.
00123 { 00124 log_FUNC_m(Find); 00125 00126 if (m_buffer_p == NULL) { 00127 throw ivd_InternalError(ie_INVALID_ARG, "m_buffer_p == NULL. Call SetBuffer() first."); 00128 } 00129 00130 UInt32_t patLen = m_skipTbl_p->GetPatternLen(); 00131 UInt32_t patLen_1 = patLen - 1; 00132 00133 register const char *p = m_buffer_p + patLen_1; 00134 //DEL register int skip = 0; 00135 00136 register const UInt16_t *b = m_skipTbl_p->GettSkipTbl(); 00137 const char *patt = m_skipTbl_p->GetPattern(); 00138 00139 register char lastChar = patt[patLen_1]; 00140 00141 const char *buff = m_buffer_p; 00142 00143 /* ut_cmn test "102", "Performance test, predefined skip table" on NT 00144 1.372 sec 0.730 on Linux Unit test code is cmn_FastPattSrch_102 00145 */ 00146 const char *limit = buff + m_bufLen; 00147 while (p < limit) { // this is optimized not readable code it si 14% faster 00148 // see below in comment for original code 00149 if ( (*p == lastChar) 00150 && (memcmp(patt, p - patLen_1, patLen_1) == 0)) { 00151 return static_cast<Int32_t>(p - buff - patLen_1); 00152 } 00153 p += *(b + (UInt8_t)(*p)); 00154 } 00155 return static_cast<Int32_t>( (p - buff - m_bufLen) - patLen ); // NOTE! if return value is negative then string was not found 00156 00157 /* DO NOT USE ut_cmn test "102", "Performance test, predefined skip table" on NT 00158 1.502 sec 1.312 with general optimizaton 00159 const char *limit = buff + m_bufLen; 00160 while (p < limit) { // this is optimized not readable code it si 14% faster 00161 // see below in comment for original code 00162 //>>> CORE DUMP 00163 skip = *(b + (UInt8_t)(*(p += skip))); 00164 // access to char out of buffer 00165 if ( (*p == lastChar) 00166 && (memcmp(patt, p - patLen_1, patLen_1) == 0)) { 00167 return p - buff - patLen_1; 00168 } 00169 } 00170 return (p - buff - m_bufLen) - patLen; // NOTE! if return value is negative then string was not found 00171 */ 00172 00173 /* 2.082 sec 00174 char c = *p; 00175 const char *limit = buff + m_bufLen; 00176 while (p < limit) { // this is optimized not readable code it si 14% faster 00177 // see below in comment for original code 00178 skip = *(b + (UInt8_t)(c = *(p += skip))); 00179 if ( (c == lastChar) 00180 && (memcmp(patt, p - patLen_1, patLen_1) == 0)) { 00181 return p - buff - patLen_1; 00182 } 00183 } 00184 return (p - buff - m_bufLen) - patLen; // NOTE! if return value is negative then string was not found 00185 */ 00186 /* 2.173 sec 00187 char c = *p; 00188 const char *limit = buff + m_bufLen; 00189 while (p < limit) { // this is optimized not readable code it si 14% faster 00190 // see below in comment for original code 00191 if ( (lastChar == (c = *(p += skip) ) ) 00192 && (memcmp(patt, p - patLen_1, patLen_1) == 0)) { 00193 return p - buff - patLen_1; 00194 } 00195 skip = *(b + (UInt8_t)c ); 00196 } 00197 return (p - buff - m_bufLen) - patLen; // NOTE! if return value is negative then string was not found 00198 */ 00199 00200 /* 2.234 sec 00201 char c = *p; 00202 unsigned int j = 0; // from begining of buffer 00203 unsigned int limit = m_bufLen - m_patLen; 00204 while (j <= limit) { // this is optimized not readable code it si 14% faster 00205 // see below in comment for original code 00206 if ( (lastChar == (c = *(p += skip) ) ) 00207 && (memcmp(patt, buff + j, patLen_1) == 0)) { 00208 return j; 00209 } 00210 j += (skip = *(b + (UInt8_t)c ) ); 00211 } 00212 return j - m_bufLen; // NOTE! if return value is negative then string was not found 00213 */ 00214 /* 2.213 ORIGINAL code 00215 unsigned int j = 0; // from begining of buffer 00216 unsigned int limit = m_bufLen - m_patLen; 00217 while (j <= limit) { 00218 char c = *p; 00219 if ( (lastChar == c) 00220 && (memcmp(patt, buff + j, patLen_1) == 0)) { 00221 return j; 00222 } 00223 int skip = *(b + (UInt8_t)c ); 00224 j += skip; 00225 p += skip; 00226 } 00227 return j - m_bufLen; // NOTE! if return value is negative then string was not found 00228 */ 00229 }

cmn_SkipTbl4FS* cmn_FastPattSearch::m_skipTbl_p [private] |
bad-character skip table instance, it is possible that is preconfigured in case of searching already known string
Definition at line 149 of file cmn_FastSearch.h.
Referenced by Find().
Macro to add class name member s_className.
Reimplemented from cmn_FastSearch.
Definition at line 152 of file cmn_FastSearch.h.
1.5.6