cmn_FastPattSearch Class Reference
[G_new_group]

#include <cmn_FastSearch.h>

Inheritance diagram for cmn_FastPattSearch:

Inheritance graph
[legend]
Collaboration diagram for cmn_FastPattSearch:

Collaboration graph
[legend]

List of all members.


Detailed Description

This is a class for fast pattern search through buffer.

Base on ASCII table. Best for searc long pattern with no repeated characters in it.

Originaly written by

Author:
Boyer-Moore and optimized as Horspool algorithm
See also:
http://www-igm.univ-mlv.fr/~lecroq/string/node14.html#SECTION00140

http://www-igm.univ-mlv.fr/~lecroq/string/node18.html#SECTION00180

Definition at line 131 of file cmn_FastSearch.h.


Public Member Functions

 cmn_FastPattSearch (cmn_SkipTbl4FS *a_skipTbl_p)
 cmn_FastPattSearch (const char *const a_buffer_p, const int a_bufLen, cmn_SkipTbl4FS *a_skipTbl_p)
virtual ~cmn_FastPattSearch ()
void SetPattern (const string &a_pattern)
 set new search pattern
Int32_t Find ()
 NOTE! if return value is grater than buffer then string was not found the return value is willing handled in this way, because of using same algorithem with merging buffers.

Public Attributes

 log_CLASSID_m
 Macro to add class name member s_className.

Private Attributes

cmn_SkipTbl4FSm_skipTbl_p
 bad-character skip table instance, it is possible that is preconfigured in case of searching already known string

Constructor & Destructor Documentation

cmn_FastPattSearch::cmn_FastPattSearch ( cmn_SkipTbl4FS a_skipTbl_p  ) 

Definition at line 61 of file cmn_FastSearch.cpp.

References log_FUNC_m.

00063             :
00064             cmn_FastSearch(NULL, 0),
00065             m_skipTbl_p(a_skipTbl_p)
00066 {
00067     log_FUNC_m(cmn_FastPattSearch);
00068     // Empty
00069 } 
//============================================================================//

cmn_FastPattSearch::cmn_FastPattSearch ( const char *const   a_buffer_p,
const int  a_bufLen,
cmn_SkipTbl4FS a_skipTbl_p 
)

Definition at line 73 of file cmn_FastSearch.cpp.

References log_FUNC_m.

00077             :
00078             cmn_FastSearch(a_buffer_p, a_bufLen),
00079             m_skipTbl_p(a_skipTbl_p)
00080 {
00081     log_FUNC_m(cmn_FastPattSearch);
00082     //Empty
00083 }
//============================================================================//

cmn_FastPattSearch::~cmn_FastPattSearch (  )  [virtual]

Definition at line 87 of file cmn_FastSearch.cpp.

References log_FUNC_m.

00087                                         {
00088     log_FUNC_m(~cmn_FastPattSearch);
00089 
00090 /*DEL    if (m_skipTblConstructedInternaly) {
00091 //DEL        delete [] m_bmBc_p;
00092         delete  m_skipTbl_p;
00093     }
00094 */
00095 }


Member Function Documentation

void cmn_FastPattSearch::SetPattern ( const string &  a_pattern  )  [inline, virtual]

set new search pattern

Implements cmn_FastSearch.

Definition at line 157 of file cmn_FastSearch.h.

00157 {m_skipTbl_p->SetPattern(a_pattern);};

Int32_t cmn_FastPattSearch::Find (  )  [virtual]

NOTE! if return value is grater than buffer then string was not found the return value is willing handled in this way, because of using same algorithem with merging buffers.

It's known how much buffer is need to be merged at front of next buffer.

Implements cmn_FastSearch.

Definition at line 123 of file cmn_FastSearch.cpp.

References cmn_SkipTbl4FS::GetPattern(), cmn_SkipTbl4FS::GetPatternLen(), cmn_SkipTbl4FS::GettSkipTbl(), ie_INVALID_ARG, log_FUNC_m, cmn_FastSearch::m_buffer_p, cmn_FastSearch::m_bufLen, m_skipTbl_p, and NULL.

00123                                  {
00124     log_FUNC_m(Find);
00125 
00126     if (m_buffer_p == NULL) {
00127         throw ivd_InternalError(ie_INVALID_ARG, "m_buffer_p == NULL. Call SetBuffer() first.");
00128     }
00129 
00130     UInt32_t    patLen   = m_skipTbl_p->GetPatternLen();
00131     UInt32_t    patLen_1 = patLen - 1;
00132 
00133     register const char *p    = m_buffer_p + patLen_1;
00134 //DEL    register int         skip = 0;
00135 
00136     register const UInt16_t *b    = m_skipTbl_p->GettSkipTbl();
00137     const    char           *patt = m_skipTbl_p->GetPattern();
00138 
00139     register char    lastChar = patt[patLen_1];
00140 
00141     const char          *buff = m_buffer_p;
00142 
00143 /* ut_cmn test "102", "Performance test, predefined skip table" on NT 
00144    1.372 sec    0.730 on Linux       Unit test code is  cmn_FastPattSrch_102
00145 */
00146     const char *limit = buff + m_bufLen;
00147     while (p < limit) {   // this is optimized not readable code  it si 14% faster
00148                            // see below in comment for original code 
00149        if (  (*p == lastChar) 
00150           && (memcmp(patt, p - patLen_1, patLen_1) == 0)) {
00151           return static_cast<Int32_t>(p - buff - patLen_1);
00152        }
00153        p += *(b + (UInt8_t)(*p));
00154     }
00155     return static_cast<Int32_t>( (p - buff - m_bufLen) - patLen );  // NOTE! if return value is negative then string was not found
00156 
00157 /* DO NOT USE ut_cmn test "102", "Performance test, predefined skip table" on NT
00158    1.502 sec  1.312 with general optimizaton 
00159     const char *limit = buff + m_bufLen;
00160     while (p < limit) {   // this is optimized not readable code  it si 14% faster
00161                            // see below in comment for original code 
00162 //>>> CORE DUMP
00163         skip = *(b + (UInt8_t)(*(p += skip)));
00164 //    access to char out of buffer
00165        if (  (*p == lastChar) 
00166           && (memcmp(patt, p - patLen_1, patLen_1) == 0)) {
00167           return p - buff - patLen_1;
00168        }
00169     }
00170     return (p - buff - m_bufLen) - patLen;  // NOTE! if return value is negative then string was not found
00171 */
00172 
00173 /* 2.082 sec
00174     char c   = *p;
00175     const char *limit = buff + m_bufLen;
00176     while (p < limit) {   // this is optimized not readable code  it si 14% faster
00177                            // see below in comment for original code 
00178        skip = *(b + (UInt8_t)(c = *(p += skip)));
00179        if (  (c == lastChar) 
00180           && (memcmp(patt, p - patLen_1, patLen_1) == 0)) {
00181           return p - buff - patLen_1;
00182        }
00183     }
00184     return (p - buff - m_bufLen) - patLen;  // NOTE! if return value is negative then string was not found
00185 */
00186 /* 2.173 sec 
00187     char c   = *p;
00188     const char *limit = buff + m_bufLen;
00189     while (p < limit) {   // this is optimized not readable code  it si 14% faster
00190                            // see below in comment for original code 
00191        if (  (lastChar == (c = *(p += skip) ) ) 
00192           && (memcmp(patt, p - patLen_1, patLen_1) == 0)) {
00193           return p - buff - patLen_1;
00194        }
00195        skip = *(b + (UInt8_t)c );
00196     }
00197     return (p - buff - m_bufLen) - patLen;  // NOTE! if return value is negative then string was not found
00198 */
00199 
00200 /* 2.234 sec 
00201     char c   = *p;
00202     unsigned int j = 0; // from begining of buffer
00203     unsigned int limit    = m_bufLen - m_patLen;
00204     while (j <= limit) {   // this is optimized not readable code  it si 14% faster
00205                            // see below in comment for original code 
00206        if (  (lastChar == (c = *(p += skip) ) ) 
00207           && (memcmp(patt, buff + j, patLen_1) == 0)) {
00208           return j;
00209        }
00210        j += (skip = *(b + (UInt8_t)c ) );
00211     }
00212     return j - m_bufLen;  // NOTE! if return value is negative then string was not found
00213 */
00214 /* 2.213    ORIGINAL code
00215     unsigned int j = 0; // from begining of buffer
00216     unsigned int limit = m_bufLen - m_patLen;
00217     while (j <= limit) {
00218        char c = *p;       
00219        if (  (lastChar == c) 
00220           && (memcmp(patt, buff + j, patLen_1) == 0)) {
00221           return j;
00222        }
00223        int skip = *(b + (UInt8_t)c );
00224        j += skip;
00225        p += skip; 
00226     }
00227     return j - m_bufLen;  // NOTE! if return value is negative then string was not found
00228 */
00229 }

Here is the call graph for this function:


Member Data Documentation

bad-character skip table instance, it is possible that is preconfigured in case of searching already known string

Definition at line 149 of file cmn_FastSearch.h.

Referenced by Find().

Macro to add class name member s_className.

Reimplemented from cmn_FastSearch.

Definition at line 152 of file cmn_FastSearch.h.


The documentation for this class was generated from the following files:

Generated on Mon Feb 27 19:04:27 2012 for OPENARCHIVE by  doxygen 1.5.6