/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 164 by ph10, Fri May 4 15:11:14 2007 UTC revision 168 by ph10, Tue May 29 15:18:18 2007 UTC
# Line 189  obtained from malloc() instead o Line 189  obtained from malloc() instead o
189  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
190  always used to.  always used to.
191    
192  The original heap-recursive code used longjmp(). However, it seems that this  The original heap-recursive code used longjmp(). However, it seems that this
193  can be very slow on some operating systems. Following a suggestion from Stan  can be very slow on some operating systems. Following a suggestion from Stan
194  Switzer, the use of longjmp() has been abolished, at the cost of having to  Switzer, the use of longjmp() has been abolished, at the cost of having to
195  provide a unique number for each call to RMATCH. There is no way of generating  provide a unique number for each call to RMATCH. There is no way of generating
# Line 198  them stand out more clearly. Line 198  them stand out more clearly.
198    
199  Crude tests on x86 Linux show a small speedup of around 5-8%. However, on  Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
200  FreeBSD, avoiding longjmp() more than halves the time taken to run the standard  FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
201  tests. Furthermore, not using longjmp() means that local dynamic variables  tests. Furthermore, not using longjmp() means that local dynamic variables
202  don't have indeterminate values; this has meant that the frame size can be  don't have indeterminate values; this has meant that the frame size can be
203  reduced because the result can be "passed back" by straight setting of the  reduced because the result can be "passed back" by straight setting of the
204  variable instead of being passed in the frame.  variable instead of being passed in the frame.
205  ****************************************************************************  ****************************************************************************
# Line 213  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 213  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
213         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
214         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
215         RM41,  RM42, RM43, RM44, RM45, RM46, RM47 };         RM41,  RM42, RM43, RM44, RM45, RM46, RM47 };
216    
217    
218  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
219  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
220  actually used in this definition. */  actually used in this definition. */
221    
222  #ifndef NO_RECURSE  #ifndef NO_RECURSE
# Line 226  actually used in this definition. */ Line 226  actually used in this definition. */
226  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
227    { \    { \
228    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
229    rrc = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
230    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
231    }    }
232  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 236  actually used in this definition. */ Line 236  actually used in this definition. */
236    }    }
237  #else  #else
238  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
239    rrc = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
240  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
241  #endif  #endif
242    
# Line 255  argument of match(), which never changes Line 255  argument of match(), which never changes
255    frame->Xwhere = rw; \    frame->Xwhere = rw; \
256    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
257    newframe->Xecode = rb;\    newframe->Xecode = rb;\
258      newframe->Xmstart = mstart;\
259    newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
260    newframe->Xims = re;\    newframe->Xims = re;\
261    newframe->Xeptrb = rf;\    newframe->Xeptrb = rf;\
# Line 291  typedef struct heapframe { Line 292  typedef struct heapframe {
292    
293    const uschar *Xeptr;    const uschar *Xeptr;
294    const uschar *Xecode;    const uschar *Xecode;
295      const uschar *Xmstart;
296    int Xoffset_top;    int Xoffset_top;
297    long int Xims;    long int Xims;
298    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 344  typedef struct heapframe { Line 346  typedef struct heapframe {
346    /* Where to jump back to */    /* Where to jump back to */
347    
348    int Xwhere;    int Xwhere;
349    
350  } heapframe;  } heapframe;
351    
352  #endif  #endif
# Line 371  made performance worse. Line 373  made performance worse.
373  Arguments:  Arguments:
374     eptr        pointer to current character in subject     eptr        pointer to current character in subject
375     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
376       mstart      pointer to the current match start position (can be modified
377                     by encountering \K)
378     offset_top  current top pointer     offset_top  current top pointer
379     md          pointer to "static" info for the match     md          pointer to "static" info for the match
380     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 390  Returns:       MATCH_MATCH if matched Line 394  Returns:       MATCH_MATCH if matched
394  */  */
395    
396  static int  static int
397  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
398    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
399    int flags, unsigned int rdepth)    int flags, unsigned int rdepth)
400  {  {
# Line 418  frame->Xprevframe = NULL;            /* Line 422  frame->Xprevframe = NULL;            /*
422    
423  frame->Xeptr = eptr;  frame->Xeptr = eptr;
424  frame->Xecode = ecode;  frame->Xecode = ecode;
425    frame->Xmstart = mstart;
426  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
427  frame->Xims = ims;  frame->Xims = ims;
428  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 432  HEAP_RECURSE: Line 437  HEAP_RECURSE:
437    
438  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
439  #define ecode              frame->Xecode  #define ecode              frame->Xecode
440    #define mstart             frame->Xmstart
441  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
442  #define ims                frame->Xims  #define ims                frame->Xims
443  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 610  for (;;) Line 616  for (;;)
616    
617    if (md->partial &&    if (md->partial &&
618        eptr >= md->end_subject &&        eptr >= md->end_subject &&
619        eptr > md->start_match)        eptr > mstart)
620      md->hitend = TRUE;      md->hitend = TRUE;
621    
622    switch(op)    switch(op)
# Line 787  for (;;) Line 793  for (;;)
793        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
794        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
795          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
796        md->start_match = rec->save_start;        mstart = rec->save_start;
797        ims = original_ims;        ims = original_ims;
798        ecode = rec->after_call;        ecode = rec->after_call;
799        break;        break;
# Line 796  for (;;) Line 802  for (;;)
802      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
803      string - backtracking will then try other alternatives, if any. */      string - backtracking will then try other alternatives, if any. */
804    
805      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
806      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
807      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
808        md->start_match_ptr = mstart;  /* and the start (\K can modify) */
809      RRETURN(MATCH_MATCH);      RRETURN(MATCH_MATCH);
810    
811      /* Change option settings */      /* Change option settings */
# Line 904  for (;;) Line 911  for (;;)
911        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
912        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
913        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
914        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
915        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
916        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
917        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
# Line 966  for (;;) Line 973  for (;;)
973    
974        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
975              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
976        new_recursive.save_start = md->start_match;        new_recursive.save_start = mstart;
977        md->start_match = eptr;        mstart = eptr;
978    
979        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
980        restore the offset and recursion data. */        restore the offset and recursion data. */
# Line 1180  for (;;) Line 1187  for (;;)
1187          recursion_info *rec = md->recursive;          recursion_info *rec = md->recursive;
1188          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1189          md->recursive = rec->prevrec;          md->recursive = rec->prevrec;
1190          md->start_match = rec->save_start;          mstart = rec->save_start;
1191          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1192            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1193          ecode = rec->after_call;          ecode = rec->after_call;
# Line 1259  for (;;) Line 1266  for (;;)
1266      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1267      ecode++;      ecode++;
1268      break;      break;
1269    
1270        /* Reset the start of match point */
1271    
1272        case OP_SET_SOM:
1273        mstart = eptr;
1274        ecode++;
1275        break;
1276    
1277      /* Assert before internal newline if multiline, or before a terminating      /* Assert before internal newline if multiline, or before a terminating
1278      newline unless endonly is set, else end of subject unless noteol is set. */      newline unless endonly is set, else end of subject unless noteol is set. */
# Line 3651  for (;;) Line 3665  for (;;)
3665  /* Control never reaches here */  /* Control never reaches here */
3666    
3667    
3668  /* When compiling to use the heap rather than the stack for recursive calls to  /* When compiling to use the heap rather than the stack for recursive calls to
3669  match(), the RRETURN() macro jumps here. The number that is saved in  match(), the RRETURN() macro jumps here. The number that is saved in
3670  frame->Xwhere indicates which label we actually want to return to. */  frame->Xwhere indicates which label we actually want to return to. */
3671    
3672  #ifdef NO_RECURSE  #ifdef NO_RECURSE
# Line 3670  switch (frame->Xwhere) Line 3684  switch (frame->Xwhere)
3684    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
3685    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
3686    }    }
3687  #undef LBL  #undef LBL
3688  #endif  /* NO_RECURSE */  #endif  /* NO_RECURSE */
3689  }  }
3690    
# Line 3684  Undefine all the macros that were define Line 3698  Undefine all the macros that were define
3698  #ifdef NO_RECURSE  #ifdef NO_RECURSE
3699  #undef eptr  #undef eptr
3700  #undef ecode  #undef ecode
3701    #undef mstart
3702  #undef offset_top  #undef offset_top
3703  #undef ims  #undef ims
3704  #undef eptrb  #undef eptrb
# Line 4163  for(;;) Line 4178  for(;;)
4178    
4179    /* OK, we can now run the match. */    /* OK, we can now run the match. */
4180    
4181    md->start_match = start_match;    md->start_match_ptr = start_match;      /* Insurance */
4182    md->match_call_count = 0;    md->match_call_count = 0;
4183    md->eptrn = 0;                          /* Next free eptrchain slot */    md->eptrn = 0;                          /* Next free eptrchain slot */
4184    rc = match(start_match, md->start_code, 2, md, ims, NULL, 0, 0);    rc = match(start_match, md->start_code, start_match, 2, md,
4185        ims, NULL, 0, 0);
4186    
4187    /* Any return other than MATCH_NOMATCH breaks the loop. */    /* Any return other than MATCH_NOMATCH breaks the loop. */
4188    
# Line 4246  if (rc == MATCH_MATCH) Line 4262  if (rc == MATCH_MATCH)
4262    
4263    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = md->offset_overflow? 0 : md->end_offset_top/2;
4264    
4265    /* If there is space, set up the whole thing as substring 0. */    /* If there is space, set up the whole thing as substring 0. The value of
4266      md->start_match_ptr might be modified if \K was encountered on the successful
4267      matching path. */
4268    
4269    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
4270      {      {
4271      offsets[0] = start_match - md->start_subject;      offsets[0] = md->start_match_ptr - md->start_subject;
4272      offsets[1] = md->end_match_ptr - md->start_subject;      offsets[1] = md->end_match_ptr - md->start_subject;
4273      }      }
4274    

Legend:
Removed from v.164  
changed lines
  Added in v.168

  ViewVC Help
Powered by ViewVC 1.1.5