/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 463 by ph10, Sun Oct 18 10:02:46 2009 UTC | revision 501 by ph10, Sun Mar 7 11:49:54 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2009 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 89  static const char rep_max[] = { 0, 0, 0, Line 89  static const char rep_max[] = { 0, 0, 0,
89    
90    
91    
92  #ifdef DEBUG  #ifdef PCRE_DEBUG
93  /*************************************************  /*************************************************
94  *        Debugging function to print chars       *  *        Debugging function to print chars       *
95  *************************************************/  *************************************************/
# Line 141  match_ref(int offset, register USPTR ept Line 141  match_ref(int offset, register USPTR ept
141  {  {
142  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
143    
144  #ifdef DEBUG  #ifdef PCRE_DEBUG
145  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
146    printf("matching subject <null>");    printf("matching subject <null>");
147  else  else
# Line 249  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 249  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
249    
250  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
251  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
252  actuall used in this definition. */  actually used in this definition. */
253    
254  #ifndef NO_RECURSE  #ifndef NO_RECURSE
255  #define REGISTER register  #define REGISTER register
256    
257  #ifdef DEBUG  #ifdef PCRE_DEBUG
258  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
259    { \    { \
260    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
261    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
262    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
263    }    }
264  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 268  actuall used in this definition. */ Line 268  actuall used in this definition. */
268    }    }
269  #else  #else
270  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
271    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
272  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
273  #endif  #endif
274    
# Line 288  argument of match(), which never changes Line 288  argument of match(), which never changes
288    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
289    newframe->Xecode = rb;\    newframe->Xecode = rb;\
290    newframe->Xmstart = mstart;\    newframe->Xmstart = mstart;\
291      newframe->Xmarkptr = markptr;\
292    newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
293    newframe->Xims = re;\    newframe->Xims = re;\
294    newframe->Xeptrb = rf;\    newframe->Xeptrb = rf;\
# Line 325  typedef struct heapframe { Line 326  typedef struct heapframe {
326    USPTR Xeptr;    USPTR Xeptr;
327    const uschar *Xecode;    const uschar *Xecode;
328    USPTR Xmstart;    USPTR Xmstart;
329      USPTR Xmarkptr;
330    int Xoffset_top;    int Xoffset_top;
331    long int Xims;    long int Xims;
332    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 432  Arguments: Line 434  Arguments:
434     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
435     mstart      pointer to the current match start position (can be modified     mstart      pointer to the current match start position (can be modified
436                   by encountering \K)                   by encountering \K)
437       markptr     pointer to the most recent MARK name, or NULL
438     offset_top  current top pointer     offset_top  current top pointer
439     md          pointer to "static" info for the match     md          pointer to "static" info for the match
440     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 450  Returns:       MATCH_MATCH if matched Line 453  Returns:       MATCH_MATCH if matched
453  */  */
454    
455  static int  static int
456  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart, USPTR
457    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    markptr, int offset_top, match_data *md, unsigned long int ims,
458    int flags, unsigned int rdepth)    eptrblock *eptrb, int flags, unsigned int rdepth)
459  {  {
460  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
461  so they can be ordinary variables in all cases. Mark some of them with  so they can be ordinary variables in all cases. Mark some of them with
# Line 480  frame->Xprevframe = NULL;            /* Line 483  frame->Xprevframe = NULL;            /*
483  frame->Xeptr = eptr;  frame->Xeptr = eptr;
484  frame->Xecode = ecode;  frame->Xecode = ecode;
485  frame->Xmstart = mstart;  frame->Xmstart = mstart;
486    frame->Xmarkptr = markptr;
487  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
488  frame->Xims = ims;  frame->Xims = ims;
489  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 495  HEAP_RECURSE: Line 499  HEAP_RECURSE:
499  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
500  #define ecode              frame->Xecode  #define ecode              frame->Xecode
501  #define mstart             frame->Xmstart  #define mstart             frame->Xmstart
502    #define markptr            frame->Xmarkptr
503  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
504  #define ims                frame->Xims  #define ims                frame->Xims
505  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 622  TAIL_RECURSE: Line 627  TAIL_RECURSE:
627  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
628  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
629  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
630  and a "return", respectively (possibly with some debugging if DEBUG is  and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
631  defined). However, RMATCH isn't like a function call because it's quite a  defined). However, RMATCH isn't like a function call because it's quite a
632  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
633  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
# Line 713  for (;;) Line 718  for (;;)
718      number = GET2(ecode, 1+LINK_SIZE);      number = GET2(ecode, 1+LINK_SIZE);
719      offset = number << 1;      offset = number << 1;
720    
721  #ifdef DEBUG  #ifdef PCRE_DEBUG
722      printf("start bracket %d\n", number);      printf("start bracket %d\n", number);
723      printf("subject=");      printf("subject=");
724      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
# Line 1039  for (;;) Line 1044  for (;;)
1044      number = GET2(ecode, 1);      number = GET2(ecode, 1);
1045      offset = number << 1;      offset = number << 1;
1046    
1047  #ifdef DEBUG  #ifdef PCRE_DEBUG
1048        printf("end bracket %d at *ACCEPT", number);        printf("end bracket %d at *ACCEPT", number);
1049        printf("\n");        printf("\n");
1050  #endif  #endif
# Line 1070  for (;;) Line 1075  for (;;)
1075        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1076          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1077        offset_top = rec->save_offset_top;        offset_top = rec->save_offset_top;
       mstart = rec->save_start;  
1078        ims = original_ims;        ims = original_ims;
1079        ecode = rec->after_call;        ecode = rec->after_call;
1080        break;        break;
# Line 1114  for (;;) Line 1118  for (;;)
1118        {        {
1119        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1120          RM4);          RM4);
1121        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH)
1122            {
1123            mstart = md->start_match_ptr;   /* In case \K reset it */
1124            break;
1125            }
1126        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1127        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1128        }        }
# Line 1133  for (;;) Line 1141  for (;;)
1141      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1142      continue;      continue;
1143    
1144      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1145        PRUNE, or COMMIT means we must assume failure without checking subsequent
1146        branches. */
1147    
1148      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1149      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
# Line 1142  for (;;) Line 1152  for (;;)
1152        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1153          RM5);          RM5);
1154        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
1155          if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1156            {
1157            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1158            break;
1159            }
1160        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1161        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1162        }        }
# Line 1260  for (;;) Line 1275  for (;;)
1275    
1276        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1277              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
       new_recursive.save_start = mstart;  
1278        new_recursive.save_offset_top = offset_top;        new_recursive.save_offset_top = offset_top;
       mstart = eptr;  
1279    
1280        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1281        restore the offset and recursion data. */        restore the offset and recursion data. */
# Line 1309  for (;;) Line 1322  for (;;)
1322      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1323      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1324      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1325      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1326        the start-of-match value in case it was changed by \K. */
1327    
1328      case OP_ONCE:      case OP_ONCE:
1329      prev = ecode;      prev = ecode;
# Line 1318  for (;;) Line 1332  for (;;)
1332      do      do
1333        {        {
1334        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1335        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH)
1336            {
1337            mstart = md->start_match_ptr;
1338            break;
1339            }
1340        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1341        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1342        }        }
# Line 1437  for (;;) Line 1455  for (;;)
1455        }        }
1456      else saved_eptr = NULL;      else saved_eptr = NULL;
1457    
1458      /* If we are at the end of an assertion group, stop matching and return      /* If we are at the end of an assertion group or an atomic group, stop
1459      MATCH_MATCH, but record the current high water mark for use by positive      matching and return MATCH_MATCH, but record the current high water mark for
1460      assertions. Do this also for the "once" (atomic) groups. */      use by positive assertions. We also need to record the match start in case
1461        it was changed by \K. */
1462    
1463      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1464          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
# Line 1447  for (;;) Line 1466  for (;;)
1466        {        {
1467        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE */
1468        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1469          md->start_match_ptr = mstart;
1470        RRETURN(MATCH_MATCH);        RRETURN(MATCH_MATCH);
1471        }        }
1472    
# Line 1461  for (;;) Line 1481  for (;;)
1481        number = GET2(prev, 1+LINK_SIZE);        number = GET2(prev, 1+LINK_SIZE);
1482        offset = number << 1;        offset = number << 1;
1483    
1484  #ifdef DEBUG  #ifdef PCRE_DEBUG
1485        printf("end bracket %d", number);        printf("end bracket %d", number);
1486        printf("\n");        printf("\n");
1487  #endif  #endif
# Line 1483  for (;;) Line 1503  for (;;)
1503          recursion_info *rec = md->recursive;          recursion_info *rec = md->recursive;
1504          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1505          md->recursive = rec->prevrec;          md->recursive = rec->prevrec;
         mstart = rec->save_start;  
1506          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1507            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1508          offset_top = rec->save_offset_top;          offset_top = rec->save_offset_top;
# Line 3688  for (;;) Line 3707  for (;;)
3707          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3708          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3709            {            {
3710            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3711               (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))              {
3712                SCHECK_PARTIAL();
3713                RRETURN(MATCH_NOMATCH);
3714                }
3715              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3716              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3717            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3718            }            }
# Line 5565  for(;;) Line 5588  for(;;)
5588      bytes to avoid spending too much time in this optimization. */      bytes to avoid spending too much time in this optimization. */
5589    
5590      if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&      if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
5591          end_subject - start_match < study->minlength)          (pcre_uint32)(end_subject - start_match) < study->minlength)
5592        {        {
5593        rc = MATCH_NOMATCH;        rc = MATCH_NOMATCH;
5594        break;        break;
# Line 5628  for(;;) Line 5651  for(;;)
5651        }        }
5652      }      }
5653    
5654  #ifdef DEBUG  /* Sigh. Some compilers never learn. */  #ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
5655    printf(">>>> Match against: ");    printf(">>>> Match against: ");
5656    pchars(start_match, end_subject - start_match, TRUE, md);    pchars(start_match, end_subject - start_match, TRUE, md);
5657    printf("\n");    printf("\n");
# Line 5640  for(;;) Line 5663  for(;;)
5663    md->start_match_ptr = start_match;    md->start_match_ptr = start_match;
5664    md->start_used_ptr = start_match;    md->start_used_ptr = start_match;
5665    md->match_call_count = 0;    md->match_call_count = 0;
5666    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);    rc = match(start_match, md->start_code, start_match, NULL, 2, md, ims, NULL,
5667        0, 0);
5668    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
5669    
5670    switch(rc)    switch(rc)

Legend:
Removed from v.463  
changed lines
  Added in v.501

  ViewVC Help
Powered by ViewVC 1.1.5