/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | Patch

revision 534 by ph10, Thu Jun 3 18:26:05 2010 UTC | revision 569 by ph10, Sun Nov 7 16:14:50 2010 UTC
# Line 294  argument of match(), which never changes Line 294  argument of match(), which never changes
294    
295  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
296    {\    {\
297    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
298    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
299    frame->Xwhere = rw; \    frame->Xwhere = rw; \
300    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
# Line 422  immediately. The second one is used when Line 422  immediately. The second one is used when
422  the subject. */  the subject. */
423    
424  #define CHECK_PARTIAL()\  #define CHECK_PARTIAL()\
425    if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\    if (md->partial != 0 && eptr >= md->end_subject && \
426      {\        eptr > md->start_used_ptr) \
427      md->hitend = TRUE;\      { \
428      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\      md->hitend = TRUE; \
429        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
430      }      }
431    
432  #define SCHECK_PARTIAL()\  #define SCHECK_PARTIAL()\
433    if (md->partial != 0 && eptr > mstart)\    if (md->partial != 0 && eptr > md->start_used_ptr) \
434      {\      { \
435      md->hitend = TRUE;\      md->hitend = TRUE; \
436      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
437      }      }
438    
439    
# Line 488  heap storage. Set up the top-level frame Line 489  heap storage. Set up the top-level frame
489  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  heap whenever RMATCH() does a "recursion". See the macro definitions above. */
490    
491  #ifdef NO_RECURSE  #ifdef NO_RECURSE
492  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
493  if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);  if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
494  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
495    
# Line 710  for (;;) Line 711  for (;;)
711      case OP_FAIL:      case OP_FAIL:
712      MRRETURN(MATCH_NOMATCH);      MRRETURN(MATCH_NOMATCH);
713    
714        /* COMMIT overrides PRUNE, SKIP, and THEN */
715    
716      case OP_COMMIT:      case OP_COMMIT:
717      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
718        ims, eptrb, flags, RM52);        ims, eptrb, flags, RM52);
719      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
720            rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
721            rrc != MATCH_THEN)
722          RRETURN(rrc);
723      MRRETURN(MATCH_COMMIT);      MRRETURN(MATCH_COMMIT);
724    
725        /* PRUNE overrides THEN */
726    
727      case OP_PRUNE:      case OP_PRUNE:
728      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
729        ims, eptrb, flags, RM51);        ims, eptrb, flags, RM51);
730      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
731      MRRETURN(MATCH_PRUNE);      MRRETURN(MATCH_PRUNE);
732    
733      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
734      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
735        ims, eptrb, flags, RM56);        ims, eptrb, flags, RM56);
736      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
737      md->mark = ecode + 2;      md->mark = ecode + 2;
738      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
739    
740        /* SKIP overrides PRUNE and THEN */
741    
742      case OP_SKIP:      case OP_SKIP:
743      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
744        ims, eptrb, flags, RM53);        ims, eptrb, flags, RM53);
745      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
746          RRETURN(rrc);
747      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
748      MRRETURN(MATCH_SKIP);      MRRETURN(MATCH_SKIP);
749    
750      case OP_SKIP_ARG:      case OP_SKIP_ARG:
751      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
752        ims, eptrb, flags, RM57);        ims, eptrb, flags, RM57);
753      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
754          RRETURN(rrc);
755    
756      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
757      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
# Line 749  for (;;) Line 761  for (;;)
761      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
762      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
763    
764        /* For THEN (and THEN_ARG) we pass back the address of the bracket or
765        the alt that is at the start of the current branch. This makes it possible
766        to skip back past alternatives that precede the THEN within the current
767        branch. */
768    
769      case OP_THEN:      case OP_THEN:
770      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
771        ims, eptrb, flags, RM54);        ims, eptrb, flags, RM54);
772      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
773        md->start_match_ptr = ecode - GET(ecode, 1);
774      MRRETURN(MATCH_THEN);      MRRETURN(MATCH_THEN);
775    
776      case OP_THEN_ARG:      case OP_THEN_ARG:
777      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
778        ims, eptrb, flags, RM58);        offset_top, md, ims, eptrb, flags, RM58);
779      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
780      md->mark = ecode + 2;      md->start_match_ptr = ecode - GET(ecode, 1);
781        md->mark = ecode + LINK_SIZE + 2;
782      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
783    
784      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
# Line 804  for (;;) Line 823  for (;;)
823          {          {
824          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
825            ims, eptrb, flags, RM1);            ims, eptrb, flags, RM1);
826          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);          if (rrc != MATCH_NOMATCH &&
827                (rrc != MATCH_THEN || md->start_match_ptr != ecode))
828              RRETURN(rrc);
829          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
830          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
831          }          }
# Line 865  for (;;) Line 886  for (;;)
886    
887        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
888          eptrb, flags, RM2);          eptrb, flags, RM2);
889        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
890              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
891            RRETURN(rrc);
892        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
893        }        }
894      /* Control never reaches here. */      /* Control never reaches here. */
# Line 1066  for (;;) Line 1089  for (;;)
1089          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1090          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1091          }          }
1092        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)        else if (rrc != MATCH_NOMATCH &&
1093                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1094          {          {
1095          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1096          }          }
# Line 1194  for (;;) Line 1218  for (;;)
1218          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
1219          break;          break;
1220          }          }
1221        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1222              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1223            RRETURN(rrc);
1224        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1225        }        }
1226      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1228  for (;;) Line 1254  for (;;)
1254          do ecode += GET(ecode,1); while (*ecode == OP_ALT);          do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1255          break;          break;
1256          }          }
1257        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1258              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1259            RRETURN(rrc);
1260        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1261        }        }
1262      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1365  for (;;) Line 1393  for (;;)
1393              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1394            MRRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1395            }            }
1396          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)          else if (rrc != MATCH_NOMATCH &&
1397                    (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1398            {            {
1399            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1400            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
# Line 1408  for (;;) Line 1437  for (;;)
1437          mstart = md->start_match_ptr;          mstart = md->start_match_ptr;
1438          break;          break;
1439          }          }
1440        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1441              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1442            RRETURN(rrc);
1443        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1444        }        }
1445      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1674  for (;;) Line 1705  for (;;)
1705        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1706          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1707        else        else
1708          { if (md->noteol) MRRETURN(MATCH_NOMATCH); }          {
1709            if (md->noteol) MRRETURN(MATCH_NOMATCH);
1710            SCHECK_PARTIAL();
1711            }
1712        ecode++;        ecode++;
1713        break;        break;
1714        }        }
1715      else      else  /* Not multiline */
1716        {        {
1717        if (md->noteol) MRRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1718        if (!md->endonly)        if (!md->endonly) goto ASSERT_NL_OR_EOS;
         {  
         if (eptr != md->end_subject &&  
             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))  
           MRRETURN(MATCH_NOMATCH);  
         ecode++;  
         break;  
         }  
1719        }        }
1720    
1721      /* ... else fall through for endonly */      /* ... else fall through for endonly */
1722    
1723      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1724    
1725      case OP_EOD:      case OP_EOD:
1726      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1727        SCHECK_PARTIAL();
1728      ecode++;      ecode++;
1729      break;      break;
1730    
1731      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1732    
1733      case OP_EODN:      case OP_EODN:
1734      if (eptr != md->end_subject &&      ASSERT_NL_OR_EOS:
1735        if (eptr < md->end_subject &&
1736          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1737        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1738    
1739        /* Either at end of string or \n before end. */
1740    
1741        SCHECK_PARTIAL();
1742      ecode++;      ecode++;
1743      break;      break;
1744    
# Line 5600  if ((options & ~PUBLIC_EXEC_OPTIONS) != Line 5634  if ((options & ~PUBLIC_EXEC_OPTIONS) !=
5634  if (re == NULL || subject == NULL ||  if (re == NULL || subject == NULL ||
5635     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
5636  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5637    if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5638    
5639  /* This information is for finding all the numbers associated with a given  /* This information is for finding all the numbers associated with a given
5640  name, for condition testing. */  name, for condition testing. */
# Line 5766  back the character offset. */ Line 5801  back the character offset. */
5801  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5802  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5803    {    {
5804    if (_pcre_valid_utf8((USPTR)subject, length) >= 0)    int tb;
5805      return PCRE_ERROR_BADUTF8;    if ((tb = _pcre_valid_utf8((USPTR)subject, length)) >= 0)
5806        return (tb == length && md->partial > 1)?
5807          PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
5808    if (start_offset > 0 && start_offset < length)    if (start_offset > 0 && start_offset < length)
5809      {      {
5810      int tb = ((USPTR)subject)[start_offset];      tb = ((USPTR)subject)[start_offset] & 0xc0;
5811      if (tb > 127)      if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;
       {  
       tb &= 0xc0;  
       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;  
       }  
5812      }      }
5813    }    }
5814  #endif  #endif
# Line 5959  for(;;) Line 5992  for(;;)
5992        while (start_match < end_subject)        while (start_match < end_subject)
5993          {          {
5994          register unsigned int c = *start_match;          register unsigned int c = *start_match;
5995          if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;          if ((start_bits[c/8] & (1 << (c&7))) == 0)
5996            else break;            {
5997              start_match++;
5998    #ifdef SUPPORT_UTF8
5999              if (utf8)
6000                while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
6001                  start_match++;
6002    #endif
6003              }
6004            else break;
6005          }          }
6006        }        }
6007      }   /* Starting optimizations */      }   /* Starting optimizations */
# Line 6061  for(;;) Line 6102  for(;;)
6102    
6103    switch(rc)    switch(rc)
6104      {      {
6105      /* NOMATCH and PRUNE advance by one character. If MATCH_SKIP_ARG reaches      /* SKIP passes back the next starting point explicitly, but if it is the
6106      this level it means that a MARK that matched the SKIP's arg was not found.      same as the match we have just done, treat it as NOMATCH. */
6107      We treat this as NOMATCH. THEN at this level acts exactly like PRUNE. */  
6108        case MATCH_SKIP:
6109        if (md->start_match_ptr != start_match)
6110          {
6111          new_start_match = md->start_match_ptr;
6112          break;
6113          }
6114        /* Fall through */
6115    
6116        /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6117        the SKIP's arg was not found. We also treat this as NOMATCH. */
6118    
6119        case MATCH_SKIP_ARG:
6120        /* Fall through */
6121    
6122        /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6123        exactly like PRUNE. */
6124    
6125      case MATCH_NOMATCH:      case MATCH_NOMATCH:
6126      case MATCH_PRUNE:      case MATCH_PRUNE:
     case MATCH_SKIP_ARG:  
6127      case MATCH_THEN:      case MATCH_THEN:
6128      new_start_match = start_match + 1;      new_start_match = start_match + 1;
6129  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 6077  for(;;) Line 6133  for(;;)
6133  #endif  #endif
6134      break;      break;
6135    
     /* SKIP passes back the next starting point explicitly. */  
   
     case MATCH_SKIP:  
     new_start_match = md->start_match_ptr;  
     break;  
   
6136      /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */      /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6137    
6138      case MATCH_COMMIT:      case MATCH_COMMIT:

Legend:
Removed from v.534  
changed lines
  Added in v.569

  ViewVC Help
Powered by ViewVC 1.1.5