/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

Left column: revision 511 by ph10, Mon Mar 29 09:25:38 2010 UTC | Right column: revision 569 by ph10, Sun Nov 7 16:14:50 2010 UTC
# Line 255  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 255  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
255         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
256         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
257         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
258         RM51,  RM52, RM53, RM54 };         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
259           RM61,  RM62 };
260    
261  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
262  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 293  argument of match(), which never changes Line 294  argument of match(), which never changes
294    
295  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
296    {\    {\
297    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
298      if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
299    frame->Xwhere = rw; \    frame->Xwhere = rw; \
300    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
301    newframe->Xecode = rb;\    newframe->Xecode = rb;\
# Line 314  argument of match(), which never changes Line 316  argument of match(), which never changes
316    
317  #define RRETURN(ra)\  #define RRETURN(ra)\
318    {\    {\
319    heapframe *newframe = frame;\    heapframe *oldframe = frame;\
320    frame = newframe->Xprevframe;\    frame = oldframe->Xprevframe;\
321    (pcre_stack_free)(newframe);\    (pcre_stack_free)(oldframe);\
322    if (frame != NULL)\    if (frame != NULL)\
323      {\      {\
324      rrc = ra;\      rrc = ra;\
# Line 420  immediately. The second one is used when Line 422  immediately. The second one is used when
422  the subject. */  the subject. */
423    
424  #define CHECK_PARTIAL()\  #define CHECK_PARTIAL()\
425    if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\    if (md->partial != 0 && eptr >= md->end_subject && \
426      {\        eptr > md->start_used_ptr) \
427      md->hitend = TRUE;\      { \
428      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\      md->hitend = TRUE; \
429        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
430      }      }
431    
432  #define SCHECK_PARTIAL()\  #define SCHECK_PARTIAL()\
433    if (md->partial != 0 && eptr > mstart)\    if (md->partial != 0 && eptr > md->start_used_ptr) \
434      {\      { \
435      md->hitend = TRUE;\      md->hitend = TRUE; \
436      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\      if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
437      }      }
438    
439    
# Line 486  heap storage. Set up the top-level frame Line 489  heap storage. Set up the top-level frame
489  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  heap whenever RMATCH() does a "recursion". See the macro definitions above. */
490    
491  #ifdef NO_RECURSE  #ifdef NO_RECURSE
492  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
493    if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
494  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
495    
496  /* Copy in the original argument variables */  /* Copy in the original argument variables */
# Line 685  for (;;) Line 689  for (;;)
689      case OP_MARK:      case OP_MARK:
690      markptr = ecode + 2;      markptr = ecode + 2;
691      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
692        ims, eptrb, flags, RM51);        ims, eptrb, flags, RM55);
693    
694      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
695      argument, and we must check whether that argument matches this MARK's      argument, and we must check whether that argument matches this MARK's
696      argument. It is passed back in md->start_match_ptr (an overloading of that      argument. It is passed back in md->start_match_ptr (an overloading of that
697      variable). If it does match, we reset that variable to the current subject      variable). If it does match, we reset that variable to the current subject
698      position and return MATCH_SKIP. Otherwise, pass back the return code      position and return MATCH_SKIP. Otherwise, pass back the return code
699      unaltered. */      unaltered. */
700    
701      if (rrc == MATCH_SKIP_ARG &&      if (rrc == MATCH_SKIP_ARG &&
702          strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)          strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
703        {        {
704        md->start_match_ptr = eptr;        md->start_match_ptr = eptr;
705        RRETURN(MATCH_SKIP);        RRETURN(MATCH_SKIP);
706        }        }
707    
708      if (md->mark == NULL) md->mark = markptr;      if (md->mark == NULL) md->mark = markptr;
709      RRETURN(rrc);      RRETURN(rrc);
710    
711      case OP_FAIL:      case OP_FAIL:
712      MRRETURN(MATCH_NOMATCH);      MRRETURN(MATCH_NOMATCH);
713    
714        /* COMMIT overrides PRUNE, SKIP, and THEN */
715    
716      case OP_COMMIT:      case OP_COMMIT:
717      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
718        ims, eptrb, flags, RM52);        ims, eptrb, flags, RM52);
719      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
720            rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
721            rrc != MATCH_THEN)
722          RRETURN(rrc);
723      MRRETURN(MATCH_COMMIT);      MRRETURN(MATCH_COMMIT);
724    
725        /* PRUNE overrides THEN */
726    
727      case OP_PRUNE:      case OP_PRUNE:
728      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
729        ims, eptrb, flags, RM51);        ims, eptrb, flags, RM51);
730      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
731      MRRETURN(MATCH_PRUNE);      MRRETURN(MATCH_PRUNE);
732    
733      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
734      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
735        ims, eptrb, flags, RM51);        ims, eptrb, flags, RM56);
736      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
737      md->mark = ecode + 2;      md->mark = ecode + 2;
738      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
739    
740        /* SKIP overrides PRUNE and THEN */
741    
742      case OP_SKIP:      case OP_SKIP:
743      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
744        ims, eptrb, flags, RM53);        ims, eptrb, flags, RM53);
745      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
746          RRETURN(rrc);
747      md->start_match_ptr = eptr;   /* Pass back current position */      md->start_match_ptr = eptr;   /* Pass back current position */
748      MRRETURN(MATCH_SKIP);      MRRETURN(MATCH_SKIP);
749    
750      case OP_SKIP_ARG:      case OP_SKIP_ARG:
751      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
752        ims, eptrb, flags, RM53);        ims, eptrb, flags, RM57);
753      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
754          RRETURN(rrc);
755      /* Pass back the current skip name by overloading md->start_match_ptr and  
756      returning the special MATCH_SKIP_ARG return code. This will either be      /* Pass back the current skip name by overloading md->start_match_ptr and
757      caught by a matching MARK, or get to the top, where it is treated the same      returning the special MATCH_SKIP_ARG return code. This will either be
758        caught by a matching MARK, or get to the top, where it is treated the same
759      as PRUNE. */      as PRUNE. */
760    
761      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
762      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
763    
764        /* For THEN (and THEN_ARG) we pass back the address of the bracket or
765        the alt that is at the start of the current branch. This makes it possible
766        to skip back past alternatives that precede the THEN within the current
767        branch. */
768    
769      case OP_THEN:      case OP_THEN:
770      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
771        ims, eptrb, flags, RM54);        ims, eptrb, flags, RM54);
772      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
773        md->start_match_ptr = ecode - GET(ecode, 1);
774      MRRETURN(MATCH_THEN);      MRRETURN(MATCH_THEN);
775    
776      case OP_THEN_ARG:      case OP_THEN_ARG:
777      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
778        ims, eptrb, flags, RM54);        offset_top, md, ims, eptrb, flags, RM58);
779      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
780      md->mark = ecode + 2;      md->start_match_ptr = ecode - GET(ecode, 1);
781        md->mark = ecode + LINK_SIZE + 2;
782      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
783    
784      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
# Line 793  for (;;) Line 815  for (;;)
815        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
816    
817        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
818        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] =
819            (int)(eptr - md->start_subject);
820    
821        flags = (op == OP_SCBRA)? match_cbegroup : 0;        flags = (op == OP_SCBRA)? match_cbegroup : 0;
822        do        do
823          {          {
824          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
825            ims, eptrb, flags, RM1);            ims, eptrb, flags, RM1);
826          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);          if (rrc != MATCH_NOMATCH &&
827                (rrc != MATCH_THEN || md->start_match_ptr != ecode))
828              RRETURN(rrc);
829          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
830          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
831          }          }
# Line 852  for (;;) Line 877  for (;;)
877    
878          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
879            eptrb, flags, RM48);            eptrb, flags, RM48);
880          if (rrc == MATCH_NOMATCH) md->mark = markptr;          if (rrc == MATCH_NOMATCH) md->mark = markptr;
881          RRETURN(rrc);          RRETURN(rrc);
882          }          }
883    
884        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
# Line 861  for (;;) Line 886  for (;;)
886    
887        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
888          eptrb, flags, RM2);          eptrb, flags, RM2);
889        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
890              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
891            RRETURN(rrc);
892        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
893        }        }
894      /* Control never reaches here. */      /* Control never reaches here. */
# Line 888  for (;;) Line 915  for (;;)
915          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
916          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
917          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
918          cb.subject_length   = md->end_subject - md->start_subject;          cb.subject_length   = (int)(md->end_subject - md->start_subject);
919          cb.start_match      = mstart - md->start_subject;          cb.start_match      = (int)(mstart - md->start_subject);
920          cb.current_position = eptr - md->start_subject;          cb.current_position = (int)(eptr - md->start_subject);
921          cb.pattern_position = GET(ecode, LINK_SIZE + 3);          cb.pattern_position = GET(ecode, LINK_SIZE + 3);
922          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
923          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
# Line 1062  for (;;) Line 1089  for (;;)
1089          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1090          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1091          }          }
1092        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)        else if (rrc != MATCH_NOMATCH &&
1093                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1094          {          {
1095          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1096          }          }
# Line 1116  for (;;) Line 1144  for (;;)
1144        {        {
1145        md->offset_vector[offset] =        md->offset_vector[offset] =
1146          md->offset_vector[md->offset_end - number];          md->offset_vector[md->offset_end - number];
1147        md->offset_vector[offset+1] = eptr - md->start_subject;        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1148        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1149        }        }
1150      ecode += 3;      ecode += 3;
# Line 1158  for (;;) Line 1186  for (;;)
1186      md->end_match_ptr = eptr;           /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
1187      md->end_offset_top = offset_top;    /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
1188      md->start_match_ptr = mstart;       /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1189      MRRETURN(((op == OP_END)? MATCH_MATCH : MATCH_ACCEPT));  
1190        /* For some reason, the macros don't work properly if an expression is
1191        given as the argument to MRRETURN when the heap is in use. */
1192    
1193        rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1194        MRRETURN(rrc);
1195    
1196      /* Change option settings */      /* Change option settings */
1197    
# Line 1185  for (;;) Line 1218  for (;;)
1218          mstart = md->start_match_ptr;   /* In case \K reset it */          mstart = md->start_match_ptr;   /* In case \K reset it */
1219          break;          break;
1220          }          }
1221        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1222              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1223            RRETURN(rrc);
1224        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1225        }        }
1226      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1219  for (;;) Line 1254  for (;;)
1254          do ecode += GET(ecode,1); while (*ecode == OP_ALT);          do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1255          break;          break;
1256          }          }
1257        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1258              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1259            RRETURN(rrc);
1260        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1261        }        }
1262      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1274  for (;;) Line 1311  for (;;)
1311        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1312        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1313        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1314        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1315        cb.start_match      = mstart - md->start_subject;        cb.start_match      = (int)(mstart - md->start_subject);
1316        cb.current_position = eptr - md->start_subject;        cb.current_position = (int)(eptr - md->start_subject);
1317        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1318        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1319        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
# Line 1356  for (;;) Line 1393  for (;;)
1393              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1394            MRRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1395            }            }
1396          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)          else if (rrc != MATCH_NOMATCH &&
1397                    (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1398            {            {
1399            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1400            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
# Line 1399  for (;;) Line 1437  for (;;)
1437          mstart = md->start_match_ptr;          mstart = md->start_match_ptr;
1438          break;          break;
1439          }          }
1440        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
1441              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1442            RRETURN(rrc);
1443        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1444        }        }
1445      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1553  for (;;) Line 1593  for (;;)
1593          {          {
1594          md->offset_vector[offset] =          md->offset_vector[offset] =
1595            md->offset_vector[md->offset_end - number];            md->offset_vector[md->offset_end - number];
1596          md->offset_vector[offset+1] = eptr - md->start_subject;          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1597          if (offset_top <= offset) offset_top = offset + 2;          if (offset_top <= offset) offset_top = offset + 2;
1598          }          }
1599    
# Line 1665  for (;;) Line 1705  for (;;)
1705        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1706          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1707        else        else
1708          { if (md->noteol) MRRETURN(MATCH_NOMATCH); }          {
1709            if (md->noteol) MRRETURN(MATCH_NOMATCH);
1710            SCHECK_PARTIAL();
1711            }
1712        ecode++;        ecode++;
1713        break;        break;
1714        }        }
1715      else      else  /* Not multiline */
1716        {        {
1717        if (md->noteol) MRRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1718        if (!md->endonly)        if (!md->endonly) goto ASSERT_NL_OR_EOS;
         {  
         if (eptr != md->end_subject &&  
             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))  
           MRRETURN(MATCH_NOMATCH);  
         ecode++;  
         break;  
         }  
1719        }        }
1720    
1721      /* ... else fall through for endonly */      /* ... else fall through for endonly */
1722    
1723      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1724    
1725      case OP_EOD:      case OP_EOD:
1726      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1727        SCHECK_PARTIAL();
1728      ecode++;      ecode++;
1729      break;      break;
1730    
1731      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1732    
1733      case OP_EODN:      case OP_EODN:
1734      if (eptr != md->end_subject &&      ASSERT_NL_OR_EOS:
1735        if (eptr < md->end_subject &&
1736          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1737        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1738    
1739        /* Either at end of string or \n before end. */
1740    
1741        SCHECK_PARTIAL();
1742      ecode++;      ecode++;
1743      break;      break;
1744    
# Line 1713  for (;;) Line 1756  for (;;)
1756  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1757        if (utf8)        if (utf8)
1758          {          {
1759            /* Get status of previous character */
1760    
1761          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1762            {            {
1763            USPTR lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1764            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1765            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1766            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1767    #ifdef SUPPORT_UCP
1768              if (md->use_ucp)
1769                {
1770                if (c == '_') prev_is_word = TRUE; else
1771                  {
1772                  int cat = UCD_CATEGORY(c);
1773                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1774                  }
1775                }
1776              else
1777    #endif
1778            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1779            }            }
1780    
1781            /* Get status of next character */
1782    
1783          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
1784            {            {
1785            SCHECK_PARTIAL();            SCHECK_PARTIAL();
# Line 1729  for (;;) Line 1788  for (;;)
1788          else          else
1789            {            {
1790            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1791    #ifdef SUPPORT_UCP
1792              if (md->use_ucp)
1793                {
1794                if (c == '_') cur_is_word = TRUE; else
1795                  {
1796                  int cat = UCD_CATEGORY(c);
1797                  cur_is_word = (cat == ucp_L || cat == ucp_N);
1798                  }
1799                }
1800              else
1801    #endif
1802            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1803            }            }
1804          }          }
1805        else        else
1806  #endif  #endif
1807    
1808        /* Not in UTF-8 mode */        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1809          consistency with the behaviour of \w we do use it in this case. */
1810    
1811          {          {
1812            /* Get status of previous character */
1813    
1814          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1815            {            {
1816            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1817    #ifdef SUPPORT_UCP
1818              if (md->use_ucp)
1819                {
1820                c = eptr[-1];
1821                if (c == '_') prev_is_word = TRUE; else
1822                  {
1823                  int cat = UCD_CATEGORY(c);
1824                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1825                  }
1826                }
1827              else
1828    #endif
1829            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1830            }            }
1831    
1832            /* Get status of next character */
1833    
1834          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
1835            {            {
1836            SCHECK_PARTIAL();            SCHECK_PARTIAL();
1837            cur_is_word = FALSE;            cur_is_word = FALSE;
1838            }            }
1839          else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);          else
1840    #ifdef SUPPORT_UCP
1841            if (md->use_ucp)
1842              {
1843              c = *eptr;
1844              if (c == '_') cur_is_word = TRUE; else
1845                {
1846                int cat = UCD_CATEGORY(c);
1847                cur_is_word = (cat == ucp_L || cat == ucp_N);
1848                }
1849              }
1850            else
1851    #endif
1852            cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1853          }          }
1854    
1855        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
# Line 2055  for (;;) Line 2156  for (;;)
2156               prop->chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2157               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2158            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2159           break;          break;
2160    
2161          case PT_GC:          case PT_GC:
2162          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
# Line 2072  for (;;) Line 2173  for (;;)
2173            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2174          break;          break;
2175    
2176            /* These are specials */
2177    
2178            case PT_ALNUM:
2179            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2180                 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2181              MRRETURN(MATCH_NOMATCH);
2182            break;
2183    
2184            case PT_SPACE:    /* Perl space */
2185            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2186                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2187                   == (op == OP_NOTPROP))
2188              MRRETURN(MATCH_NOMATCH);
2189            break;
2190    
2191            case PT_PXSPACE:  /* POSIX space */
2192            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2193                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2194                 c == CHAR_FF || c == CHAR_CR)
2195                   == (op == OP_NOTPROP))
2196              MRRETURN(MATCH_NOMATCH);
2197            break;
2198    
2199            case PT_WORD:
2200            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2201                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2202                 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2203              MRRETURN(MATCH_NOMATCH);
2204            break;
2205    
2206            /* This should never occur */
2207    
2208          default:          default:
2209          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
2210          }          }
# Line 2137  for (;;) Line 2270  for (;;)
2270        referenced subpattern. */        referenced subpattern. */
2271    
2272        if (offset >= offset_top || md->offset_vector[offset] < 0)        if (offset >= offset_top || md->offset_vector[offset] < 0)
2273          length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;          length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
2274        else        else
2275          length = md->offset_vector[offset+1] - md->offset_vector[offset];          length = md->offset_vector[offset+1] - md->offset_vector[offset];
2276    
# Line 3488  for (;;) Line 3621  for (;;)
3621              }              }
3622            break;            break;
3623    
3624              case PT_ALNUM:
3625              for (i = 1; i <= min; i++)
3626                {
3627                if (eptr >= md->end_subject)
3628                  {
3629                  SCHECK_PARTIAL();
3630                  MRRETURN(MATCH_NOMATCH);
3631                  }
3632                GETCHARINCTEST(c, eptr);
3633                prop_category = UCD_CATEGORY(c);
3634                if ((prop_category == ucp_L || prop_category == ucp_N)
3635                       == prop_fail_result)
3636                  MRRETURN(MATCH_NOMATCH);
3637                }
3638              break;
3639    
3640              case PT_SPACE:    /* Perl space */
3641              for (i = 1; i <= min; i++)
3642                {
3643                if (eptr >= md->end_subject)
3644                  {
3645                  SCHECK_PARTIAL();
3646                  MRRETURN(MATCH_NOMATCH);
3647                  }
3648                GETCHARINCTEST(c, eptr);
3649                prop_category = UCD_CATEGORY(c);
3650                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3651                     c == CHAR_FF || c == CHAR_CR)
3652                       == prop_fail_result)
3653                  MRRETURN(MATCH_NOMATCH);
3654                }
3655              break;
3656    
3657              case PT_PXSPACE:  /* POSIX space */
3658              for (i = 1; i <= min; i++)
3659                {
3660                if (eptr >= md->end_subject)
3661                  {
3662                  SCHECK_PARTIAL();
3663                  MRRETURN(MATCH_NOMATCH);
3664                  }
3665                GETCHARINCTEST(c, eptr);
3666                prop_category = UCD_CATEGORY(c);
3667                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3668                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3669                       == prop_fail_result)
3670                  MRRETURN(MATCH_NOMATCH);
3671                }
3672              break;
3673    
3674              case PT_WORD:
3675              for (i = 1; i <= min; i++)
3676                {
3677                if (eptr >= md->end_subject)
3678                  {
3679                  SCHECK_PARTIAL();
3680                  MRRETURN(MATCH_NOMATCH);
3681                  }
3682                GETCHARINCTEST(c, eptr);
3683                prop_category = UCD_CATEGORY(c);
3684                if ((prop_category == ucp_L || prop_category == ucp_N ||
3685                     c == CHAR_UNDERSCORE)
3686                       == prop_fail_result)
3687                  MRRETURN(MATCH_NOMATCH);
3688                }
3689              break;
3690    
3691              /* This should not occur */
3692    
3693            default:            default:
3694            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
3695            }            }
# Line 4048  for (;;) Line 4250  for (;;)
4250                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4251                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4252                }                }
4253              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4254              if (prop_fail_result) MRRETURN(MATCH_NOMATCH);              if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
4255              }              }
4256            /* Control never gets here */            /* Control never gets here */
# Line 4064  for (;;) Line 4266  for (;;)
4266                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4267                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4268                }                }
4269              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4270              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4271              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4272                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 4084  for (;;) Line 4286  for (;;)
4286                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4287                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4288                }                }
4289              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4290              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4291              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4292                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4102  for (;;) Line 4304  for (;;)
4304                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4305                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4306                }                }
4307              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4308              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4309              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4310                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4120  for (;;) Line 4322  for (;;)
4322                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4323                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4324                }                }
4325              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4326              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
4327              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4328                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4329              }              }
4330            /* Control never gets here */            /* Control never gets here */
4331    
4332              case PT_ALNUM:
4333              for (fi = min;; fi++)
4334                {
4335                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);
4336                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4337                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4338                if (eptr >= md->end_subject)
4339                  {
4340                  SCHECK_PARTIAL();
4341                  MRRETURN(MATCH_NOMATCH);
4342                  }
4343                GETCHARINCTEST(c, eptr);
4344                prop_category = UCD_CATEGORY(c);
4345                if ((prop_category == ucp_L || prop_category == ucp_N)
4346                       == prop_fail_result)
4347                  MRRETURN(MATCH_NOMATCH);
4348                }
4349              /* Control never gets here */
4350    
4351              case PT_SPACE:    /* Perl space */
4352              for (fi = min;; fi++)
4353                {
4354                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);
4355                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4356                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4357                if (eptr >= md->end_subject)
4358                  {
4359                  SCHECK_PARTIAL();
4360                  MRRETURN(MATCH_NOMATCH);
4361                  }
4362                GETCHARINCTEST(c, eptr);
4363                prop_category = UCD_CATEGORY(c);
4364                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4365                     c == CHAR_FF || c == CHAR_CR)
4366                       == prop_fail_result)
4367                  MRRETURN(MATCH_NOMATCH);
4368                }
4369              /* Control never gets here */
4370    
4371              case PT_PXSPACE:  /* POSIX space */
4372              for (fi = min;; fi++)
4373                {
4374                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);
4375                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4376                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4377                if (eptr >= md->end_subject)
4378                  {
4379                  SCHECK_PARTIAL();
4380                  MRRETURN(MATCH_NOMATCH);
4381                  }
4382                GETCHARINCTEST(c, eptr);
4383                prop_category = UCD_CATEGORY(c);
4384                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4385                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4386                       == prop_fail_result)
4387                  MRRETURN(MATCH_NOMATCH);
4388                }
4389              /* Control never gets here */
4390    
4391              case PT_WORD:
4392              for (fi = min;; fi++)
4393                {
4394                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);
4395                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4396                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4397                if (eptr >= md->end_subject)
4398                  {
4399                  SCHECK_PARTIAL();
4400                  MRRETURN(MATCH_NOMATCH);
4401                  }
4402                GETCHARINCTEST(c, eptr);
4403                prop_category = UCD_CATEGORY(c);
4404                if ((prop_category == ucp_L ||
4405                     prop_category == ucp_N ||
4406                     c == CHAR_UNDERSCORE)
4407                       == prop_fail_result)
4408                  MRRETURN(MATCH_NOMATCH);
4409                }
4410              /* Control never gets here */
4411    
4412              /* This should never occur */
4413    
4414            default:            default:
4415            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
4416            }            }
# Line 4473  for (;;) Line 4757  for (;;)
4757                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4758                break;                break;
4759                }                }
4760              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4761              if (prop_fail_result) break;              if (prop_fail_result) break;
4762              eptr+= len;              eptr+= len;
4763              }              }
# Line 4488  for (;;) Line 4772  for (;;)
4772                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4773                break;                break;
4774                }                }
4775              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4776              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4777              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4778                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 4507  for (;;) Line 4791  for (;;)
4791                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4792                break;                break;
4793                }                }
4794              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4795              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4796              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4797                break;                break;
# Line 4524  for (;;) Line 4808  for (;;)
4808                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4809                break;                break;
4810                }                }
4811              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4812              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4813              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4814                break;                break;
# Line 4541  for (;;) Line 4825  for (;;)
4825                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4826                break;                break;
4827                }                }
4828              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4829              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
4830              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4831                break;                break;
4832              eptr+= len;              eptr+= len;
4833              }              }
4834            break;            break;
4835    
4836              case PT_ALNUM:
4837              for (i = min; i < max; i++)
4838                {
4839                int len = 1;
4840                if (eptr >= md->end_subject)
4841                  {
4842                  SCHECK_PARTIAL();
4843                  break;
4844                  }
4845                GETCHARLENTEST(c, eptr, len);
4846                prop_category = UCD_CATEGORY(c);
4847                if ((prop_category == ucp_L || prop_category == ucp_N)
4848                     == prop_fail_result)
4849                  break;
4850                eptr+= len;
4851                }
4852              break;
4853    
4854              case PT_SPACE:    /* Perl space */
4855              for (i = min; i < max; i++)
4856                {
4857                int len = 1;
4858                if (eptr >= md->end_subject)
4859                  {
4860                  SCHECK_PARTIAL();
4861                  break;
4862                  }
4863                GETCHARLENTEST(c, eptr, len);
4864                prop_category = UCD_CATEGORY(c);
4865                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4866                     c == CHAR_FF || c == CHAR_CR)
4867                     == prop_fail_result)
4868                  break;
4869                eptr+= len;
4870                }
4871              break;
4872    
4873              case PT_PXSPACE:  /* POSIX space */
4874              for (i = min; i < max; i++)
4875                {
4876                int len = 1;
4877                if (eptr >= md->end_subject)
4878                  {
4879                  SCHECK_PARTIAL();
4880                  break;
4881                  }
4882                GETCHARLENTEST(c, eptr, len);
4883                prop_category = UCD_CATEGORY(c);
4884                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4885                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4886                     == prop_fail_result)
4887                  break;
4888                eptr+= len;
4889                }
4890              break;
4891    
4892              case PT_WORD:
4893              for (i = min; i < max; i++)
4894                {
4895                int len = 1;
4896                if (eptr >= md->end_subject)
4897                  {
4898                  SCHECK_PARTIAL();
4899                  break;
4900                  }
4901                GETCHARLENTEST(c, eptr, len);
4902                prop_category = UCD_CATEGORY(c);
4903                if ((prop_category == ucp_L || prop_category == ucp_N ||
4904                     c == CHAR_UNDERSCORE) == prop_fail_result)
4905                  break;
4906                eptr+= len;
4907                }
4908              break;
4909    
4910              default:
4911              RRETURN(PCRE_ERROR_INTERNAL);
4912            }            }
4913    
4914          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
# Line 5133  switch (frame->Xwhere) Line 5494  switch (frame->Xwhere)
5494    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
5495    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
5496    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5497    LBL(53) LBL(54)    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58)
5498  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5499    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5500    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
5501  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
5502    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
5503      LBL(59) LBL(60) LBL(61) LBL(62)
5504  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
5505  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
5506    default:    default:
# Line 5272  if ((options & ~PUBLIC_EXEC_OPTIONS) != Line 5634  if ((options & ~PUBLIC_EXEC_OPTIONS) !=
5634  if (re == NULL || subject == NULL ||  if (re == NULL || subject == NULL ||
5635     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
5636  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5637    if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5638    
5639  /* This information is for finding all the numbers associated with a given  /* This information is for finding all the numbers associated with a given
5640  name, for condition testing. */  name, for condition testing. */
# Line 5342  end_subject = md->end_subject; Line 5705  end_subject = md->end_subject;
5705    
5706  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
5707  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5708    md->use_ucp = (re->options & PCRE_UCP) != 0;
5709  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5710    
5711  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
# Line 5437  back the character offset. */ Line 5801  back the character offset. */
5801  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5802  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5803    {    {
5804    if (_pcre_valid_utf8((USPTR)subject, length) >= 0)    int tb;
5805      return PCRE_ERROR_BADUTF8;    if ((tb = _pcre_valid_utf8((USPTR)subject, length)) >= 0)
5806        return (tb == length && md->partial > 1)?
5807          PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
5808    if (start_offset > 0 && start_offset < length)    if (start_offset > 0 && start_offset < length)
5809      {      {
5810      int tb = ((USPTR)subject)[start_offset];      tb = ((USPTR)subject)[start_offset] & 0xc0;
5811      if (tb > 127)      if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;
       {  
       tb &= 0xc0;  
       if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;  
       }  
5812      }      }
5813    }    }
5814  #endif  #endif
# Line 5630  for(;;) Line 5992  for(;;)
5992        while (start_match < end_subject)        while (start_match < end_subject)
5993          {          {
5994          register unsigned int c = *start_match;          register unsigned int c = *start_match;
5995          if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;          if ((start_bits[c/8] & (1 << (c&7))) == 0)
5996            else break;            {
5997              start_match++;
5998    #ifdef SUPPORT_UTF8
5999              if (utf8)
6000                while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
6001                  start_match++;
6002    #endif
6003              }
6004            else break;
6005          }          }
6006        }        }
6007      }   /* Starting optimizations */      }   /* Starting optimizations */
# Line 5722  for(;;) Line 6092  for(;;)
6092    
6093    /* OK, we can now run the match. If "hitend" is set afterwards, remember the    /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6094    first starting point for which a partial match was found. */    first starting point for which a partial match was found. */
6095    
6096    md->start_match_ptr = start_match;    md->start_match_ptr = start_match;
6097    md->start_used_ptr = start_match;    md->start_used_ptr = start_match;
6098    md->match_call_count = 0;    md->match_call_count = 0;
# Line 5732  for(;;) Line 6102  for(;;)
6102    
6103    switch(rc)    switch(rc)
6104      {      {
6105      /* NOMATCH and PRUNE advance by one character. If MATCH_SKIP_ARG reaches      /* SKIP passes back the next starting point explicitly, but if it is the
6106      this level it means that a MARK that matched the SKIP's arg was not found.      same as the match we have just done, treat it as NOMATCH. */
6107      We treat this as NOMATCH. THEN at this level acts exactly like PRUNE. */  
6108        case MATCH_SKIP:
6109        if (md->start_match_ptr != start_match)
6110          {
6111          new_start_match = md->start_match_ptr;
6112          break;
6113          }
6114        /* Fall through */
6115    
6116        /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6117        the SKIP's arg was not found. We also treat this as NOMATCH. */
6118    
6119        case MATCH_SKIP_ARG:
6120        /* Fall through */
6121    
6122        /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6123        exactly like PRUNE. */
6124    
6125      case MATCH_NOMATCH:      case MATCH_NOMATCH:
6126      case MATCH_PRUNE:      case MATCH_PRUNE:
     case MATCH_SKIP_ARG:  
6127      case MATCH_THEN:      case MATCH_THEN:
6128      new_start_match = start_match + 1;      new_start_match = start_match + 1;
6129  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 5748  for(;;) Line 6133  for(;;)
6133  #endif  #endif
6134      break;      break;
6135    
     /* SKIP passes back the next starting point explicitly. */  
   
     case MATCH_SKIP:  
     new_start_match = md->start_match_ptr;  
     break;  
   
6136      /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */      /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6137    
6138      case MATCH_COMMIT:      case MATCH_COMMIT:
# Line 5850  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6229  if (rc == MATCH_MATCH || rc == MATCH_ACC
6229    
6230    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
6231      {      {
6232      offsets[0] = md->start_match_ptr - md->start_subject;      offsets[0] = (int)(md->start_match_ptr - md->start_subject);
6233      offsets[1] = md->end_match_ptr - md->start_subject;      offsets[1] = (int)(md->end_match_ptr - md->start_subject);
6234      }      }
6235    
6236    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
# Line 5866  if (using_temporary_offsets) Line 6245  if (using_temporary_offsets)
6245    DPRINTF(("Freeing temporary memory\n"));    DPRINTF(("Freeing temporary memory\n"));
6246    (pcre_free)(md->offset_vector);    (pcre_free)(md->offset_vector);
6247    }    }
6248    
6249  /* For anything other than nomatch or partial match, just return the code. */  /* For anything other than nomatch or partial match, just return the code. */
6250    
6251  if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)  if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
6252    {    {
6253    DPRINTF((">>>> error: returning %d\n", rc));    DPRINTF((">>>> error: returning %d\n", rc));
6254    return rc;    return rc;
6255    }    }
6256    
6257  /* Handle partial matches - disable any mark data */  /* Handle partial matches - disable any mark data */
6258    
6259  if (start_partial != NULL)  if (start_partial != NULL)
6260    {    {
6261    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
6262    md->mark = NULL;    md->mark = NULL;
6263    if (offsetcount > 1)    if (offsetcount > 1)
6264      {      {
6265      offsets[0] = start_partial - (USPTR)subject;      offsets[0] = (int)(start_partial - (USPTR)subject);
6266      offsets[1] = end_subject - (USPTR)subject;      offsets[1] = (int)(end_subject - (USPTR)subject);
6267      }      }
6268    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
6269    }    }
6270    
6271  /* This is the classic nomatch case */  /* This is the classic nomatch case */
6272    
6273  else  else
6274    {    {
6275    DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));    DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
6276    rc = PCRE_ERROR_NOMATCH;    rc = PCRE_ERROR_NOMATCH;
6277    }    }
6278    
6279  /* Return the MARK data if it has been requested. */  /* Return the MARK data if it has been requested. */
6280    
6281  RETURN_MARK:  RETURN_MARK:
6282    
6283  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
6284    *(extra_data->mark) = (unsigned char *)(md->mark);    *(extra_data->mark) = (unsigned char *)(md->mark);
6285  return rc;  return rc;
6286  }  }
6287    
6288  /* End of pcre_exec.c */  /* End of pcre_exec.c */

Legend:
Removed from v.511  
changed lines
  Added in v.569

  ViewVC Help
Powered by ViewVC 1.1.5