/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 882 by ph10, Sun Jan 15 18:45:27 2012 UTC | revision 926 by ph10, Wed Feb 22 15:01:32 2012 UTC
# Line 140  Arguments: Line 140  Arguments:
140    md          points to match data block    md          points to match data block
141    caseless    TRUE if caseless    caseless    TRUE if caseless
142    
143  Returns:      < 0 if not matched, otherwise the number of subject bytes matched  Returns:      >= 0 the number of subject bytes matched
144                  -1 no match
145                  -2 partial match; always given if at end subject
146  */  */
147    
148  static int  static int
# Line 163  pchars(p, length, FALSE, md); Line 165  pchars(p, length, FALSE, md);
165  printf("\n");  printf("\n");
166  #endif  #endif
167    
168  /* Always fail if reference not set (and not JavaScript compatible). */  /* Always fail if reference not set (and not JavaScript compatible - in that
169    case the length is passed as zero). */
170    
171  if (length < 0) return -1;  if (length < 0) return -1;
172    
# Line 189  if (caseless) Line 192  if (caseless)
192      while (p < endptr)      while (p < endptr)
193        {        {
194        int c, d;        int c, d;
195        if (eptr >= md->end_subject) return -1;        if (eptr >= md->end_subject) return -2;   /* Partial match */
196        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
197        GETCHARINC(d, p);        GETCHARINC(d, p);
198        if (c != d && c != UCD_OTHERCASE(d)) return -1;        if (c != d && c != UCD_OTHERCASE(d)) return -1;
# Line 202  if (caseless) Line 205  if (caseless)
205    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
206    is no UCP support. */    is no UCP support. */
207      {      {
     if (eptr + length > md->end_subject) return -1;  
208      while (length-- > 0)      while (length-- > 0)
209        {        {
210          if (eptr >= md->end_subject) return -2;   /* Partial match */
211        if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;        if (TABLE_GET(*p, md->lcc, *p) != TABLE_GET(*eptr, md->lcc, *eptr)) return -1;
212        p++;        p++;
213        eptr++;        eptr++;
# Line 217  are in UTF-8 mode. */ Line 220  are in UTF-8 mode. */
220    
221  else  else
222    {    {
223    if (eptr + length > md->end_subject) return -1;    while (length-- > 0)
224    while (length-- > 0) if (*p++ != *eptr++) return -1;      {
225        if (eptr >= md->end_subject) return -2;   /* Partial match */
226        if (*p++ != *eptr++) return -1;
227        }
228    }    }
229    
230  return (int)(eptr - eptr_start);  return (int)(eptr - eptr_start);
# Line 332  argument of match(), which never changes Line 338  argument of match(), which never changes
338    {\    {\
339    heapframe *oldframe = frame;\    heapframe *oldframe = frame;\
340    frame = oldframe->Xprevframe;\    frame = oldframe->Xprevframe;\
341    (PUBL(stack_free))(oldframe);\    if (oldframe != &frame_zero) (PUBL(stack_free))(oldframe);\
342    if (frame != NULL)\    if (frame != NULL)\
343      {\      {\
344      rrc = ra;\      rrc = ra;\
# Line 485  BOOL caseless; Line 491  BOOL caseless;
491  int condcode;  int condcode;
492    
493  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
494  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame". We set up the top-level
495  heap storage. Set up the top-level frame here; others are obtained from the  frame on the stack here; subsequent instantiations are obtained from the heap
496  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  whenever RMATCH() does a "recursion". See the macro definitions above. Putting
497    the top-level on the stack rather than malloc-ing them all gives a performance
498    boost in many cases where there is not much "recursion". */
499    
500  #ifdef NO_RECURSE  #ifdef NO_RECURSE
501  heapframe *frame = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));  heapframe frame_zero;
502  if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);  heapframe *frame = &frame_zero;
503  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
504    
505  /* Copy in the original argument variables */  /* Copy in the original argument variables */
# Line 613  int save_offset1, save_offset2, save_off Line 621  int save_offset1, save_offset2, save_off
621  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
622    
623  eptrblock newptrb;  eptrblock newptrb;
624    
625    /* There is a special fudge for calling match() in a way that causes it to
626    measure the size of its basic stack frame when the stack is being used for
627    recursion. The second argument (ecode) being NULL triggers this behaviour. It
628    cannot normally ever be NULL. The return is the negated value of the frame
629    size. */
630    
631    if (ecode == NULL)
632      {
633      if (rdepth == 0)
634        return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
635      else
636        {
637        int len = (char *)&rdepth - (char *)eptr;
638        return (len > 0)? -len : len;
639        }
640      }
641  #endif     /* NO_RECURSE */  #endif     /* NO_RECURSE */
642    
643  /* To save space on the stack and in the heap frame, I have doubled up on some  /* To save space on the stack and in the heap frame, I have doubled up on some
# Line 819  for (;;) Line 844  for (;;)
844      case OP_ONCE_NC:      case OP_ONCE_NC:
845      prev = ecode;      prev = ecode;
846      saved_eptr = eptr;      saved_eptr = eptr;
847      save_mark = md->mark;      save_mark = md->mark;
848      do      do
849        {        {
850        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
# Line 838  for (;;) Line 863  for (;;)
863    
864        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
865        ecode += GET(ecode,1);        ecode += GET(ecode,1);
866        md->mark = save_mark;        md->mark = save_mark;
867        }        }
868      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
869    
# Line 918  for (;;) Line 943  for (;;)
943        save_offset2 = md->offset_vector[offset+1];        save_offset2 = md->offset_vector[offset+1];
944        save_offset3 = md->offset_vector[md->offset_end - number];        save_offset3 = md->offset_vector[md->offset_end - number];
945        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
946        save_mark = md->mark;        save_mark = md->mark;
947    
948        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
949        md->offset_vector[md->offset_end - number] =        md->offset_vector[md->offset_end - number] =
# Line 1024  for (;;) Line 1049  for (;;)
1049        save_mark = md->mark;        save_mark = md->mark;
1050        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1051          RM2);          RM2);
1052    
1053        /* See comment in the code for capturing groups above about handling        /* See comment in the code for capturing groups above about handling
1054        THEN. */        THEN. */
1055    
# Line 1051  for (;;) Line 1076  for (;;)
1076          RRETURN(rrc);          RRETURN(rrc);
1077          }          }
1078        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1079        md->mark = save_mark;        md->mark = save_mark;
1080        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1081        }        }
1082    
# Line 1530  for (;;) Line 1555  for (;;)
1555    
1556      case OP_ASSERT:      case OP_ASSERT:
1557      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1558        save_mark = md->mark;
1559      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1560        {        {
1561        condassert = TRUE;        condassert = TRUE;
# Line 1551  for (;;) Line 1577  for (;;)
1577    
1578        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1579        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1580          md->mark = save_mark;
1581        }        }
1582      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1583    
# Line 1574  for (;;) Line 1601  for (;;)
1601    
1602      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1603      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1604        save_mark = md->mark;
1605      if (md->match_function_type == MATCH_CONDASSERT)      if (md->match_function_type == MATCH_CONDASSERT)
1606        {        {
1607        condassert = TRUE;        condassert = TRUE;
# Line 1584  for (;;) Line 1612  for (;;)
1612      do      do
1613        {        {
1614        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1615          md->mark = save_mark;
1616        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) RRETURN(MATCH_NOMATCH);
1617        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1618          {          {
# Line 2036  for (;;) Line 2065  for (;;)
2065    
2066      case OP_DOLLM:      case OP_DOLLM:
2067      if (eptr < md->end_subject)      if (eptr < md->end_subject)
2068        { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }        {
2069          if (!IS_NEWLINE(eptr))
2070            {
2071            if (md->partial != 0 &&
2072                eptr + 1 >= md->end_subject &&
2073                NLBLOCK->nltype == NLTYPE_FIXED &&
2074                NLBLOCK->nllen == 2 &&
2075                *eptr == NLBLOCK->nl[0])
2076              {
2077              md->hitend = TRUE;
2078              if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2079              }
2080            RRETURN(MATCH_NOMATCH);
2081            }
2082          }
2083      else      else
2084        {        {
2085        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
# Line 2068  for (;;) Line 2111  for (;;)
2111      ASSERT_NL_OR_EOS:      ASSERT_NL_OR_EOS:
2112      if (eptr < md->end_subject &&      if (eptr < md->end_subject &&
2113          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
2114          {
2115          if (md->partial != 0 &&
2116              eptr + 1 >= md->end_subject &&
2117              NLBLOCK->nltype == NLTYPE_FIXED &&
2118              NLBLOCK->nllen == 2 &&
2119              *eptr == NLBLOCK->nl[0])
2120            {
2121            md->hitend = TRUE;
2122            if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2123            }
2124        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2125          }
2126    
2127      /* Either at end of string or \n before end. */      /* Either at end of string or \n before end. */
2128    
# Line 2196  for (;;) Line 2250  for (;;)
2250        }        }
2251      break;      break;
2252    
2253      /* Match a single character type; inline for speed */      /* Match any single character type except newline; have to take care with
2254        CRLF newlines and partial matching. */
2255    
2256      case OP_ANY:      case OP_ANY:
2257      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2258        if (md->partial != 0 &&
2259            eptr + 1 >= md->end_subject &&
2260            NLBLOCK->nltype == NLTYPE_FIXED &&
2261            NLBLOCK->nllen == 2 &&
2262            *eptr == NLBLOCK->nl[0])
2263          {
2264          md->hitend = TRUE;
2265          if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2266          }
2267    
2268      /* Fall through */      /* Fall through */
2269    
2270        /* Match any single character whatsoever. */
2271    
2272      case OP_ALLANY:      case OP_ALLANY:
2273      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */      if (eptr >= md->end_subject)   /* DO NOT merge the eptr++ here; it must */
2274        {                            /* not be updated before SCHECK_PARTIAL. */        {                            /* not be updated before SCHECK_PARTIAL. */
# Line 2342  for (;;) Line 2409  for (;;)
2409        default: RRETURN(MATCH_NOMATCH);        default: RRETURN(MATCH_NOMATCH);
2410    
2411        case 0x000d:        case 0x000d:
2412        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr >= md->end_subject)
2413            {
2414            SCHECK_PARTIAL();
2415            }
2416          else if (*eptr == 0x0a) eptr++;
2417        break;        break;
2418    
2419        case 0x000a:        case 0x000a:
# Line 2572  for (;;) Line 2643  for (;;)
2643        if (UCD_CATEGORY(c) != ucp_M) break;        if (UCD_CATEGORY(c) != ucp_M) break;
2644        eptr += len;        eptr += len;
2645        }        }
2646        CHECK_PARTIAL();
2647      ecode++;      ecode++;
2648      break;      break;
2649  #endif  #endif
# Line 2637  for (;;) Line 2709  for (;;)
2709        default:               /* No repeat follows */        default:               /* No repeat follows */
2710        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2711          {          {
2712            if (length == -2) eptr = md->end_subject;   /* Partial match */
2713          CHECK_PARTIAL();          CHECK_PARTIAL();
2714          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2715          }          }
# Line 2662  for (;;) Line 2735  for (;;)
2735        int slength;        int slength;
2736        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)        if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2737          {          {
2738            if (slength == -2) eptr = md->end_subject;   /* Partial match */
2739          CHECK_PARTIAL();          CHECK_PARTIAL();
2740          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2741          }          }
# Line 2685  for (;;) Line 2759  for (;;)
2759          if (fi >= max) RRETURN(MATCH_NOMATCH);          if (fi >= max) RRETURN(MATCH_NOMATCH);
2760          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2761            {            {
2762              if (slength == -2) eptr = md->end_subject;   /* Partial match */
2763            CHECK_PARTIAL();            CHECK_PARTIAL();
2764            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
2765            }            }
# Line 2703  for (;;) Line 2778  for (;;)
2778          int slength;          int slength;
2779          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2780            {            {
2781            CHECK_PARTIAL();            /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2782              the soft partial matching case. */
2783    
2784              if (slength == -2 && md->partial != 0 &&
2785                  md->end_subject > md->start_used_ptr)
2786                {
2787                md->hitend = TRUE;
2788                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2789                }
2790            break;            break;
2791            }            }
2792          eptr += slength;          eptr += slength;
2793          }          }
2794    
2795        while (eptr >= pp)        while (eptr >= pp)
2796          {          {
2797          RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);          RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
# Line 3481  for (;;) Line 3565  for (;;)
3565        SCHECK_PARTIAL();        SCHECK_PARTIAL();
3566        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
3567        }        }
     ecode++;  
     GETCHARINCTEST(c, eptr);  
     if (op == OP_NOTI)         /* The caseless case */  
       {  
       register int ch, och;  
       ch = *ecode++;  
 #ifdef COMPILE_PCRE8  
       /* ch must be < 128 if UTF is enabled. */  
       och = md->fcc[ch];  
 #else  
3568  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3569        if (utf)
3570          {
3571          register unsigned int ch, och;
3572    
3573          ecode++;
3574          GETCHARINC(ch, ecode);
3575          GETCHARINC(c, eptr);
3576    
3577          if (op == OP_NOT)
3578            {
3579            if (ch == c) RRETURN(MATCH_NOMATCH);
3580            }
3581          else
3582            {
3583  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3584        if (utf && ch > 127)          if (ch > 127)
3585          och = UCD_OTHERCASE(ch);            och = UCD_OTHERCASE(ch);
3586  #else  #else
3587        if (utf && ch > 127)          if (ch > 127)
3588          och = ch;            och = ch;
3589  #endif /* SUPPORT_UCP */  #endif /* SUPPORT_UCP */
3590        else          else
3591  #endif /* SUPPORT_UTF */            och = TABLE_GET(ch, md->fcc, ch);
3592          och = TABLE_GET(ch, md->fcc, ch);          if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3593  #endif /* COMPILE_PCRE8 */          }
       if (ch == c || och == c) RRETURN(MATCH_NOMATCH);  
3594        }        }
3595      else    /* Caseful */      else
3596    #endif
3597        {        {
3598        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        register unsigned int ch = ecode[1];
3599          c = *eptr++;
3600          if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3601            RRETURN(MATCH_NOMATCH);
3602          ecode += 2;
3603        }        }
3604      break;      break;
3605    
# Line 3587  for (;;) Line 3679  for (;;)
3679      /* Common code for all repeated single-byte matches. */      /* Common code for all repeated single-byte matches. */
3680    
3681      REPEATNOTCHAR:      REPEATNOTCHAR:
3682      fc = *ecode++;      GETCHARINCTEST(fc, ecode);
3683    
3684      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
3685      since matching characters is likely to be quite common. First, ensure the      since matching characters is likely to be quite common. First, ensure the
# Line 3602  for (;;) Line 3694  for (;;)
3694    
3695      if (op >= OP_NOTSTARI)     /* Caseless */      if (op >= OP_NOTSTARI)     /* Caseless */
3696        {        {
 #ifdef COMPILE_PCRE8  
       /* fc must be < 128 if UTF is enabled. */  
       foc = md->fcc[fc];  
 #else  
3697  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3698  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3699        if (utf && fc > 127)        if (utf && fc > 127)
# Line 3617  for (;;) Line 3705  for (;;)
3705        else        else
3706  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
3707          foc = TABLE_GET(fc, md->fcc, fc);          foc = TABLE_GET(fc, md->fcc, fc);
 #endif /* COMPILE_PCRE8 */  
3708    
3709  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3710        if (utf)        if (utf)
# Line 3631  for (;;) Line 3718  for (;;)
3718              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3719              }              }
3720            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3721            if (fc == d || foc == d) RRETURN(MATCH_NOMATCH);            if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3722            }            }
3723          }          }
3724        else        else
# Line 3669  for (;;) Line 3756  for (;;)
3756                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3757                }                }
3758              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3759              if (fc == d || foc == d) RRETURN(MATCH_NOMATCH);              if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3760              }              }
3761            }            }
3762          else          else
# Line 3712  for (;;) Line 3799  for (;;)
3799                break;                break;
3800                }                }
3801              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3802              if (fc == d || foc == d) break;              if (fc == d || (unsigned int)foc == d) break;
3803              eptr += len;              eptr += len;
3804              }              }
3805          if (possessive) continue;            if (possessive) continue;
3806          for(;;)            for(;;)
3807              {              {
3808              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3809              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 4141  for (;;) Line 4228  for (;;)
4228              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
4229              eptr += len;              eptr += len;
4230              }              }
4231              CHECK_PARTIAL();
4232            }            }
4233          }          }
4234    
# Line 4161  for (;;) Line 4249  for (;;)
4249              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4250              }              }
4251            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4252              if (md->partial != 0 &&
4253                  eptr + 1 >= md->end_subject &&
4254                  NLBLOCK->nltype == NLTYPE_FIXED &&
4255                  NLBLOCK->nllen == 2 &&
4256                  *eptr == NLBLOCK->nl[0])
4257                {
4258                md->hitend = TRUE;
4259                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4260                }
4261            eptr++;            eptr++;
4262            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);            ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4263            }            }
# Line 4445  for (;;) Line 4542  for (;;)
4542              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
4543              }              }
4544            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4545              if (md->partial != 0 &&
4546                  eptr + 1 >= md->end_subject &&
4547                  NLBLOCK->nltype == NLTYPE_FIXED &&
4548                  NLBLOCK->nllen == 2 &&
4549                  *eptr == NLBLOCK->nl[0])
4550                {
4551                md->hitend = TRUE;
4552                if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4553                }
4554            eptr++;            eptr++;
4555            }            }
4556          break;          break;
# Line 4925  for (;;) Line 5031  for (;;)
5031              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
5032              eptr += len;              eptr += len;
5033              }              }
5034              CHECK_PARTIAL();
5035            }            }
5036          }          }
5037        else        else
# Line 4948  for (;;) Line 5055  for (;;)
5055            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
5056            switch(ctype)            switch(ctype)
5057              {              {
5058              case OP_ANY:        /* This is the non-NL case */              case OP_ANY:               /* This is the non-NL case */
5059                if (md->partial != 0 &&    /* Take care with CRLF partial */
5060                    eptr >= md->end_subject &&
5061                    NLBLOCK->nltype == NLTYPE_FIXED &&
5062                    NLBLOCK->nllen == 2 &&
5063                    c == NLBLOCK->nl[0])
5064                  {
5065                  md->hitend = TRUE;
5066                  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5067                  }
5068                break;
5069    
5070              case OP_ALLANY:              case OP_ALLANY:
5071              case OP_ANYBYTE:              case OP_ANYBYTE:
5072              break;              break;
# Line 5111  for (;;) Line 5229  for (;;)
5229            c = *eptr++;            c = *eptr++;
5230            switch(ctype)            switch(ctype)
5231              {              {
5232              case OP_ANY:     /* This is the non-NL case */              case OP_ANY:               /* This is the non-NL case */
5233                if (md->partial != 0 &&    /* Take care with CRLF partial */
5234                    eptr >= md->end_subject &&
5235                    NLBLOCK->nltype == NLTYPE_FIXED &&
5236                    NLBLOCK->nllen == 2 &&
5237                    c == NLBLOCK->nl[0])
5238                  {
5239                  md->hitend = TRUE;
5240                  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5241                  }
5242                break;
5243    
5244              case OP_ALLANY:              case OP_ALLANY:
5245              case OP_ANYBYTE:              case OP_ANYBYTE:
5246              break;              break;
# Line 5468  for (;;) Line 5597  for (;;)
5597              if (UCD_CATEGORY(c) != ucp_M) break;              if (UCD_CATEGORY(c) != ucp_M) break;
5598              eptr += len;              eptr += len;
5599              }              }
5600              CHECK_PARTIAL();
5601            }            }
5602    
5603          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
# Line 5511  for (;;) Line 5641  for (;;)
5641                  break;                  break;
5642                  }                  }
5643                if (IS_NEWLINE(eptr)) break;                if (IS_NEWLINE(eptr)) break;
5644                  if (md->partial != 0 &&    /* Take care with CRLF partial */
5645                      eptr + 1 >= md->end_subject &&
5646                      NLBLOCK->nltype == NLTYPE_FIXED &&
5647                      NLBLOCK->nllen == 2 &&
5648                      *eptr == NLBLOCK->nl[0])
5649                    {
5650                    md->hitend = TRUE;
5651                    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5652                    }
5653                eptr++;                eptr++;
5654                ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);                ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5655                }                }
# Line 5528  for (;;) Line 5667  for (;;)
5667                  break;                  break;
5668                  }                  }
5669                if (IS_NEWLINE(eptr)) break;                if (IS_NEWLINE(eptr)) break;
5670                  if (md->partial != 0 &&    /* Take care with CRLF partial */
5671                      eptr + 1 >= md->end_subject &&
5672                      NLBLOCK->nltype == NLTYPE_FIXED &&
5673                      NLBLOCK->nllen == 2 &&
5674                      *eptr == NLBLOCK->nl[0])
5675                    {
5676                    md->hitend = TRUE;
5677                    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5678                    }
5679                eptr++;                eptr++;
5680                ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);                ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5681                }                }
# Line 5792  for (;;) Line 5940  for (;;)
5940                break;                break;
5941                }                }
5942              if (IS_NEWLINE(eptr)) break;              if (IS_NEWLINE(eptr)) break;
5943                if (md->partial != 0 &&    /* Take care with CRLF partial */
5944                    eptr + 1 >= md->end_subject &&
5945                    NLBLOCK->nltype == NLTYPE_FIXED &&
5946                    NLBLOCK->nllen == 2 &&
5947                    *eptr == NLBLOCK->nl[0])
5948                  {
5949                  md->hitend = TRUE;
5950                  if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5951                  }
5952              eptr++;              eptr++;
5953              }              }
5954            break;            break;
# Line 6184  PCRE_PUCHAR req_char_ptr = start_match - Line 6341  PCRE_PUCHAR req_char_ptr = start_match -
6341  const pcre_study_data *study;  const pcre_study_data *study;
6342  const REAL_PCRE *re = (const REAL_PCRE *)argument_re;  const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6343    
6344    /* Check for the special magic call that measures the size of the stack used
6345    per recursive call of match(). Without the funny casting for sizeof, a Windows
6346    compiler gave this error: "unary minus operator applied to unsigned type,
6347    result still unsigned". Hopefully the cast fixes that. */
6348    
6349    if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
6350        start_offset == -999)
6351    #ifdef NO_RECURSE
6352      return -((int)sizeof(heapframe));
6353    #else
6354      return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
6355    #endif
6356    
6357  /* Plausibility checks */  /* Plausibility checks */
6358    
6359  if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;  if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6360  if (re == NULL || subject == NULL ||  if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6361     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;    return PCRE_ERROR_NULL;
6362  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6363  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6364    
6365    /* Check that the first field in the block is the magic number. If it is not,
6366    return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6367    REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6368    means that the pattern is likely compiled with different endianness. */
6369    
6370    if (re->magic_number != MAGIC_NUMBER)
6371      return re->magic_number == REVERSED_MAGIC_NUMBER?
6372        PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6373    if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6374    
6375  /* These two settings are used in the code for checking a UTF-8 string that  /* These two settings are used in the code for checking a UTF-8 string that
6376  follows immediately afterwards. Other values in the md block are used only  follows immediately afterwards. Other values in the md block are used only
6377  during "normal" pcre_exec() processing, not when the JIT support is in use,  during "normal" pcre_exec() processing, not when the JIT support is in use,
# Line 6236  if (utf && (options & PCRE_NO_UTF8_CHECK Line 6416  if (utf && (options & PCRE_NO_UTF8_CHECK
6416  /* If the pattern was successfully studied with JIT support, run the JIT  /* If the pattern was successfully studied with JIT support, run the JIT
6417  executable instead of the rest of this function. Most options must be set at  executable instead of the rest of this function. Most options must be set at
6418  compile time for the JIT code to be usable. Fall back to the normal code path if  compile time for the JIT code to be usable. Fall back to the normal code path if
6419  an unsupported flag is set. In particular, JIT does not support partial  an unsupported flag is set. */
 matching. */  
6420    
6421  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
6422  if (extra_data != NULL  if (extra_data != NULL
6423      && (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0      && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6424                                 PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6425      && extra_data->executable_jit != NULL      && extra_data->executable_jit != NULL
     && (extra_data->flags & PCRE_EXTRA_TABLES) == 0  
6426      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |
6427                      PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0)                      PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART |
6428    return PRIV(jit_exec)(re, extra_data->executable_jit,                      PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0)
6429      {
6430      rc = PRIV(jit_exec)(re, extra_data->executable_jit,
6431      (const pcre_uchar *)subject, length, start_offset, options,      (const pcre_uchar *)subject, length, start_offset, options,
6432      ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)      ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0)
6433      ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);      ? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount);
6434    
6435      /* PCRE_ERROR_NULL means that the selected normal or partial matching
6436      mode is not compiled. In this case we simply fall back to the interpreter. */
6437    
6438      if (rc != PCRE_ERROR_NULL) return rc;
6439      }
6440  #endif  #endif
6441    
6442  /* Carry on with non-JIT matching. This information is for finding all the  /* Carry on with non-JIT matching. This information is for finding all the
# Line 6291  in other programs later. */ Line 6478  in other programs later. */
6478    
6479  if (tables == NULL) tables = PRIV(default_tables);  if (tables == NULL) tables = PRIV(default_tables);
6480    
 /* Check that the first field in the block is the magic number. If it is not,  
 return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to  
 REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which  
 means that the pattern is likely compiled with different endianness. */  
   
 if (re->magic_number != MAGIC_NUMBER)  
   return re->magic_number == REVERSED_MAGIC_NUMBER?  
     PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;  
 if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;  
   
6481  /* Set up other data */  /* Set up other data */
6482    
6483  anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
# Line 6462  if (!anchored) Line 6639  if (!anchored)
6639    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
6640      {      {
6641      has_first_char = TRUE;      has_first_char = TRUE;
6642      first_char = first_char2 = re->first_char;      first_char = first_char2 = (pcre_uchar)(re->first_char);
6643      if ((re->flags & PCRE_FCH_CASELESS) != 0)      if ((re->flags & PCRE_FCH_CASELESS) != 0)
6644        {        {
6645        first_char2 = TABLE_GET(first_char, md->fcc, first_char);        first_char2 = TABLE_GET(first_char, md->fcc, first_char);
# Line 6484  character" set. */ Line 6661  character" set. */
6661  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
6662    {    {
6663    has_req_char = TRUE;    has_req_char = TRUE;
6664    req_char = req_char2 = re->req_char;    req_char = req_char2 = (pcre_uchar)(re->req_char);
6665    if ((re->flags & PCRE_RCH_CASELESS) != 0)    if ((re->flags & PCRE_RCH_CASELESS) != 0)
6666      {      {
6667      req_char2 = TABLE_GET(req_char, md->fcc, req_char);      req_char2 = TABLE_GET(req_char, md->fcc, req_char);

Legend:
Removed from v.882  
changed lines
  Added in v.926

  ViewVC Help
Powered by ViewVC 1.1.5