/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 1102 by chpe, Tue Oct 16 15:56:34 2012 UTC | revision 1260 by ph10, Wed Feb 27 15:41:22 2013 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2012 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 56  possible. There are also some static sup Line 56  possible. There are also some static sup
56  #undef min  #undef min
57  #undef max  #undef max
58    
59    /* The md->capture_last field uses the lower 16 bits for the last captured
60    substring (which can never be greater than 65535) and a bit in the top half
61    to mean "capture vector overflowed". This odd way of doing things was
62    implemented when it was realized that preserving and restoring the overflow bit
63    whenever the last capture number was saved/restored made for a neater
64    interface, and doing it this way saved on (a) another variable, which would
65    have increased the stack frame size (a big NO-NO in PCRE) and (b) another
66    separate set of save/restore instructions. The following defines are used in
67    implementing this. */
68    
69    #define CAPLMASK    0x0000ffff    /* The bits used for last_capture */
70    #define OVFLMASK    0xffff0000    /* The bits used for the overflow flag */
71    #define OVFLBIT     0x00010000    /* The bit that is set for overflow */
72    
73  /* Values for setting in md->match_function_type to indicate two special types  /* Values for setting in md->match_function_type to indicate two special types
74  of call to match(). We do it this way to save on using another stack variable,  of call to match(). We do it this way to save on using another stack variable,
75  as stack usage is to be discouraged. */  as stack usage is to be discouraged. */
# Line 149  match_ref(int offset, register PCRE_PUCH Line 163  match_ref(int offset, register PCRE_PUCH
163  {  {
164  PCRE_PUCHAR eptr_start = eptr;  PCRE_PUCHAR eptr_start = eptr;
165  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
166    #ifdef SUPPORT_UTF
167  BOOL utf = md->utf;  BOOL utf = md->utf;
168    #endif
169    
170  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
171  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 197  if (caseless) Line 213  if (caseless)
213        GETCHARINC(c, eptr);        GETCHARINC(c, eptr);
214        GETCHARINC(d, p);        GETCHARINC(d, p);
215        ur = GET_UCD(d);        ur = GET_UCD(d);
216        if (c != d && c != d + ur->other_case)        if (c != d && c != d + ur->other_case)
217          {          {
218          const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;          const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
219          for (;;)          for (;;)
220            {            {
221            if (c < *pp) return -1;            if (c < *pp) return -1;
# Line 217  if (caseless) Line 233  if (caseless)
233      {      {
234      while (length-- > 0)      while (length-- > 0)
235        {        {
236        pcre_uchar cc, cp;        pcre_uint32 cc, cp;
237        if (eptr >= md->end_subject) return -2;   /* Partial match */        if (eptr >= md->end_subject) return -2;   /* Partial match */
238        cc = RAWUCHARTEST(eptr);        cc = RAWUCHARTEST(eptr);
239        cp = RAWUCHARTEST(p);        cp = RAWUCHARTEST(p);
# Line 292  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 308  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
308         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
309         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
310         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
311         RM61,  RM62, RM63, RM64, RM65, RM66 };         RM61,  RM62, RM63, RM64, RM65, RM66, RM67, RM68 };
312    
313  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
314  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 401  typedef struct heapframe { Line 417  typedef struct heapframe {
417    
418  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
419    int Xprop_type;    int Xprop_type;
420    int Xprop_value;    unsigned int Xprop_value;
421    int Xprop_fail_result;    int Xprop_fail_result;
422    int Xoclength;    int Xoclength;
423    pcre_uchar Xocchars[6];    pcre_uchar Xocchars[6];
# Line 414  typedef struct heapframe { Line 430  typedef struct heapframe {
430    int Xlength;    int Xlength;
431    int Xmax;    int Xmax;
432    int Xmin;    int Xmin;
433    int Xnumber;    unsigned int Xnumber;
434    int Xoffset;    int Xoffset;
435    int Xop;    unsigned int Xop;
436    int Xsave_capture_last;    pcre_int32 Xsave_capture_last;
437    int Xsave_offset1, Xsave_offset2, Xsave_offset3;    int Xsave_offset1, Xsave_offset2, Xsave_offset3;
438    int Xstacksave[REC_STACK_SAVE_MAX];    int Xstacksave[REC_STACK_SAVE_MAX];
439    
# Line 619  BOOL prev_is_word; Line 635  BOOL prev_is_word;
635    
636  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
637  int prop_type;  int prop_type;
638  int prop_value;  unsigned int prop_value;
639  int prop_fail_result;  int prop_fail_result;
640  int oclength;  int oclength;
641  pcre_uchar occhars[6];  pcre_uchar occhars[6];
# Line 630  int ctype; Line 646  int ctype;
646  int length;  int length;
647  int max;  int max;
648  int min;  int min;
649  int number;  unsigned int number;
650  int offset;  int offset;
651  pcre_uchar op;  unsigned int op;
652  int save_capture_last;  pcre_int32 save_capture_last;
653  int save_offset1, save_offset2, save_offset3;  int save_offset1, save_offset2, save_offset3;
654  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
655    
# Line 1064  for (;;) Line 1080  for (;;)
1080        /* In all other cases, we have to make another call to match(). */        /* In all other cases, we have to make another call to match(). */
1081    
1082        save_mark = md->mark;        save_mark = md->mark;
1083          save_capture_last = md->capture_last;
1084        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,        RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1085          RM2);          RM2);
1086    
# Line 1095  for (;;) Line 1112  for (;;)
1112        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1113        md->mark = save_mark;        md->mark = save_mark;
1114        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1115          md->capture_last = save_capture_last;
1116        }        }
1117    
1118      RRETURN(MATCH_NOMATCH);      RRETURN(MATCH_NOMATCH);
# Line 1216  for (;;) Line 1234  for (;;)
1234      POSSESSIVE_NON_CAPTURE:      POSSESSIVE_NON_CAPTURE:
1235      matched_once = FALSE;      matched_once = FALSE;
1236      code_offset = (int)(ecode - md->start_code);      code_offset = (int)(ecode - md->start_code);
1237        save_capture_last = md->capture_last;
1238    
1239      for (;;)      for (;;)
1240        {        {
# Line 1245  for (;;) Line 1264  for (;;)
1264        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1265        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1266        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1267          md->capture_last = save_capture_last;
1268        }        }
1269    
1270      if (matched_once || allow_zero)      if (matched_once || allow_zero)
# Line 1289  for (;;) Line 1309  for (;;)
1309          cb.pattern_position = GET(ecode, LINK_SIZE + 3);          cb.pattern_position = GET(ecode, LINK_SIZE + 3);
1310          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
1311          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
1312          cb.capture_last     = md->capture_last;          cb.capture_last     = md->capture_last & CAPLMASK;
1313            /* Internal change requires this for API compatibility. */
1314            if (cb.capture_last == 0) cb.capture_last = -1;
1315          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
1316          cb.mark             = md->nomatch_mark;          cb.mark             = md->nomatch_mark;
1317          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);          if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
# Line 1311  for (;;) Line 1333  for (;;)
1333          }          }
1334        else        else
1335          {          {
1336          int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/          unsigned int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
1337          condition = (recno == RREF_ANY || recno == md->recursive->group_num);          condition = (recno == RREF_ANY || recno == md->recursive->group_num);
1338    
1339          /* If the test is for recursion into a specific subpattern, and it is          /* If the test is for recursion into a specific subpattern, and it is
# Line 1383  for (;;) Line 1405  for (;;)
1405    
1406        if (!condition && condcode == OP_NCREF)        if (!condition && condcode == OP_NCREF)
1407          {          {
1408          int refno = offset >> 1;          unsigned int refno = offset >> 1;
1409          pcre_uchar *slotA = md->name_table;          pcre_uchar *slotA = md->name_table;
1410    
1411          for (i = 0; i < md->name_count; i++)          for (i = 0; i < md->name_count; i++)
# Line 1511  for (;;) Line 1533  for (;;)
1533      to close any currently open capturing brackets. */      to close any currently open capturing brackets. */
1534    
1535      case OP_CLOSE:      case OP_CLOSE:
1536      number = GET2(ecode, 1);      number = GET2(ecode, 1);   /* Must be less than 65536 */
1537      offset = number << 1;      offset = number << 1;
1538    
1539  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
# Line 1519  for (;;) Line 1541  for (;;)
1541        printf("\n");        printf("\n");
1542  #endif  #endif
1543    
1544      md->capture_last = number;      md->capture_last = (md->capture_last & OVFLMASK) | number;
1545      if (offset >= md->offset_max) md->offset_overflow = TRUE; else      if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1546        {        {
1547        md->offset_vector[offset] =        md->offset_vector[offset] =
1548          md->offset_vector[md->offset_end - number];          md->offset_vector[md->offset_end - number];
# Line 1714  for (;;) Line 1736  for (;;)
1736        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1737        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1738        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1739        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last & CAPLMASK;
1740          /* Internal change requires this for API compatibility. */
1741          if (cb.capture_last == 0) cb.capture_last = -1;
1742        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1743        cb.mark             = md->nomatch_mark;        cb.mark             = md->nomatch_mark;
1744        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
# Line 1743  for (;;) Line 1767  for (;;)
1767      case OP_RECURSE:      case OP_RECURSE:
1768        {        {
1769        recursion_info *ri;        recursion_info *ri;
1770        int recno;        unsigned int recno;
1771    
1772        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
1773        recno = (callpat == md->start_code)? 0 :        recno = (callpat == md->start_code)? 0 :
# Line 1760  for (;;) Line 1784  for (;;)
1784        /* Add to "recursing stack" */        /* Add to "recursing stack" */
1785    
1786        new_recursive.group_num = recno;        new_recursive.group_num = recno;
1787          new_recursive.saved_capture_last = md->capture_last;
1788        new_recursive.subject_position = eptr;        new_recursive.subject_position = eptr;
1789        new_recursive.prevrec = md->recursive;        new_recursive.prevrec = md->recursive;
1790        md->recursive = &new_recursive;        md->recursive = &new_recursive;
# Line 1783  for (;;) Line 1808  for (;;)
1808              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1809    
1810        /* OK, now we can do the recursion. After processing each alternative,        /* OK, now we can do the recursion. After processing each alternative,
1811        restore the offset data. If there were nested recursions, md->recursive        restore the offset data and the last captured value. If there were nested
1812        might be changed, so reset it before looping. */        recursions, md->recursive might be changed, so reset it before looping.
1813          */
1814    
1815        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1816        cbegroup = (*callpat >= OP_SBRA);        cbegroup = (*callpat >= OP_SBRA);
# Line 1795  for (;;) Line 1821  for (;;)
1821            md, eptrb, RM6);            md, eptrb, RM6);
1822          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
1823              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1824            md->capture_last = new_recursive.saved_capture_last;
1825          md->recursive = new_recursive.prevrec;          md->recursive = new_recursive.prevrec;
1826          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1827            {            {
# Line 1945  for (;;) Line 1972  for (;;)
1972    
1973        /* Deal with capturing */        /* Deal with capturing */
1974    
1975        md->capture_last = number;        md->capture_last = (md->capture_last & OVFLMASK) | number;
1976        if (offset >= md->offset_max) md->offset_overflow = TRUE; else        if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1977          {          {
1978          /* If offset is greater than offset_top, it means that we are          /* If offset is greater than offset_top, it means that we are
1979          "skipping" a capturing group, and that group's offsets must be marked          "skipping" a capturing group, and that group's offsets must be marked
# Line 2530  for (;;) Line 2557  for (;;)
2557        }        }
2558      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2559        {        {
2560        const pcre_uint32 *cp;        const pcre_uint32 *cp;
2561        const ucd_record *prop = GET_UCD(c);        const ucd_record *prop = GET_UCD(c);
2562    
2563        switch(ecode[1])        switch(ecode[1])
# Line 2592  for (;;) Line 2619  for (;;)
2619          break;          break;
2620    
2621          case PT_CLIST:          case PT_CLIST:
2622          cp = PRIV(ucd_caseless_sets) + prop->caseset;          cp = PRIV(ucd_caseless_sets) + ecode[2];
2623          for (;;)          for (;;)
2624            {            {
2625            if (c < *cp)            if (c < *cp)
# Line 2601  for (;;) Line 2628  for (;;)
2628              { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }              { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
2629            }            }
2630          break;          break;
2631    
2632            case PT_UCNC:
2633            if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
2634                 c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
2635                 c >= 0xe000) == (op == OP_NOTPROP))
2636              RRETURN(MATCH_NOMATCH);
2637            break;
2638    
2639          /* This should never occur */          /* This should never occur */
2640    
# Line 3188  for (;;) Line 3222  for (;;)
3222    
3223        if (fc < 128)        if (fc < 128)
3224          {          {
3225          pcre_uchar cc = RAWUCHAR(eptr);          pcre_uint32 cc = RAWUCHAR(eptr);
3226          if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);          if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
3227          ecode++;          ecode++;
3228          eptr++;          eptr++;
# Line 3437  for (;;) Line 3471  for (;;)
3471    
3472        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3473          {          {
3474          pcre_uchar cc;          pcre_uint32 cc;                 /* Faster than pcre_uchar */
   
3475          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
3476            {            {
3477            SCHECK_PARTIAL();            SCHECK_PARTIAL();
# Line 3453  for (;;) Line 3486  for (;;)
3486          {          {
3487          for (fi = min;; fi++)          for (fi = min;; fi++)
3488            {            {
3489            pcre_uchar cc;            pcre_uint32 cc;               /* Faster than pcre_uchar */
   
3490            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3491            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3492            if (fi >= max) RRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
# Line 3474  for (;;) Line 3506  for (;;)
3506          pp = eptr;          pp = eptr;
3507          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3508            {            {
3509            pcre_uchar cc;            pcre_uint32 cc;               /* Faster than pcre_uchar */
   
3510            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
3511              {              {
3512              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4201  for (;;) Line 4232  for (;;)
4232                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4233              }              }
4234            break;            break;
4235    
4236            case PT_CLIST:            case PT_CLIST:
4237            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
4238              {              {
4239              const pcre_uint32 *cp;              const pcre_uint32 *cp;
4240              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
4241                {                {
# Line 4212  for (;;) Line 4243  for (;;)
4243                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
4244                }                }
4245              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
4246              cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c);              cp = PRIV(ucd_caseless_sets) + prop_value;
4247              for (;;)              for (;;)
4248                {                {
4249                if (c < *cp)                if (c < *cp)
4250                  { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }                  { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4251                if (c == *cp++)                if (c == *cp++)
4252                  { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }                  { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4253                }                }
4254              }              }
4255            break;            break;
4256    
4257              case PT_UCNC:
4258              for (i = 1; i <= min; i++)
4259                {
4260                if (eptr >= md->end_subject)
4261                  {
4262                  SCHECK_PARTIAL();
4263                  RRETURN(MATCH_NOMATCH);
4264                  }
4265                GETCHARINCTEST(c, eptr);
4266                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
4267                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
4268                     c >= 0xe000) == prop_fail_result)
4269                  RRETURN(MATCH_NOMATCH);
4270                }
4271              break;
4272    
4273            /* This should not occur */            /* This should not occur */
4274    
4275            default:            default:
# Line 4428  for (;;) Line 4475  for (;;)
4475          case OP_DIGIT:          case OP_DIGIT:
4476          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4477            {            {
4478            pcre_uchar cc;            pcre_uint32 cc;
   
4479            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4480              {              {
4481              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4446  for (;;) Line 4492  for (;;)
4492          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
4493          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4494            {            {
4495            pcre_uchar cc;            pcre_uint32 cc;
   
4496            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4497              {              {
4498              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4464  for (;;) Line 4509  for (;;)
4509          case OP_WHITESPACE:          case OP_WHITESPACE:
4510          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4511            {            {
4512            pcre_uchar cc;            pcre_uint32 cc;
   
4513            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4514              {              {
4515              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4482  for (;;) Line 4526  for (;;)
4526          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
4527          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4528            {            {
4529            pcre_uchar cc;            pcre_uint32 cc;
   
4530            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4531              {              {
4532              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4500  for (;;) Line 4543  for (;;)
4543          case OP_WORDCHAR:          case OP_WORDCHAR:
4544          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4545            {            {
4546            pcre_uchar cc;            pcre_uint32 cc;
   
4547            if (eptr >= md->end_subject)            if (eptr >= md->end_subject)
4548              {              {
4549              SCHECK_PARTIAL();              SCHECK_PARTIAL();
# Line 4952  for (;;) Line 4994  for (;;)
4994    
4995            case PT_CLIST:            case PT_CLIST:
4996            for (fi = min;; fi++)            for (fi = min;; fi++)
4997              {              {
4998              const pcre_uint32 *cp;              const pcre_uint32 *cp;
4999              RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
5000              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5001              if (fi >= max) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
5002              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 4963  for (;;) Line 5005  for (;;)
5005                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
5006                }                }
5007              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
5008              cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c);              cp = PRIV(ucd_caseless_sets) + prop_value;
5009              for (;;)              for (;;)
5010                {                {
5011                if (c < *cp)                if (c < *cp)
# Line 4973  for (;;) Line 5015  for (;;)
5015                }                }
5016              }              }
5017            /* Control never gets here */            /* Control never gets here */
5018    
5019              case PT_UCNC:
5020              for (fi = min;; fi++)
5021                {
5022                RMATCH(eptr, ecode, offset_top, md, eptrb, RM68);
5023                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5024                if (fi >= max) RRETURN(MATCH_NOMATCH);
5025                if (eptr >= md->end_subject)
5026                  {
5027                  SCHECK_PARTIAL();
5028                  RRETURN(MATCH_NOMATCH);
5029                  }
5030                GETCHARINCTEST(c, eptr);
5031                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5032                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5033                     c >= 0xe000) == prop_fail_result)
5034                  RRETURN(MATCH_NOMATCH);
5035                }
5036              /* Control never gets here */
5037    
5038            /* This should never occur */            /* This should never occur */
5039            default:            default:
# Line 5443  for (;;) Line 5504  for (;;)
5504              eptr+= len;              eptr+= len;
5505              }              }
5506            break;            break;
5507    
5508            case PT_CLIST:            case PT_CLIST:
5509            for (i = min; i < max; i++)            for (i = min; i < max; i++)
5510              {              {
# Line 5455  for (;;) Line 5516  for (;;)
5516                break;                break;
5517                }                }
5518              GETCHARLENTEST(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
5519              cp = PRIV(ucd_caseless_sets) + UCD_CASESET(c);              cp = PRIV(ucd_caseless_sets) + prop_value;
5520              for (;;)              for (;;)
5521                {                {
5522                if (c < *cp)                if (c < *cp)
5523                  { if (prop_fail_result) break; else goto GOT_MAX; }                  { if (prop_fail_result) break; else goto GOT_MAX; }
5524                if (c == *cp++)                if (c == *cp++)
5525                  { if (prop_fail_result) goto GOT_MAX; else break; }                  { if (prop_fail_result) goto GOT_MAX; else break; }
5526                }                }
5527              eptr += len;              eptr += len;
5528              }              }
5529            GOT_MAX:            GOT_MAX:
5530              break;
5531    
5532              case PT_UCNC:
5533              for (i = min; i < max; i++)
5534                {
5535                int len = 1;
5536                if (eptr >= md->end_subject)
5537                  {
5538                  SCHECK_PARTIAL();
5539                  break;
5540                  }
5541                GETCHARLENTEST(c, eptr, len);
5542                if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5543                     c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5544                     c >= 0xe000) == prop_fail_result)
5545                  break;
5546                eptr += len;
5547                }
5548            break;            break;
5549    
5550            default:            default:
# Line 6109  switch (frame->Xwhere) Line 6188  switch (frame->Xwhere)
6188    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
6189  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6190    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6191    LBL(59) LBL(60) LBL(61) LBL(62)    LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) LBL(68)
6192  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
6193  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
6194    default:    default:
6195    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
   
 printf("+++jump error in pcre match: label %d non-existent\n", frame->Xwhere);  
   
6196    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
6197    }    }
6198  #undef LBL  #undef LBL
# Line 6265  const pcre_uint8 *start_bits = NULL; Line 6341  const pcre_uint8 *start_bits = NULL;
6341  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;  PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6342  PCRE_PUCHAR end_subject;  PCRE_PUCHAR end_subject;
6343  PCRE_PUCHAR start_partial = NULL;  PCRE_PUCHAR start_partial = NULL;
6344    PCRE_PUCHAR match_partial;
6345  PCRE_PUCHAR req_char_ptr = start_match - 1;  PCRE_PUCHAR req_char_ptr = start_match - 1;
6346    
6347  const pcre_study_data *study;  const pcre_study_data *study;
# Line 6296  if ((options & ~PUBLIC_EXEC_OPTIONS) != Line 6373  if ((options & ~PUBLIC_EXEC_OPTIONS) !=
6373  if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))  if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
6374    return PCRE_ERROR_NULL;    return PCRE_ERROR_NULL;
6375  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6376    if (length < 0) return PCRE_ERROR_BADLENGTH;
6377  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;  if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
6378    
6379  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
# Line 6362  if (extra_data != NULL Line 6440  if (extra_data != NULL
6440      && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |      && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6441                               PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT                               PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6442      && extra_data->executable_jit != NULL      && extra_data->executable_jit != NULL
6443      && (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL |      && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
                     PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART |  
                     PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD)) == 0)  
6444    {    {
6445    rc = PRIV(jit_exec)(re, extra_data, (const pcre_uchar *)subject, length,    rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
6446         start_offset, options, offsets, offsetcount);         start_offset, options, offsets, offsetcount);
6447    
6448    /* PCRE_ERROR_NULL means that the selected normal or partial matching    /* PCRE_ERROR_NULL means that the selected normal or partial matching
6449    mode is not compiled. In this case we simply fallback to interpreter. */    mode is not compiled. In this case we simply fallback to interpreter. */
6450    
6451    if (rc != PCRE_ERROR_NULL) return rc;    if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
6452    }    }
6453  #endif  #endif
6454    
# Line 6544  if (re->top_backref > 0 && re->top_backr Line 6620  if (re->top_backref > 0 && re->top_backr
6620    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
6621    }    }
6622  else md->offset_vector = offsets;  else md->offset_vector = offsets;
   
6623  md->offset_end = ocount;  md->offset_end = ocount;
6624  md->offset_max = (2*ocount)/3;  md->offset_max = (2*ocount)/3;
6625  md->offset_overflow = FALSE;  md->capture_last = 0;
 md->capture_last = -1;  
6626    
6627  /* Reset the working variable associated with each extraction. These should  /* Reset the working variable associated with each extraction. These should
6628  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
# Line 6819  for(;;) Line 6893  for(;;)
6893    md->match_function_type = 0;    md->match_function_type = 0;
6894    md->end_offset_top = 0;    md->end_offset_top = 0;
6895    rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);    rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
6896    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;    if (md->hitend && start_partial == NULL)
6897        {
6898        start_partial = md->start_used_ptr;
6899        match_partial = start_match;
6900        }
6901    
6902    switch(rc)    switch(rc)
6903      {      {
# Line 6945  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 7023  if (rc == MATCH_MATCH || rc == MATCH_ACC
7023          (arg_offset_max - 2) * sizeof(int));          (arg_offset_max - 2) * sizeof(int));
7024        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
7025        }        }
7026      if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE;      if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
7027      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
7028      (PUBL(free))(md->offset_vector);      (PUBL(free))(md->offset_vector);
7029      }      }
# Line 6953  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 7031  if (rc == MATCH_MATCH || rc == MATCH_ACC
7031    /* Set the return code to the number of captured strings, or 0 if there were    /* Set the return code to the number of captured strings, or 0 if there were
7032    too many to fit into the vector. */    too many to fit into the vector. */
7033    
7034    rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)?    rc = ((md->capture_last & OVFLBIT) != 0 &&
7035             md->end_offset_top >= arg_offset_max)?
7036      0 : md->end_offset_top/2;      0 : md->end_offset_top/2;
7037    
7038    /* If there is space in the offset vector, set any unused pairs at the end of    /* If there is space in the offset vector, set any unused pairs at the end of
# Line 7026  if (start_partial != NULL) Line 7105  if (start_partial != NULL)
7105      {      {
7106      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);      offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7107      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);      offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7108        if (offsetcount > 2)
7109          offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
7110      }      }
7111    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
7112    }    }

Legend:
Removed from v.1102  
changed lines
  Added in v.1260

  ViewVC Help
Powered by ViewVC 1.1.5