/[pcre]/code/branches/pcre16/pcre_exec.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 788 by zherczeg, Tue Dec 6 11:33:41 2011 UTC revision 789 by zherczeg, Wed Dec 7 14:36:26 2011 UTC
# Line 181  ASCII characters. */ Line 181  ASCII characters. */
181    
182  if (caseless)  if (caseless)
183    {    {
184  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
185  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
186    if (md->utf)    if (md->utf)
187      {      {
# Line 365  typedef struct heapframe { Line 365  typedef struct heapframe {
365    /* Function local variables */    /* Function local variables */
366    
367    PCRE_PUCHAR Xcallpat;    PCRE_PUCHAR Xcallpat;
368  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
369    PCRE_PUCHAR Xcharptr;    PCRE_PUCHAR Xcharptr;
370  #endif  #endif
371    PCRE_PUCHAR Xdata;    PCRE_PUCHAR Xdata;
# Line 527  HEAP_RECURSE: Line 527  HEAP_RECURSE:
527    
528  /* Ditto for the local variables */  /* Ditto for the local variables */
529    
530  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
531  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
532  #endif  #endif
533  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
# Line 585  declarations can be cut out in a block. Line 585  declarations can be cut out in a block.
585  below are for variables that do not have to be preserved over a recursive call  below are for variables that do not have to be preserved over a recursive call
586  to RMATCH(). */  to RMATCH(). */
587    
588  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
589  const pcre_uchar *charptr;  const pcre_uchar *charptr;
590  #endif  #endif
591  const pcre_uchar *callpat;  const pcre_uchar *callpat;
# Line 634  the alternative names that are used. */ Line 634  the alternative names that are used. */
634  #define code_offset   codelink  #define code_offset   codelink
635  #define condassert    condition  #define condassert    condition
636  #define matched_once  prev_is_word  #define matched_once  prev_is_word
637    #define foc           number
638    
639  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
640  variables. */  variables. */
# Line 659  defined). However, RMATCH isn't like a f Line 660  defined). However, RMATCH isn't like a f
660  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
661  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
662    
663  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
664  utf = md->utf;       /* Local copy of the flag */  utf = md->utf;       /* Local copy of the flag */
665  #else  #else
666  utf = FALSE;  utf = FALSE;
# Line 1596  for (;;) Line 1597  for (;;)
1597      back a number of characters, not bytes. */      back a number of characters, not bytes. */
1598    
1599      case OP_REVERSE:      case OP_REVERSE:
1600  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1601      if (utf)      if (utf)
1602        {        {
1603        i = GET(ecode, 1);        i = GET(ecode, 1);
# Line 2216  for (;;) Line 2217  for (;;)
2217        }        }
2218      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2219      if (      if (
2220  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2221         c < 256 &&         c < 256 &&
2222  #endif  #endif
2223         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
# Line 2233  for (;;) Line 2234  for (;;)
2234        }        }
2235      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2236      if (      if (
2237  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2238         c >= 256 ||         c > 255 ||
2239  #endif  #endif
2240         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
2241         )         )
# Line 2250  for (;;) Line 2251  for (;;)
2251        }        }
2252      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2253      if (      if (
2254  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2255         c < 256 &&         c < 256 &&
2256  #endif  #endif
2257         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
# Line 2267  for (;;) Line 2268  for (;;)
2268        }        }
2269      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2270      if (      if (
2271  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2272         c >= 256 ||         c > 255 ||
2273  #endif  #endif
2274         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
2275         )         )
# Line 2284  for (;;) Line 2285  for (;;)
2285        }        }
2286      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2287      if (      if (
2288  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2289         c < 256 &&         c < 256 &&
2290  #endif  #endif
2291         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
# Line 2301  for (;;) Line 2302  for (;;)
2302        }        }
2303      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2304      if (      if (
2305  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2306         c >= 256 ||         c > 255 ||
2307  #endif  #endif
2308         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
2309         )         )
# Line 3036  for (;;) Line 3037  for (;;)
3037      /* Match a single character, casefully */      /* Match a single character, casefully */
3038    
3039      case OP_CHAR:      case OP_CHAR:
3040  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3041      if (utf)      if (utf)
3042        {        {
3043        length = 1;        length = 1;
# Line 3108  for (;;) Line 3109  for (;;)
3109          }          }
3110        }        }
3111      else      else
3112  #endif   /* SUPPORT_UTF8 */  #endif   /* SUPPORT_UTF */
3113    
3114      /* Not UTF mode */      /* Not UTF mode */
3115        {        {
# Line 3117  for (;;) Line 3118  for (;;)
3118          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
3119          MRRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3120          }          }
3121        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);        if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3122              != TABLE_GET(*eptr, md->lcc, *eptr)) MRRETURN(MATCH_NOMATCH);
3123          eptr++;
3124        ecode += 2;        ecode += 2;
3125        }        }
3126      break;      break;
# Line 3190  for (;;) Line 3193  for (;;)
3193      /* Common code for all repeated single-character matches. */      /* Common code for all repeated single-character matches. */
3194    
3195      REPEATCHAR:      REPEATCHAR:
3196  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3197      if (utf)      if (utf)
3198        {        {
3199        length = 1;        length = 1;
# Line 3214  for (;;) Line 3217  for (;;)
3217          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3218            {            {
3219            if (eptr <= md->end_subject - length &&            if (eptr <= md->end_subject - length &&
3220              memcmp(eptr, charptr, length) == 0) eptr += length;              memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3221  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3222            else if (oclength > 0 &&            else if (oclength > 0 &&
3223                     eptr <= md->end_subject - oclength &&                     eptr <= md->end_subject - oclength &&
# Line 3237  for (;;) Line 3240  for (;;)
3240              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3241              if (fi >= max) MRRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3242              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3243                memcmp(eptr, charptr, length) == 0) eptr += length;                memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3244  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3245              else if (oclength > 0 &&              else if (oclength > 0 &&
3246                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
# Line 3258  for (;;) Line 3261  for (;;)
3261            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3262              {              {
3263              if (eptr <= md->end_subject - length &&              if (eptr <= md->end_subject - length &&
3264                  memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3265  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3266              else if (oclength > 0 &&              else if (oclength > 0 &&
3267                       eptr <= md->end_subject - oclength &&                       eptr <= md->end_subject - oclength &&
# Line 3294  for (;;) Line 3297  for (;;)
3297        value of fc will always be < 128. */        value of fc will always be < 128. */
3298        }        }
3299      else      else
3300  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
3301          /* When not in UTF-8 mode, load a single-byte character. */
3302          fc = *ecode++;
3303    
3304      /* When not in UTF-8 mode, load a single-byte character. */      /* The value of fc at this point is always one character, though we may
3305        or may not be in UTF mode. The code is duplicated for the caseless and
     fc = *ecode++;  
   
     /* The value of fc at this point is always less than 256, though we may or  
     may not be in UTF-8 mode. The code is duplicated for the caseless and  
3306      caseful cases, for speed, since matching characters is likely to be quite      caseful cases, for speed, since matching characters is likely to be quite
3307      common. First, ensure the minimum number of matches are present. If min =      common. First, ensure the minimum number of matches are present. If min =
3308      max, continue at the same level without recursing. Otherwise, if      max, continue at the same level without recursing. Otherwise, if
# Line 3314  for (;;) Line 3315  for (;;)
3315    
3316      if (op >= OP_STARI)  /* Caseless */      if (op >= OP_STARI)  /* Caseless */
3317        {        {
3318        fc = md->lcc[fc];  #ifdef COMPILE_PCRE8
3319          /* fc must be < 128 */
3320          foc = md->fcc[fc];
3321    #else
3322    #ifdef SUPPORT_UTF
3323    #ifdef SUPPORT_UCP
3324          if (utf && fc > 127)
3325            foc = UCD_OTHERCASE(fc);
3326    #else
3327          if (utf && fc > 127)
3328            foc = fc;
3329    #endif /* SUPPORT_UCP */
3330          else
3331    #endif /* SUPPORT_UTF */
3332            foc = TABLE_GET(fc, md->fcc, fc);
3333    #endif /* COMPILE_PCRE8 */
3334    
3335        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3336          {          {
3337          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
# Line 3322  for (;;) Line 3339  for (;;)
3339            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3340            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
3341            }            }
3342          if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);          if (fc != *eptr && foc != *eptr) MRRETURN(MATCH_NOMATCH);
3343            eptr++;
3344          }          }
3345        if (min == max) continue;        if (min == max) continue;
3346        if (minimize)        if (minimize)
# Line 3337  for (;;) Line 3355  for (;;)
3355              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3356              MRRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
3357              }              }
3358            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);            if (fc != *eptr && foc != *eptr) MRRETURN(MATCH_NOMATCH);
3359              eptr++;
3360            }            }
3361          /* Control never gets here */          /* Control never gets here */
3362          }          }
# Line 3351  for (;;) Line 3370  for (;;)
3370              SCHECK_PARTIAL();              SCHECK_PARTIAL();
3371              break;              break;
3372              }              }
3373            if (fc != md->lcc[*eptr]) break;            if (fc != *eptr && foc != *eptr) break;
3374            eptr++;            eptr++;
3375            }            }
3376    
# Line 3440  for (;;) Line 3459  for (;;)
3459      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3460      if (op == OP_NOTI)         /* The caseless case */      if (op == OP_NOTI)         /* The caseless case */
3461        {        {
3462  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
3463        if (c < 256)        if (c < 256)
3464  #endif  #endif
3465        c = md->lcc[c];          c = md->lcc[c];
3466        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3467        }        }
3468      else    /* Caseful */      else    /* Caseful */
# Line 3543  for (;;) Line 3562  for (;;)
3562    
3563      if (op >= OP_NOTSTARI)     /* Caseless */      if (op >= OP_NOTSTARI)     /* Caseless */
3564        {        {
3565        fc = md->lcc[fc];        fc = TABLE_GET(fc, md->lcc, fc);
3566    
3567  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3568        if (utf)        if (utf)
3569          {          {
3570          register unsigned int d;          register unsigned int d;
# Line 3580  for (;;) Line 3599  for (;;)
3599    
3600        if (minimize)        if (minimize)
3601          {          {
3602  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3603          if (utf)          if (utf)
3604            {            {
3605            register unsigned int d;            register unsigned int d;
# Line 3625  for (;;) Line 3644  for (;;)
3644          {          {
3645          pp = eptr;          pp = eptr;
3646    
3647  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3648          if (utf)          if (utf)
3649            {            {
3650            register unsigned int d;            register unsigned int d;
# Line 3683  for (;;) Line 3702  for (;;)
3702    
3703      else      else
3704        {        {
3705  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3706        if (utf)        if (utf)
3707          {          {
3708          register unsigned int d;          register unsigned int d;
# Line 3717  for (;;) Line 3736  for (;;)
3736    
3737        if (minimize)        if (minimize)
3738          {          {
3739  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3740          if (utf)          if (utf)
3741            {            {
3742            register unsigned int d;            register unsigned int d;
# Line 3761  for (;;) Line 3780  for (;;)
3780          {          {
3781          pp = eptr;          pp = eptr;
3782    
3783  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3784          if (utf)          if (utf)
3785            {            {
3786            register unsigned int d;            register unsigned int d;
# Line 4353  for (;;) Line 4372  for (;;)
4372          }  /* End switch(ctype) */          }  /* End switch(ctype) */
4373    
4374        else        else
4375  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF */
4376    
4377        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
4378        than OP_PROP and OP_NOTPROP. */        than OP_PROP and OP_NOTPROP. */
# Line 4796  for (;;) Line 4815  for (;;)
4815        else        else
4816  #endif     /* SUPPORT_UCP */  #endif     /* SUPPORT_UCP */
4817    
4818  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4819        if (utf)        if (utf)
4820          {          {
4821          for (fi = min;; fi++)          for (fi = min;; fi++)
# Line 5596  for (;;) Line 5615  for (;;)
5615            }            }
5616          }          }
5617        else        else
5618  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
5619        /* Not UTF mode */        /* Not UTF mode */
5620          {          {
5621          switch(ctype)          switch(ctype)
# Line 5844  switch (frame->Xwhere) Line 5863  switch (frame->Xwhere)
5863    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5864    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
5865    LBL(65) LBL(66)    LBL(65) LBL(66)
5866  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
5867    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5868    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
5869  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
5870    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
5871    LBL(59) LBL(60) LBL(61) LBL(62)    LBL(59) LBL(60) LBL(61) LBL(62)
5872  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
5873  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
5874    default:    default:
5875    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
5876    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
# Line 6002  md->partial = ((options & PCRE_PARTIAL_H Line 6021  md->partial = ((options & PCRE_PARTIAL_H
6021  /* Check a UTF-8 string if required. Pass back the character offset and error  /* Check a UTF-8 string if required. Pass back the character offset and error
6022  code for an invalid string if a results vector is available. */  code for an invalid string if a results vector is available. */
6023    
6024  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
6025  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6026    {    {
6027    int erroroffset;    int erroroffset;
# Line 6138  md->recursive = NULL; Line 6157  md->recursive = NULL;
6157  md->hasthen = (re->flags & PCRE_HASTHEN) != 0;  md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6158    
6159  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
6160    md->fcc = tables + fcc_offset;
6161  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
6162    
6163  /* Handle different \R options. */  /* Handle different \R options. */
# Line 6265  if (!anchored) Line 6285  if (!anchored)
6285      first_char = first_char2 = re->first_char;      first_char = first_char2 = re->first_char;
6286      if ((re->flags & PCRE_FCH_CASELESS) != 0)      if ((re->flags & PCRE_FCH_CASELESS) != 0)
6287        {        {
6288        first_char2 = TABLE_GET(first_char, tables + fcc_offset, first_char);        first_char2 = TABLE_GET(first_char, md->fcc, first_char);
6289  #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)  #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6290        if (utf && first_char > 127)        if (utf && first_char > 127)
6291          first_char2 = UCD_OTHERCASE(first_char);          first_char2 = UCD_OTHERCASE(first_char);
# Line 6287  if ((re->flags & PCRE_REQCHSET) != 0) Line 6307  if ((re->flags & PCRE_REQCHSET) != 0)
6307    req_char = req_char2 = re->req_char;    req_char = req_char2 = re->req_char;
6308    if ((re->flags & PCRE_RCH_CASELESS) != 0)    if ((re->flags & PCRE_RCH_CASELESS) != 0)
6309      {      {
6310      req_char2 = TABLE_GET(req_char, tables + fcc_offset, req_char);      req_char2 = TABLE_GET(req_char, md->fcc, req_char);
6311  #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)  #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6312      if (utf && req_char > 127)      if (utf && req_char > 127)
6313        req_char2 = UCD_OTHERCASE(req_char);        req_char2 = UCD_OTHERCASE(req_char);

Legend:
Removed from v.788  
changed lines
  Added in v.789

  ViewVC Help
Powered by ViewVC 1.1.5