/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 1376 by ph10, Sat Oct 12 18:02:11 2013 UTC revision 1414 by zherczeg, Sun Dec 22 16:27:35 2013 UTC
# Line 107  because the offset vector is always a mu Line 107  because the offset vector is always a mu
107    
108  /* Min and max values for the common repeats; for the maxima, 0 => infinity */  /* Min and max values for the common repeats; for the maxima, 0 => infinity */
109    
110  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, };
111  static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };  static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, };
112    
113  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
114  /*************************************************  /*************************************************
# Line 167  match_ref(int offset, register PCRE_PUCH Line 167  match_ref(int offset, register PCRE_PUCH
167  {  {
168  PCRE_PUCHAR eptr_start = eptr;  PCRE_PUCHAR eptr_start = eptr;
169  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];  register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
170  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && defined SUPPORT_UCP
171  BOOL utf = md->utf;  BOOL utf = md->utf;
172  #endif  #endif
173    
# Line 195  ASCII characters. */ Line 195  ASCII characters. */
195    
196  if (caseless)  if (caseless)
197    {    {
198  #ifdef SUPPORT_UTF  #if defined SUPPORT_UTF && defined SUPPORT_UCP
 #ifdef SUPPORT_UCP  
199    if (utf)    if (utf)
200      {      {
201      /* Match characters up to the end of the reference. NOTE: the number of      /* Match characters up to the end of the reference. NOTE: the number of
# Line 230  if (caseless) Line 229  if (caseless)
229      }      }
230    else    else
231  #endif  #endif
 #endif  
232    
233    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there    /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
234    is no UCP support. */    is no UCP support. */
# Line 312  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM Line 310  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM
310         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
311         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
312         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
313         RM61,  RM62, RM63, RM64, RM65, RM66, RM67, RM68 };         RM61,  RM62, RM63, RM64, RM65, RM66, RM67 };
314    
315  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
316  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 1173  for (;;) Line 1171  for (;;)
1171            ecode = md->start_code + code_offset;            ecode = md->start_code + code_offset;
1172            save_capture_last = md->capture_last;            save_capture_last = md->capture_last;
1173            matched_once = TRUE;            matched_once = TRUE;
1174              mstart = md->start_match_ptr;    /* In case \K changed it */
1175            continue;            continue;
1176            }            }
1177    
# Line 1245  for (;;) Line 1244  for (;;)
1244          eptr = md->end_match_ptr;          eptr = md->end_match_ptr;
1245          ecode = md->start_code + code_offset;          ecode = md->start_code + code_offset;
1246          matched_once = TRUE;          matched_once = TRUE;
1247            mstart = md->start_match_ptr;   /* In case \K reset it */
1248          continue;          continue;
1249          }          }
1250    
# Line 2007  for (;;) Line 2007  for (;;)
2007    
2008      if (*ecode == OP_KETRPOS)      if (*ecode == OP_KETRPOS)
2009        {        {
2010          md->start_match_ptr = mstart;    /* In case \K reset it */
2011        md->end_match_ptr = eptr;        md->end_match_ptr = eptr;
2012        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
2013        RRETURN(MATCH_KETRPOS);        RRETURN(MATCH_KETRPOS);
# Line 2864  for (;;) Line 2865  for (;;)
2865          case OP_CRMINPLUS:          case OP_CRMINPLUS:
2866          case OP_CRQUERY:          case OP_CRQUERY:
2867          case OP_CRMINQUERY:          case OP_CRMINQUERY:
2868            case OP_CRPOSSTAR:
2869            case OP_CRPOSPLUS:
2870            case OP_CRPOSQUERY:
2871          c = *ecode++ - OP_CRSTAR;          c = *ecode++ - OP_CRSTAR;
2872          minimize = (c & 1) != 0;          if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
2873            else possessive = TRUE;
2874          min = rep_min[c];                 /* Pick up values from tables; */          min = rep_min[c];                 /* Pick up values from tables; */
2875          max = rep_max[c];                 /* zero for max => infinity */          max = rep_max[c];                 /* zero for max => infinity */
2876          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
# Line 2873  for (;;) Line 2878  for (;;)
2878    
2879          case OP_CRRANGE:          case OP_CRRANGE:
2880          case OP_CRMINRANGE:          case OP_CRMINRANGE:
2881            case OP_CRPOSRANGE:
2882          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
2883            possessive = (*ecode == OP_CRPOSRANGE);
2884          min = GET2(ecode, 1);          min = GET2(ecode, 1);
2885          max = GET2(ecode, 1 + IMM2_SIZE);          max = GET2(ecode, 1 + IMM2_SIZE);
2886          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
# Line 3015  for (;;) Line 3022  for (;;)
3022                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3023              eptr += len;              eptr += len;
3024              }              }
3025    
3026              if (possessive) continue;    /* No backtracking */
3027    
3028            for (;;)            for (;;)
3029              {              {
3030              RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
# Line 3045  for (;;) Line 3055  for (;;)
3055                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;                if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3056              eptr++;              eptr++;
3057              }              }
3058    
3059              if (possessive) continue;    /* No backtracking */
3060    
3061            while (eptr >= pp)            while (eptr >= pp)
3062              {              {
3063              RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
# Line 3060  for (;;) Line 3073  for (;;)
3073      /* Control never gets here */      /* Control never gets here */
3074    
3075    
3076      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. In the 8-bit library, this opcode is
3077      when UTF-8 mode mode is supported. Nevertheless, we may not be in UTF-8      encountered only when UTF-8 mode mode is supported. In the 16-bit and
3078      mode, because Unicode properties are supported in non-UTF-8 mode. */      32-bit libraries, codepoints greater than 255 may be encountered even when
3079        UTF is not supported. */
3080    
3081  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3082      case OP_XCLASS:      case OP_XCLASS:
# Line 3078  for (;;) Line 3092  for (;;)
3092          case OP_CRMINPLUS:          case OP_CRMINPLUS:
3093          case OP_CRQUERY:          case OP_CRQUERY:
3094          case OP_CRMINQUERY:          case OP_CRMINQUERY:
3095            case OP_CRPOSSTAR:
3096            case OP_CRPOSPLUS:
3097            case OP_CRPOSQUERY:
3098          c = *ecode++ - OP_CRSTAR;          c = *ecode++ - OP_CRSTAR;
3099          minimize = (c & 1) != 0;          if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
3100            else possessive = TRUE;
3101          min = rep_min[c];                 /* Pick up values from tables; */          min = rep_min[c];                 /* Pick up values from tables; */
3102          max = rep_max[c];                 /* zero for max => infinity */          max = rep_max[c];                 /* zero for max => infinity */
3103          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
# Line 3087  for (;;) Line 3105  for (;;)
3105    
3106          case OP_CRRANGE:          case OP_CRRANGE:
3107          case OP_CRMINRANGE:          case OP_CRMINRANGE:
3108            case OP_CRPOSRANGE:
3109          minimize = (*ecode == OP_CRMINRANGE);          minimize = (*ecode == OP_CRMINRANGE);
3110            possessive = (*ecode == OP_CRPOSRANGE);
3111          min = GET2(ecode, 1);          min = GET2(ecode, 1);
3112          max = GET2(ecode, 1 + IMM2_SIZE);          max = GET2(ecode, 1 + IMM2_SIZE);
3113          if (max == 0) max = INT_MAX;          if (max == 0) max = INT_MAX;
# Line 3159  for (;;) Line 3179  for (;;)
3179            if (!PRIV(xclass)(c, data, utf)) break;            if (!PRIV(xclass)(c, data, utf)) break;
3180            eptr += len;            eptr += len;
3181            }            }
3182    
3183            if (possessive) continue;    /* No backtracking */
3184    
3185          for(;;)          for(;;)
3186            {            {
3187            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);            RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
# Line 5030  for (;;) Line 5053  for (;;)
5053            case PT_UCNC:            case PT_UCNC:
5054            for (fi = min;; fi++)            for (fi = min;; fi++)
5055              {              {
5056              RMATCH(eptr, ecode, offset_top, md, eptrb, RM68);              RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
5057              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5058              if (fi >= max) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
5059              if (eptr >= md->end_subject)              if (eptr >= md->end_subject)
# Line 6195  switch (frame->Xwhere) Line 6218  switch (frame->Xwhere)
6218    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6219    LBL(65) LBL(66)    LBL(65) LBL(66)
6220  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6221    LBL(21)    LBL(20) LBL(21)
6222  #endif  #endif
6223  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
6224    LBL(16) LBL(18) LBL(20)    LBL(16) LBL(18)
6225    LBL(22) LBL(23) LBL(28) LBL(30)    LBL(22) LBL(23) LBL(28) LBL(30)
6226    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
6227  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6228    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6229    LBL(59) LBL(60) LBL(61) LBL(62) LBL(67) LBL(68)    LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
6230  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
6231  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
6232    default:    default:
# Line 6810  for(;;) Line 6833  for(;;)
6833  #ifndef COMPILE_PCRE8  #ifndef COMPILE_PCRE8
6834          if (c > 255) c = 255;          if (c > 255) c = 255;
6835  #endif  #endif
6836          if ((start_bits[c/8] & (1 << (c&7))) == 0)          if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
6837            {          start_match++;
           start_match++;  
 #if defined SUPPORT_UTF && defined COMPILE_PCRE8  
           /* In non 8-bit mode, the iteration will stop for  
           characters > 255 at the beginning or not stop at all. */  
           if (utf)  
             ACROSSCHAR(start_match < end_subject, *start_match,  
               start_match++);  
 #endif  
           }  
         else break;  
6838          }          }
6839        }        }
6840      }   /* Starting optimizations */      }   /* Starting optimizations */

Legend:
Removed from v.1376  
changed lines
  Added in v.1414

  ViewVC Help
Powered by ViewVC 1.1.5