/[pcre]/code/trunk/pcre_study.c
ViewVC logotype

Diff of /code/trunk/pcre_study.c

Parent Directory | Revision Log | View Patch

revision 1084 by chpe, Tue Oct 16 15:55:28 2012 UTC revision 1502 by ph10, Mon Sep 15 13:56:18 2014 UTC
# Line 66  string of that length that matches. In U Line 66  string of that length that matches. In U
66  rather than bytes.  rather than bytes.
67    
68  Arguments:  Arguments:
69      re              compiled pattern block
70    code            pointer to start of group (the bracket)    code            pointer to start of group (the bracket)
71    startcode       pointer to start of the whole pattern    startcode       pointer to start of the whole pattern's code
72    options         the compiling options    options         the compiling options
73    recurse_depth   RECURSE depth
74    
# Line 78  Returns:   the minimum length Line 79  Returns:   the minimum length
79  */  */
80    
81  static int  static int
82  find_minlength(const pcre_uchar *code, const pcre_uchar *startcode, int options,  find_minlength(const REAL_PCRE *re, const pcre_uchar *code,
83    int recurse_depth)    const pcre_uchar *startcode, int options, int recurse_depth)
84  {  {
85  int length = -1;  int length = -1;
86  /* PCRE_UTF16 has the same value as PCRE_UTF8. */  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
# Line 129  for (;;) Line 130  for (;;)
130      case OP_SBRAPOS:      case OP_SBRAPOS:
131      case OP_ONCE:      case OP_ONCE:
132      case OP_ONCE_NC:      case OP_ONCE_NC:
133      d = find_minlength(cc, startcode, options, recurse_depth);      d = find_minlength(re, cc, startcode, options, recurse_depth);
134      if (d < 0) return d;      if (d < 0) return d;
135      branchlength += d;      branchlength += d;
136      do cc += GET(cc, 1); while (*cc == OP_ALT);      do cc += GET(cc, 1); while (*cc == OP_ALT);
# Line 175  for (;;) Line 176  for (;;)
176    
177      case OP_REVERSE:      case OP_REVERSE:
178      case OP_CREF:      case OP_CREF:
179      case OP_NCREF:      case OP_DNCREF:
180      case OP_RREF:      case OP_RREF:
181      case OP_NRREF:      case OP_DNRREF:
182      case OP_DEF:      case OP_DEF:
183      case OP_CALLOUT:      case OP_CALLOUT:
184      case OP_SOD:      case OP_SOD:
# Line 224  for (;;) Line 225  for (;;)
225      case OP_NOTPOSPLUSI:      case OP_NOTPOSPLUSI:
226      branchlength++;      branchlength++;
227      cc += 2;      cc += 2;
228  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #ifdef SUPPORT_UTF
229      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
230  #endif  #endif
231      break;      break;
# Line 245  for (;;) Line 246  for (;;)
246      case OP_NOTEXACTI:      case OP_NOTEXACTI:
247      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
248      cc += 2 + IMM2_SIZE;      cc += 2 + IMM2_SIZE;
249  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #ifdef SUPPORT_UTF
250      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
251  #endif  #endif
252      break;      break;
# Line 323  for (;;) Line 324  for (;;)
324    
325      /* Check a class for variable quantification */      /* Check a class for variable quantification */
326    
 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  
     case OP_XCLASS:  
     cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS];  
     /* Fall through */  
 #endif  
   
327      case OP_CLASS:      case OP_CLASS:
328      case OP_NCLASS:      case OP_NCLASS:
329    #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
330        case OP_XCLASS:
331        /* The original code caused an unsigned overflow in 64 bit systems,
332        so now we use a conditional statement. */
333        if (op == OP_XCLASS)
334          cc += GET(cc, 1);
335        else
336          cc += PRIV(OP_lengths)[OP_CLASS];
337    #else
338      cc += PRIV(OP_lengths)[OP_CLASS];      cc += PRIV(OP_lengths)[OP_CLASS];
339    #endif
340    
341      switch (*cc)      switch (*cc)
342        {        {
343        case OP_CRPLUS:        case OP_CRPLUS:
344        case OP_CRMINPLUS:        case OP_CRMINPLUS:
345          case OP_CRPOSPLUS:
346        branchlength++;        branchlength++;
347        /* Fall through */        /* Fall through */
348    
# Line 344  for (;;) Line 350  for (;;)
350        case OP_CRMINSTAR:        case OP_CRMINSTAR:
351        case OP_CRQUERY:        case OP_CRQUERY:
352        case OP_CRMINQUERY:        case OP_CRMINQUERY:
353          case OP_CRPOSSTAR:
354          case OP_CRPOSQUERY:
355        cc++;        cc++;
356        break;        break;
357    
358        case OP_CRRANGE:        case OP_CRRANGE:
359        case OP_CRMINRANGE:        case OP_CRMINRANGE:
360          case OP_CRPOSRANGE:
361        branchlength += GET2(cc,1);        branchlength += GET2(cc,1);
362        cc += 1 + 2 * IMM2_SIZE;        cc += 1 + 2 * IMM2_SIZE;
363        break;        break;
# Line 371  for (;;) Line 380  for (;;)
380      matches an empty string (by default it causes a matching failure), so in      matches an empty string (by default it causes a matching failure), so in
381      that case we must set the minimum length to zero. */      that case we must set the minimum length to zero. */
382    
383      case OP_REF:      case OP_DNREF:     /* Duplicate named pattern back reference */
384        case OP_DNREFI:
385        if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
386          {
387          int count = GET2(cc, 1+IMM2_SIZE);
388          pcre_uchar *slot = (pcre_uchar *)re +
389            re->name_table_offset + GET2(cc, 1) * re->name_entry_size;
390          d = INT_MAX;
391          while (count-- > 0)
392            {
393            ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0));
394            if (cs == NULL) return -2;
395            do ce += GET(ce, 1); while (*ce == OP_ALT);
396            if (cc > cs && cc < ce)
397              {
398              d = 0;
399              had_recurse = TRUE;
400              break;
401              }
402            else
403              {
404              int dd = find_minlength(re, cs, startcode, options, recurse_depth);
405              if (dd < d) d = dd;
406              }
407            slot += re->name_entry_size;
408            }
409          }
410        else d = 0;
411        cc += 1 + 2*IMM2_SIZE;
412        goto REPEAT_BACK_REFERENCE;
413    
414        case OP_REF:      /* Single back reference */
415      case OP_REFI:      case OP_REFI:
416      if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)      if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
417        {        {
# Line 385  for (;;) Line 425  for (;;)
425          }          }
426        else        else
427          {          {
428          d = find_minlength(cs, startcode, options, recurse_depth);          d = find_minlength(re, cs, startcode, options, recurse_depth);
429          }          }
430        }        }
431      else d = 0;      else d = 0;
# Line 393  for (;;) Line 433  for (;;)
433    
434      /* Handle repeated back references */      /* Handle repeated back references */
435    
436        REPEAT_BACK_REFERENCE:
437      switch (*cc)      switch (*cc)
438        {        {
439        case OP_CRSTAR:        case OP_CRSTAR:
440        case OP_CRMINSTAR:        case OP_CRMINSTAR:
441        case OP_CRQUERY:        case OP_CRQUERY:
442        case OP_CRMINQUERY:        case OP_CRMINQUERY:
443          case OP_CRPOSSTAR:
444          case OP_CRPOSQUERY:
445        min = 0;        min = 0;
446        cc++;        cc++;
447        break;        break;
448    
449        case OP_CRPLUS:        case OP_CRPLUS:
450        case OP_CRMINPLUS:        case OP_CRMINPLUS:
451          case OP_CRPOSPLUS:
452        min = 1;        min = 1;
453        cc++;        cc++;
454        break;        break;
455    
456        case OP_CRRANGE:        case OP_CRRANGE:
457        case OP_CRMINRANGE:        case OP_CRMINRANGE:
458          case OP_CRPOSRANGE:
459        min = GET2(cc, 1);        min = GET2(cc, 1);
460        cc += 1 + 2 * IMM2_SIZE;        cc += 1 + 2 * IMM2_SIZE;
461        break;        break;
# Line 433  for (;;) Line 478  for (;;)
478        had_recurse = TRUE;        had_recurse = TRUE;
479      else      else
480        {        {
481        branchlength += find_minlength(cs, startcode, options, recurse_depth + 1);        branchlength += find_minlength(re, cs, startcode, options,
482            recurse_depth + 1);
483        }        }
484      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
485      break;      break;
# Line 486  for (;;) Line 532  for (;;)
532      case OP_NOTPOSQUERYI:      case OP_NOTPOSQUERYI:
533    
534      cc += PRIV(OP_lengths)[op];      cc += PRIV(OP_lengths)[op];
535  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #ifdef SUPPORT_UTF
536      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
537  #endif  #endif
538      break;      break;
# Line 547  static const pcre_uchar * Line 593  static const pcre_uchar *
593  set_table_bit(pcre_uint8 *start_bits, const pcre_uchar *p, BOOL caseless,  set_table_bit(pcre_uint8 *start_bits, const pcre_uchar *p, BOOL caseless,
594    compile_data *cd, BOOL utf)    compile_data *cd, BOOL utf)
595  {  {
596  unsigned int c = *p;  pcre_uint32 c = *p;
597    
598  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
599  SET_BIT(c);  SET_BIT(c);
# Line 568  if (utf && c > 127) Line 614  if (utf && c > 127)
614    return p;    return p;
615    }    }
616  #else   /* Not SUPPORT_UTF */  #else   /* Not SUPPORT_UTF */
617  (void)(utf);   /* Stops warning for unused parameter */  (void)(utf);   /* Stops warning for unused parameter */
618  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
619    
620  /* Not UTF-8 mode, or character is not greater than 127. */  /* Not UTF-8 mode, or character is not greater than 127. */
# Line 601  if (utf && c > 127) Line 647  if (utf && c > 127)
647    return p;    return p;
648    }    }
649  #else   /* Not SUPPORT_UTF */  #else   /* Not SUPPORT_UTF */
650  (void)(utf);   /* Stops warning for unused parameter */  (void)(utf);   /* Stops warning for unused parameter */
651  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
652    
653  if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);  if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
# Line 632  Returns:         nothing Line 678  Returns:         nothing
678  */  */
679    
680  static void  static void
681  set_type_bits(pcre_uint8 *start_bits, int cbit_type, int table_limit,  set_type_bits(pcre_uint8 *start_bits, int cbit_type, unsigned int table_limit,
682    compile_data *cd)    compile_data *cd)
683  {  {
684  register int c;  register pcre_uint32 c;
685  for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type];  for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type];
686  #if defined SUPPORT_UTF && defined COMPILE_PCRE8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
687  if (table_limit == 32) return;  if (table_limit == 32) return;
# Line 674  Returns:         nothing Line 720  Returns:         nothing
720  */  */
721    
722  static void  static void
723  set_nottype_bits(pcre_uint8 *start_bits, int cbit_type, int table_limit,  set_nottype_bits(pcre_uint8 *start_bits, int cbit_type, unsigned int table_limit,
724    compile_data *cd)    compile_data *cd)
725  {  {
726  register int c;  register pcre_uint32 c;
727  for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type];  for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type];
728  #if defined SUPPORT_UTF && defined COMPILE_PCRE8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
729  if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff;  if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff;
# Line 714  static int Line 760  static int
760  set_start_bits(const pcre_uchar *code, pcre_uint8 *start_bits, BOOL utf,  set_start_bits(const pcre_uchar *code, pcre_uint8 *start_bits, BOOL utf,
761    compile_data *cd)    compile_data *cd)
762  {  {
763  register int c;  register pcre_uint32 c;
764  int yield = SSB_DONE;  int yield = SSB_DONE;
765  #if defined SUPPORT_UTF && defined COMPILE_PCRE8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
766  int table_limit = utf? 16:32;  int table_limit = utf? 16:32;
# Line 774  do Line 820  do
820        case OP_COND:        case OP_COND:
821        case OP_CREF:        case OP_CREF:
822        case OP_DEF:        case OP_DEF:
823          case OP_DNCREF:
824          case OP_DNREF:
825          case OP_DNREFI:
826          case OP_DNRREF:
827        case OP_DOLL:        case OP_DOLL:
828        case OP_DOLLM:        case OP_DOLLM:
829        case OP_END:        case OP_END:
# Line 782  do Line 832  do
832        case OP_EXTUNI:        case OP_EXTUNI:
833        case OP_FAIL:        case OP_FAIL:
834        case OP_MARK:        case OP_MARK:
       case OP_NCREF:  
835        case OP_NOT:        case OP_NOT:
836        case OP_NOTEXACT:        case OP_NOTEXACT:
837        case OP_NOTEXACTI:        case OP_NOTEXACTI:
# Line 814  do Line 863  do
863        case OP_NOTUPTOI:        case OP_NOTUPTOI:
864        case OP_NOT_HSPACE:        case OP_NOT_HSPACE:
865        case OP_NOT_VSPACE:        case OP_NOT_VSPACE:
       case OP_NRREF:  
       case OP_PROP:  
866        case OP_PRUNE:        case OP_PRUNE:
867        case OP_PRUNE_ARG:        case OP_PRUNE_ARG:
868        case OP_RECURSE:        case OP_RECURSE:
# Line 831  do Line 878  do
878        case OP_SOM:        case OP_SOM:
879        case OP_THEN:        case OP_THEN:
880        case OP_THEN_ARG:        case OP_THEN_ARG:
 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  
       case OP_XCLASS:  
 #endif  
881        return SSB_FAIL;        return SSB_FAIL;
882    
883          /* A "real" property test implies no starting bits, but the fake property
884          PT_CLIST identifies a list of characters. These lists are short, as they
885          are used for characters with more than one "other case", so there is no
886          point in recognizing them for OP_NOTPROP. */
887    
888          case OP_PROP:
889          if (tcode[1] != PT_CLIST) return SSB_FAIL;
890            {
891            const pcre_uint32 *p = PRIV(ucd_caseless_sets) + tcode[2];
892            while ((c = *p++) < NOTACHAR)
893              {
894    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
895              if (utf)
896                {
897                pcre_uchar buff[6];
898                (void)PRIV(ord2utf)(c, buff);
899                c = buff[0];
900                }
901    #endif
902              if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
903              }
904            }
905          try_next = FALSE;
906          break;
907    
908        /* We can ignore word boundary tests. */        /* We can ignore word boundary tests. */
909    
910        case OP_WORD_BOUNDARY:        case OP_WORD_BOUNDARY:
# Line 1008  do Line 1077  do
1077        else        else
1078  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
1079          {          {
1080  #ifndef EBCDIC  #ifndef EBCDIC
1081          SET_BIT(0xA0);          SET_BIT(0xA0);
1082  #endif  /* Not EBCDIC */  #endif  /* Not EBCDIC */
1083  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1084          SET_BIT(0xFF);  /* For characters > 255 */          SET_BIT(0xFF);  /* For characters > 255 */
1085  #endif  /* COMPILE_PCRE[16|32] */  #endif  /* COMPILE_PCRE[16|32] */
# Line 1061  do Line 1130  do
1130        try_next = FALSE;        try_next = FALSE;
1131        break;        break;
1132    
1133        /* The cbit_space table has vertical tab as whitespace; we have to        /* The cbit_space table has vertical tab as whitespace; we no longer
1134        ensure it is set as not whitespace. Luckily, the code value is the same        have to play fancy tricks because Perl added VT to its whitespace at
1135        (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate bit. */        release 5.18. PCRE added it at release 8.34. */
1136    
1137        case OP_NOT_WHITESPACE:        case OP_NOT_WHITESPACE:
1138        set_nottype_bits(start_bits, cbit_space, table_limit, cd);        set_nottype_bits(start_bits, cbit_space, table_limit, cd);
       start_bits[1] |= 0x08;  
1139        try_next = FALSE;        try_next = FALSE;
1140        break;        break;
1141    
       /* The cbit_space table has vertical tab as whitespace; we have to not  
       set it from the table. Luckily, the code value is the same (0x0b) in  
       ASCII and EBCDIC, so we can just adjust the appropriate bit. */  
   
1142        case OP_WHITESPACE:        case OP_WHITESPACE:
       c = start_bits[1];    /* Save in case it was already set */  
1143        set_type_bits(start_bits, cbit_space, table_limit, cd);        set_type_bits(start_bits, cbit_space, table_limit, cd);
       start_bits[1] = (start_bits[1] & ~0x08) | c;  
1144        try_next = FALSE;        try_next = FALSE;
1145        break;        break;
1146    
# Line 1146  do Line 1208  do
1208  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
1209  #ifndef EBCDIC  #ifndef EBCDIC
1210            SET_BIT(0xA0);            SET_BIT(0xA0);
1211  #endif  /* Not EBCDIC */  #endif  /* Not EBCDIC */
1212          break;          break;
1213    
1214          case OP_ANYNL:          case OP_ANYNL:
# Line 1179  do Line 1241  do
1241          set_type_bits(start_bits, cbit_digit, table_limit, cd);          set_type_bits(start_bits, cbit_digit, table_limit, cd);
1242          break;          break;
1243    
1244          /* The cbit_space table has vertical tab as whitespace; we have to          /* The cbit_space table has vertical tab as whitespace; we no longer
1245          ensure it gets set as not whitespace. Luckily, the code value is the          have to play fancy tricks because Perl added VT to its whitespace at
1246          same (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate          release 5.18. PCRE added it at release 8.34. */
         bit. */  
1247    
1248          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
1249          set_nottype_bits(start_bits, cbit_space, table_limit, cd);          set_nottype_bits(start_bits, cbit_space, table_limit, cd);
         start_bits[1] |= 0x08;  
1250          break;          break;
1251    
         /* The cbit_space table has vertical tab as whitespace; we have to  
         avoid setting it. Luckily, the code value is the same (0x0b) in ASCII  
         and EBCDIC, so we can just adjust the appropriate bit. */  
   
1252          case OP_WHITESPACE:          case OP_WHITESPACE:
         c = start_bits[1];    /* Save in case it was already set */  
1253          set_type_bits(start_bits, cbit_space, table_limit, cd);          set_type_bits(start_bits, cbit_space, table_limit, cd);
         start_bits[1] = (start_bits[1] & ~0x08) | c;  
1254          break;          break;
1255    
1256          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
# Line 1217  do Line 1271  do
1271        with a value >= 0xc4 is a potentially valid starter because it starts a        with a value >= 0xc4 is a potentially valid starter because it starts a
1272        character with a value > 255. */        character with a value > 255. */
1273    
1274    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1275          case OP_XCLASS:
1276          if ((tcode[1 + LINK_SIZE] & XCL_HASPROP) != 0)
1277            return SSB_FAIL;
1278          /* All bits are set. */
1279          if ((tcode[1 + LINK_SIZE] & XCL_MAP) == 0 && (tcode[1 + LINK_SIZE] & XCL_NOT) != 0)
1280            return SSB_FAIL;
1281    #endif
1282          /* Fall through */
1283    
1284        case OP_NCLASS:        case OP_NCLASS:
1285  #if defined SUPPORT_UTF && defined COMPILE_PCRE8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1286        if (utf)        if (utf)
# Line 1233  do Line 1297  do
1297        case OP_CLASS:        case OP_CLASS:
1298          {          {
1299          pcre_uint8 *map;          pcre_uint8 *map;
1300          tcode++;  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1301          map = (pcre_uint8 *)tcode;          map = NULL;
1302            if (*tcode == OP_XCLASS)
1303              {
1304              if ((tcode[1 + LINK_SIZE] & XCL_MAP) != 0)
1305                map = (pcre_uint8 *)(tcode + 1 + LINK_SIZE + 1);
1306              tcode += GET(tcode, 1);
1307              }
1308            else
1309    #endif
1310              {
1311              tcode++;
1312              map = (pcre_uint8 *)tcode;
1313              tcode += 32 / sizeof(pcre_uchar);
1314              }
1315    
1316          /* In UTF-8 mode, the bits in a bit map correspond to character          /* In UTF-8 mode, the bits in a bit map correspond to character
1317          values, not to byte values. However, the bit map we are constructing is          values, not to byte values. However, the bit map we are constructing is
# Line 1242  do Line 1319  do
1319          value is > 127. In fact, there are only two possible starting bytes for          value is > 127. In fact, there are only two possible starting bytes for
1320          characters in the range 128 - 255. */          characters in the range 128 - 255. */
1321    
1322  #if defined SUPPORT_UTF && defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1323          if (utf)          if (map != NULL)
1324    #endif
1325            {            {
1326            for (c = 0; c < 16; c++) start_bits[c] |= map[c];  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1327            for (c = 128; c < 256; c++)            if (utf)
1328              {              {
1329              if ((map[c/8] && (1 << (c&7))) != 0)              for (c = 0; c < 16; c++) start_bits[c] |= map[c];
1330                for (c = 128; c < 256; c++)
1331                {                {
1332                int d = (c >> 6) | 0xc0;            /* Set bit for this starter */                if ((map[c/8] && (1 << (c&7))) != 0)
1333                start_bits[d/8] |= (1 << (d&7));    /* and then skip on to the */                  {
1334                c = (c & 0xc0) + 0x40 - 1;          /* next relevant character. */                  int d = (c >> 6) | 0xc0;            /* Set bit for this starter */
1335                    start_bits[d/8] |= (1 << (d&7));    /* and then skip on to the */
1336                    c = (c & 0xc0) + 0x40 - 1;          /* next relevant character. */
1337                    }
1338                }                }
1339              }              }
1340            }            else
         else  
1341  #endif  #endif
1342            {              {
1343            /* In non-UTF-8 mode, the two bit maps are completely compatible. */              /* In non-UTF-8 mode, the two bit maps are completely compatible. */
1344            for (c = 0; c < 32; c++) start_bits[c] |= map[c];              for (c = 0; c < 32; c++) start_bits[c] |= map[c];
1345                }
1346            }            }
1347    
1348          /* Advance past the bit map, and act on what follows. For a zero          /* Advance past the bit map, and act on what follows. For a zero
1349          minimum repeat, continue; otherwise stop processing. */          minimum repeat, continue; otherwise stop processing. */
1350    
         tcode += 32 / sizeof(pcre_uchar);  
1351          switch (*tcode)          switch (*tcode)
1352            {            {
1353            case OP_CRSTAR:            case OP_CRSTAR:
1354            case OP_CRMINSTAR:            case OP_CRMINSTAR:
1355            case OP_CRQUERY:            case OP_CRQUERY:
1356            case OP_CRMINQUERY:            case OP_CRMINQUERY:
1357              case OP_CRPOSSTAR:
1358              case OP_CRPOSQUERY:
1359            tcode++;            tcode++;
1360            break;            break;
1361    
1362            case OP_CRRANGE:            case OP_CRRANGE:
1363            case OP_CRMINRANGE:            case OP_CRMINRANGE:
1364              case OP_CRPOSRANGE:
1365            if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE;            if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE;
1366              else try_next = FALSE;              else try_next = FALSE;
1367            break;            break;
# Line 1342  pcre_uchar *code; Line 1426  pcre_uchar *code;
1426  compile_data compile_block;  compile_data compile_block;
1427  const REAL_PCRE *re = (const REAL_PCRE *)external_re;  const REAL_PCRE *re = (const REAL_PCRE *)external_re;
1428    
1429    
1430  *errorptr = NULL;  *errorptr = NULL;
1431    
1432  if (re == NULL || re->magic_number != MAGIC_NUMBER)  if (re == NULL || re->magic_number != MAGIC_NUMBER)
# Line 1418  if ((re->options & PCRE_ANCHORED) == 0 & Line 1503  if ((re->options & PCRE_ANCHORED) == 0 &
1503    
1504  /* Find the minimum length of subject string. */  /* Find the minimum length of subject string. */
1505    
1506  switch(min = find_minlength(code, code, re->options, 0))  switch(min = find_minlength(re, code, code, re->options, 0))
1507    {    {
1508    case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;    case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
1509    case -3: *errorptr = "internal error: opcode not recognized"; return NULL;    case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
# Line 1426  switch(min = find_minlength(code, code, Line 1511  switch(min = find_minlength(code, code,
1511    }    }
1512    
1513  /* If a set of starting bytes has been identified, or if the minimum length is  /* If a set of starting bytes has been identified, or if the minimum length is
1514  greater than zero, or if JIT optimization has been requested, or if  greater than zero, or if JIT optimization has been requested, or if
1515  PCRE_STUDY_EXTRA_NEEDED is set, get a pcre[16|32]_extra block and a  PCRE_STUDY_EXTRA_NEEDED is set, get a pcre[16|32]_extra block and a
1516  pcre_study_data block. The study data is put in the latter, which is pointed to  pcre_study_data block. The study data is put in the latter, which is pointed to
1517  by the former, which may also get additional data set later by the calling  by the former, which may also get additional data set later by the calling
# Line 1437  becomes variable in the future, we don't Line 1522  becomes variable in the future, we don't
1522  if (bits_set || min > 0 || (options & (  if (bits_set || min > 0 || (options & (
1523  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
1524      PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE |      PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE |
1525      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE |      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE |
1526  #endif  #endif
1527      PCRE_STUDY_EXTRA_NEEDED)) != 0)      PCRE_STUDY_EXTRA_NEEDED)) != 0)
1528    {    {
# Line 1493  if (bits_set || min > 0 || (options & ( Line 1578  if (bits_set || min > 0 || (options & (
1578    
1579    /* If JIT support was compiled and requested, attempt the JIT compilation.    /* If JIT support was compiled and requested, attempt the JIT compilation.
1580    If no starting bytes were found, and the minimum length is zero, and JIT    If no starting bytes were found, and the minimum length is zero, and JIT
1581    compilation fails, abandon the extra block and return NULL, unless    compilation fails, abandon the extra block and return NULL, unless
1582    PCRE_STUDY_EXTRA_NEEDED is set. */    PCRE_STUDY_EXTRA_NEEDED is set. */
1583    
1584  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT

Legend:
Removed from v.1084  
changed lines
  Added in v.1502

  ViewVC Help
Powered by ViewVC 1.1.5