/[pcre]/code/trunk/pcre_study.c
ViewVC logotype

Diff of /code/trunk/pcre_study.c

Parent Directory | Revision Log | View Patch

revision 657 by ph10, Mon Aug 15 17:39:09 2011 UTC revision 1146 by zherczeg, Sat Oct 20 16:45:33 2012 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2010 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 66  string of that length that matches. In U Line 66  string of that length that matches. In U
66  rather than bytes.  rather than bytes.
67    
68  Arguments:  Arguments:
69    code        pointer to start of group (the bracket)    code            pointer to start of group (the bracket)
70    startcode   pointer to start of the whole pattern    startcode       pointer to start of the whole pattern
71    options     the compiling options    options         the compiling options
72    had_accept  pointer to flag for (*ACCEPT) encountered    recurse_depth   RECURSE depth
   int         RECURSE depth  
73    
74  Returns:   the minimum length  Returns:   the minimum length
75             -1 if \C was encountered             -1 if \C in UTF-8 mode or (*ACCEPT) was encountered
76             -2 internal error (missing capturing bracket)             -2 internal error (missing capturing bracket)
77             -3 internal error (opcode not listed)             -3 internal error (opcode not listed)
78  */  */
79    
80  static int  static int
81  find_minlength(const uschar *code, const uschar *startcode, int options,  find_minlength(const pcre_uchar *code, const pcre_uchar *startcode, int options,
82    BOOL *had_accept_ptr, int recurse_depth)    int recurse_depth)
83  {  {
84  int length = -1;  int length = -1;
85  BOOL utf8 = (options & PCRE_UTF8) != 0;  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
86    BOOL utf = (options & PCRE_UTF8) != 0;
87  BOOL had_recurse = FALSE;  BOOL had_recurse = FALSE;
88  register int branchlength = 0;  register int branchlength = 0;
89  register uschar *cc = (uschar *)code + 1 + LINK_SIZE;  register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE;
90    
91  if (*code == OP_CBRA || *code == OP_SCBRA ||  if (*code == OP_CBRA || *code == OP_SCBRA ||
92      *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += 2;      *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;
93    
94  /* Scan along the opcodes for this branch. If we get to the end of the  /* Scan along the opcodes for this branch. If we get to the end of the
95  branch, check the length against that of the other branches. */  branch, check the length against that of the other branches. */
# Line 97  branch, check the length against that of Line 97  branch, check the length against that of
97  for (;;)  for (;;)
98    {    {
99    int d, min;    int d, min;
100    uschar *cs, *ce;    pcre_uchar *cs, *ce;
101    register int op = *cc;    register pcre_uchar op = *cc;
102    
103    switch (op)    switch (op)
104      {      {
# Line 128  for (;;) Line 128  for (;;)
128      case OP_BRAPOS:      case OP_BRAPOS:
129      case OP_SBRAPOS:      case OP_SBRAPOS:
130      case OP_ONCE:      case OP_ONCE:
131      d = find_minlength(cc, startcode, options, had_accept_ptr, recurse_depth);      case OP_ONCE_NC:
132        d = find_minlength(cc, startcode, options, recurse_depth);
133      if (d < 0) return d;      if (d < 0) return d;
134      branchlength += d;      branchlength += d;
     if (*had_accept_ptr) return branchlength;  
135      do cc += GET(cc, 1); while (*cc == OP_ALT);      do cc += GET(cc, 1); while (*cc == OP_ALT);
136      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
137      break;      break;
138    
139      /* Reached end of a branch; if it's a ket it is the end of a nested      /* ACCEPT makes things far too complicated; we have to give up. */
     call. If it's ALT it is an alternation in a nested call. If it is END it's  
     the end of the outer call. All can be handled by the same code. If it is  
     ACCEPT, it is essentially the same as END, but we set a flag so that  
     counting stops. */  
140    
141      case OP_ACCEPT:      case OP_ACCEPT:
142      case OP_ASSERT_ACCEPT:      case OP_ASSERT_ACCEPT:
143      *had_accept_ptr = TRUE;      return -1;
144      /* Fall through */  
145        /* Reached end of a branch; if it's a ket it is the end of a nested
146        call. If it's ALT it is an alternation in a nested call. If it is END it's
147        the end of the outer call. All can be handled by the same code. An
148        ACCEPT can no longer reach this point, because the cases above give up
149        and return -1 as soon as one is encountered. */
150    
151      case OP_ALT:      case OP_ALT:
152      case OP_KET:      case OP_KET:
153      case OP_KETRMAX:      case OP_KETRMAX:
# Line 188  for (;;) Line 190  for (;;)
190      case OP_DOLLM:      case OP_DOLLM:
191      case OP_NOT_WORD_BOUNDARY:      case OP_NOT_WORD_BOUNDARY:
192      case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
193      cc += _pcre_OP_lengths[*cc];      cc += PRIV(OP_lengths)[*cc];
194      break;      break;
195    
196      /* Skip over a subpattern that has a {0} or {0,x} quantifier */      /* Skip over a subpattern that has a {0} or {0,x} quantifier */
# Line 197  for (;;) Line 199  for (;;)
199      case OP_BRAMINZERO:      case OP_BRAMINZERO:
200      case OP_BRAPOSZERO:      case OP_BRAPOSZERO:
201      case OP_SKIPZERO:      case OP_SKIPZERO:
202      cc += _pcre_OP_lengths[*cc];      cc += PRIV(OP_lengths)[*cc];
203      do cc += GET(cc, 1); while (*cc == OP_ALT);      do cc += GET(cc, 1); while (*cc == OP_ALT);
204      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
205      break;      break;
# Line 222  for (;;) Line 224  for (;;)
224      case OP_NOTPOSPLUSI:      case OP_NOTPOSPLUSI:
225      branchlength++;      branchlength++;
226      cc += 2;      cc += 2;
227  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
228      if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
229  #endif  #endif
230      break;      break;
231    
# Line 242  for (;;) Line 244  for (;;)
244      case OP_NOTEXACT:      case OP_NOTEXACT:
245      case OP_NOTEXACTI:      case OP_NOTEXACTI:
246      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
247      cc += 4;      cc += 2 + IMM2_SIZE;
248  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
249      if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
250  #endif  #endif
251      break;      break;
252    
253      case OP_TYPEEXACT:      case OP_TYPEEXACT:
254      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
255      cc += (cc[3] == OP_PROP || cc[3] == OP_NOTPROP)? 6 : 4;      cc += 2 + IMM2_SIZE + ((cc[1 + IMM2_SIZE] == OP_PROP
256          || cc[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
257      break;      break;
258    
259      /* Handle single-char non-literal matchers */      /* Handle single-char non-literal matchers */
# Line 285  for (;;) Line 288  for (;;)
288      cc++;      cc++;
289      break;      break;
290    
291      /* The single-byte matcher means we can't proceed in UTF-8 mode */      /* The single-byte matcher means we can't proceed in UTF-8 mode. (In
292        non-UTF-8 mode \C will actually be turned into OP_ALLANY, so won't ever
293        appear, but leave the code, just in case.) */
294    
295      case OP_ANYBYTE:      case OP_ANYBYTE:
296  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
297      if (utf8) return -1;      if (utf) return -1;
298  #endif  #endif
299      branchlength++;      branchlength++;
300      cc++;      cc++;
# Line 305  for (;;) Line 310  for (;;)
310      case OP_TYPEPOSSTAR:      case OP_TYPEPOSSTAR:
311      case OP_TYPEPOSQUERY:      case OP_TYPEPOSQUERY:
312      if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2;      if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2;
313      cc += _pcre_OP_lengths[op];      cc += PRIV(OP_lengths)[op];
314      break;      break;
315    
316      case OP_TYPEUPTO:      case OP_TYPEUPTO:
317      case OP_TYPEMINUPTO:      case OP_TYPEMINUPTO:
318      case OP_TYPEPOSUPTO:      case OP_TYPEPOSUPTO:
319      if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2;      if (cc[1 + IMM2_SIZE] == OP_PROP
320      cc += _pcre_OP_lengths[op];        || cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2;
321        cc += PRIV(OP_lengths)[op];
322      break;      break;
323    
324      /* Check a class for variable quantification */      /* Check a class for variable quantification */
325    
326  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
327      case OP_XCLASS:      case OP_XCLASS:
328      cc += GET(cc, 1) - 33;      cc += GET(cc, 1);
329        cc -= PRIV(OP_lengths)[OP_CLASS];
330      /* Fall through */      /* Fall through */
331  #endif  #endif
332    
333      case OP_CLASS:      case OP_CLASS:
334      case OP_NCLASS:      case OP_NCLASS:
335      cc += 33;      cc += PRIV(OP_lengths)[OP_CLASS];
336    
337      switch (*cc)      switch (*cc)
338        {        {
# Line 344  for (;;) Line 351  for (;;)
351        case OP_CRRANGE:        case OP_CRRANGE:
352        case OP_CRMINRANGE:        case OP_CRMINRANGE:
353        branchlength += GET2(cc,1);        branchlength += GET2(cc,1);
354        cc += 5;        cc += 1 + 2 * IMM2_SIZE;
355        break;        break;
356    
357        default:        default:
# Line 369  for (;;) Line 376  for (;;)
376      case OP_REFI:      case OP_REFI:
377      if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)      if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
378        {        {
379        ce = cs = (uschar *)_pcre_find_bracket(startcode, utf8, GET2(cc, 1));        ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1));
380        if (cs == NULL) return -2;        if (cs == NULL) return -2;
381        do ce += GET(ce, 1); while (*ce == OP_ALT);        do ce += GET(ce, 1); while (*ce == OP_ALT);
382        if (cc > cs && cc < ce)        if (cc > cs && cc < ce)
# Line 379  for (;;) Line 386  for (;;)
386          }          }
387        else        else
388          {          {
389          d = find_minlength(cs, startcode, options, had_accept_ptr,          d = find_minlength(cs, startcode, options, recurse_depth);
           recurse_depth);  
         *had_accept_ptr = FALSE;  
390          }          }
391        }        }
392      else d = 0;      else d = 0;
393      cc += 3;      cc += 1 + IMM2_SIZE;
394    
395      /* Handle repeated back references */      /* Handle repeated back references */
396    
# Line 408  for (;;) Line 413  for (;;)
413        case OP_CRRANGE:        case OP_CRRANGE:
414        case OP_CRMINRANGE:        case OP_CRMINRANGE:
415        min = GET2(cc, 1);        min = GET2(cc, 1);
416        cc += 5;        cc += 1 + 2 * IMM2_SIZE;
417        break;        break;
418    
419        default:        default:
# Line 423  for (;;) Line 428  for (;;)
428      caught by a recursion depth count. */      caught by a recursion depth count. */
429    
430      case OP_RECURSE:      case OP_RECURSE:
431      cs = ce = (uschar *)startcode + GET(cc, 1);      cs = ce = (pcre_uchar *)startcode + GET(cc, 1);
     if (cs == NULL) return -2;  
432      do ce += GET(ce, 1); while (*ce == OP_ALT);      do ce += GET(ce, 1); while (*ce == OP_ALT);
433      if ((cc > cs && cc < ce) || recurse_depth > 10)      if ((cc > cs && cc < ce) || recurse_depth > 10)
434        had_recurse = TRUE;        had_recurse = TRUE;
435      else      else
436        {        {
437        branchlength += find_minlength(cs, startcode, options, had_accept_ptr,        branchlength += find_minlength(cs, startcode, options, recurse_depth + 1);
         recurse_depth + 1);  
       *had_accept_ptr = FALSE;  
438        }        }
439      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
440      break;      break;
# Line 484  for (;;) Line 486  for (;;)
486      case OP_NOTPOSQUERY:      case OP_NOTPOSQUERY:
487      case OP_NOTPOSQUERYI:      case OP_NOTPOSQUERYI:
488    
489      cc += _pcre_OP_lengths[op];      cc += PRIV(OP_lengths)[op];
490  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
491      if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
492  #endif  #endif
493      break;      break;
494    
# Line 495  for (;;) Line 497  for (;;)
497      case OP_MARK:      case OP_MARK:
498      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
499      case OP_SKIP_ARG:      case OP_SKIP_ARG:
     cc += _pcre_OP_lengths[op] + cc[1];  
     break;  
   
500      case OP_THEN_ARG:      case OP_THEN_ARG:
501      cc += _pcre_OP_lengths[op] + cc[1+LINK_SIZE];      cc += PRIV(OP_lengths)[op] + cc[1];
502      break;      break;
503    
504      /* The remaining opcodes are just skipped over. */      /* The remaining opcodes are just skipped over. */
# Line 511  for (;;) Line 510  for (;;)
510      case OP_SET_SOM:      case OP_SET_SOM:
511      case OP_SKIP:      case OP_SKIP:
512      case OP_THEN:      case OP_THEN:
513      cc += _pcre_OP_lengths[op];      cc += PRIV(OP_lengths)[op];
514      break;      break;
515    
516      /* This should not occur: we list all opcodes explicitly so that when      /* This should not occur: we list all opcodes explicitly so that when
# Line 540  Arguments: Line 539  Arguments:
539    p             points to the character    p             points to the character
540    caseless      the caseless flag    caseless      the caseless flag
541    cd            the block with char table pointers    cd            the block with char table pointers
542    utf8          TRUE for UTF-8 mode    utf           TRUE for UTF-8 / UTF-16 / UTF-32 mode
543    
544  Returns:        pointer after the character  Returns:        pointer after the character
545  */  */
546    
547  static const uschar *  static const pcre_uchar *
548  set_table_bit(uschar *start_bits, const uschar *p, BOOL caseless,  set_table_bit(pcre_uint8 *start_bits, const pcre_uchar *p, BOOL caseless,
549    compile_data *cd, BOOL utf8)    compile_data *cd, BOOL utf)
550  {  {
551  unsigned int c = *p;  pcre_uint32 c = *p;
552    
553    #ifdef COMPILE_PCRE8
554  SET_BIT(c);  SET_BIT(c);
555    
556  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
557  if (utf8 && c > 127)  if (utf && c > 127)
558    {    {
559    GETCHARINC(c, p);    GETCHARINC(c, p);
560  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
561    if (caseless)    if (caseless)
562      {      {
563      uschar buff[8];      pcre_uchar buff[6];
564      c = UCD_OTHERCASE(c);      c = UCD_OTHERCASE(c);
565      (void)_pcre_ord2utf8(c, buff);      (void)PRIV(ord2utf)(c, buff);
566      SET_BIT(buff[0]);      SET_BIT(buff[0]);
567      }      }
568  #endif  #endif  /* SUPPORT_UCP */
569    return p;    return p;
570    }    }
571  #endif  #else   /* Not SUPPORT_UTF */
572    (void)(utf);   /* Stops warning for unused parameter */
573    #endif  /* SUPPORT_UTF */
574    
575  /* Not UTF-8 mode, or character is not greater than 127. */  /* Not UTF-8 mode, or character is not greater than 127. */
576    
577  if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);  if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
578  return p + 1;  return p + 1;
579    #endif  /* COMPILE_PCRE8 */
580    
581    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
582    if (c > 0xff)
583      {
584      c = 0xff;
585      caseless = FALSE;
586      }
587    SET_BIT(c);
588    
589    #ifdef SUPPORT_UTF
590    if (utf && c > 127)
591      {
592      GETCHARINC(c, p);
593    #ifdef SUPPORT_UCP
594      if (caseless)
595        {
596        c = UCD_OTHERCASE(c);
597        if (c > 0xff)
598          c = 0xff;
599        SET_BIT(c);
600        }
601    #endif  /* SUPPORT_UCP */
602      return p;
603      }
604    #else   /* Not SUPPORT_UTF */
605    (void)(utf);   /* Stops warning for unused parameter */
606    #endif  /* SUPPORT_UTF */
607    
608    if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
609    return p + 1;
610    #endif
611  }  }
612    
613    
# Line 599  Returns:         nothing Line 633  Returns:         nothing
633  */  */
634    
635  static void  static void
636  set_type_bits(uschar *start_bits, int cbit_type, int table_limit,  set_type_bits(pcre_uint8 *start_bits, int cbit_type, unsigned int table_limit,
637    compile_data *cd)    compile_data *cd)
638  {  {
639  register int c;  register pcre_uint32 c;
640  for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type];  for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type];
641    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
642  if (table_limit == 32) return;  if (table_limit == 32) return;
643  for (c = 128; c < 256; c++)  for (c = 128; c < 256; c++)
644    {    {
645    if ((cd->cbits[c/8] & (1 << (c&7))) != 0)    if ((cd->cbits[c/8] & (1 << (c&7))) != 0)
646      {      {
647      uschar buff[8];      pcre_uchar buff[6];
648      (void)_pcre_ord2utf8(c, buff);      (void)PRIV(ord2utf)(c, buff);
649      SET_BIT(buff[0]);      SET_BIT(buff[0]);
650      }      }
651    }    }
652    #endif
653  }  }
654    
655    
# Line 639  Returns:         nothing Line 675  Returns:         nothing
675  */  */
676    
677  static void  static void
678  set_nottype_bits(uschar *start_bits, int cbit_type, int table_limit,  set_nottype_bits(pcre_uint8 *start_bits, int cbit_type, unsigned int table_limit,
679    compile_data *cd)    compile_data *cd)
680  {  {
681  register int c;  register pcre_uint32 c;
682  for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type];  for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type];
683    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
684  if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff;  if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff;
685    #endif
686  }  }
687    
688    
# Line 664  function fails unless the result is SSB_ Line 702  function fails unless the result is SSB_
702  Arguments:  Arguments:
703    code         points to an expression    code         points to an expression
704    start_bits   points to a 32-byte table, initialized to 0    start_bits   points to a 32-byte table, initialized to 0
705    utf8         TRUE if in UTF-8 mode    utf          TRUE if in UTF-8 / UTF-16 / UTF-32 mode
706    cd           the block with char table pointers    cd           the block with char table pointers
707    
708  Returns:       SSB_FAIL     => Failed to find any starting bytes  Returns:       SSB_FAIL     => Failed to find any starting bytes
# Line 674  Returns:       SSB_FAIL     => Failed to Line 712  Returns:       SSB_FAIL     => Failed to
712  */  */
713    
714  static int  static int
715  set_start_bits(const uschar *code, uschar *start_bits, BOOL utf8,  set_start_bits(const pcre_uchar *code, pcre_uint8 *start_bits, BOOL utf,
716    compile_data *cd)    compile_data *cd)
717  {  {
718  register int c;  register pcre_uint32 c;
719  int yield = SSB_DONE;  int yield = SSB_DONE;
720  int table_limit = utf8? 16:32;  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
721    int table_limit = utf? 16:32;
722    #else
723    int table_limit = 32;
724    #endif
725    
726  #if 0  #if 0
727  /* ========================================================================= */  /* ========================================================================= */
# Line 701  volatile int dummy; Line 743  volatile int dummy;
743  do  do
744    {    {
745    BOOL try_next = TRUE;    BOOL try_next = TRUE;
746    const uschar *tcode = code + 1 + LINK_SIZE;    const pcre_uchar *tcode = code + 1 + LINK_SIZE;
747    
748    if (*code == OP_CBRA || *code == OP_SCBRA ||    if (*code == OP_CBRA || *code == OP_SCBRA ||
749        *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += 2;        *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += IMM2_SIZE;
750    
751    while (try_next)    /* Loop for items in this branch */    while (try_next)    /* Loop for items in this branch */
752      {      {
# Line 790  do Line 832  do
832        case OP_SOM:        case OP_SOM:
833        case OP_THEN:        case OP_THEN:
834        case OP_THEN_ARG:        case OP_THEN_ARG:
835    #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
836        case OP_XCLASS:        case OP_XCLASS:
837    #endif
838        return SSB_FAIL;        return SSB_FAIL;
839    
840        /* We can ignore word boundary tests. */        /* We can ignore word boundary tests. */
841    
842        case OP_WORD_BOUNDARY:        case OP_WORD_BOUNDARY:
843        case OP_NOT_WORD_BOUNDARY:        case OP_NOT_WORD_BOUNDARY:
844        tcode++;        tcode++;
845        break;        break;
846    
847        /* If we hit a bracket or a positive lookahead assertion, recurse to set        /* If we hit a bracket or a positive lookahead assertion, recurse to set
848        bits from within the subpattern. If it can't find anything, we have to        bits from within the subpattern. If it can't find anything, we have to
# Line 814  do Line 858  do
858        case OP_CBRAPOS:        case OP_CBRAPOS:
859        case OP_SCBRAPOS:        case OP_SCBRAPOS:
860        case OP_ONCE:        case OP_ONCE:
861          case OP_ONCE_NC:
862        case OP_ASSERT:        case OP_ASSERT:
863        rc = set_start_bits(tcode, start_bits, utf8, cd);        rc = set_start_bits(tcode, start_bits, utf, cd);
864        if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;        if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
865        if (rc == SSB_DONE) try_next = FALSE; else        if (rc == SSB_DONE) try_next = FALSE; else
866          {          {
# Line 862  do Line 907  do
907        case OP_BRAZERO:        case OP_BRAZERO:
908        case OP_BRAMINZERO:        case OP_BRAMINZERO:
909        case OP_BRAPOSZERO:        case OP_BRAPOSZERO:
910        rc = set_start_bits(++tcode, start_bits, utf8, cd);        rc = set_start_bits(++tcode, start_bits, utf, cd);
911        if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;        if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
912  /* =========================================================================  /* =========================================================================
913        See the comment at the head of this function concerning the next line,        See the comment at the head of this function concerning the next line,
# Line 889  do Line 934  do
934        case OP_QUERY:        case OP_QUERY:
935        case OP_MINQUERY:        case OP_MINQUERY:
936        case OP_POSQUERY:        case OP_POSQUERY:
937        tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8);        tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf);
938        break;        break;
939    
940        case OP_STARI:        case OP_STARI:
# Line 898  do Line 943  do
943        case OP_QUERYI:        case OP_QUERYI:
944        case OP_MINQUERYI:        case OP_MINQUERYI:
945        case OP_POSQUERYI:        case OP_POSQUERYI:
946        tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf8);        tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf);
947        break;        break;
948    
949        /* Single-char upto sets the bit and tries the next */        /* Single-char upto sets the bit and tries the next */
# Line 906  do Line 951  do
951        case OP_UPTO:        case OP_UPTO:
952        case OP_MINUPTO:        case OP_MINUPTO:
953        case OP_POSUPTO:        case OP_POSUPTO:
954        tcode = set_table_bit(start_bits, tcode + 3, FALSE, cd, utf8);        tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, FALSE, cd, utf);
955        break;        break;
956    
957        case OP_UPTOI:        case OP_UPTOI:
958        case OP_MINUPTOI:        case OP_MINUPTOI:
959        case OP_POSUPTOI:        case OP_POSUPTOI:
960        tcode = set_table_bit(start_bits, tcode + 3, TRUE, cd, utf8);        tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, TRUE, cd, utf);
961        break;        break;
962    
963        /* At least one single char sets the bit and stops */        /* At least one single char sets the bit and stops */
964    
965        case OP_EXACT:        case OP_EXACT:
966        tcode += 2;        tcode += IMM2_SIZE;
967        /* Fall through */        /* Fall through */
968        case OP_CHAR:        case OP_CHAR:
969        case OP_PLUS:        case OP_PLUS:
970        case OP_MINPLUS:        case OP_MINPLUS:
971        case OP_POSPLUS:        case OP_POSPLUS:
972        (void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8);        (void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf);
973        try_next = FALSE;        try_next = FALSE;
974        break;        break;
975    
976        case OP_EXACTI:        case OP_EXACTI:
977        tcode += 2;        tcode += IMM2_SIZE;
978        /* Fall through */        /* Fall through */
979        case OP_CHARI:        case OP_CHARI:
980        case OP_PLUSI:        case OP_PLUSI:
981        case OP_MINPLUSI:        case OP_MINPLUSI:
982        case OP_POSPLUSI:        case OP_POSPLUSI:
983        (void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf8);        (void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf);
984        try_next = FALSE;        try_next = FALSE;
985        break;        break;
986    
# Line 946  do Line 991  do
991        identical. */        identical. */
992    
993        case OP_HSPACE:        case OP_HSPACE:
994        SET_BIT(0x09);        SET_BIT(CHAR_HT);
995        SET_BIT(0x20);        SET_BIT(CHAR_SPACE);
996        if (utf8)  #ifdef SUPPORT_UTF
997          if (utf)
998          {          {
999    #ifdef COMPILE_PCRE8
1000          SET_BIT(0xC2);  /* For U+00A0 */          SET_BIT(0xC2);  /* For U+00A0 */
1001          SET_BIT(0xE1);  /* For U+1680, U+180E */          SET_BIT(0xE1);  /* For U+1680, U+180E */
1002          SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */          SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
1003          SET_BIT(0xE3);  /* For U+3000 */          SET_BIT(0xE3);  /* For U+3000 */
1004    #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1005            SET_BIT(0xA0);
1006            SET_BIT(0xFF);  /* For characters > 255 */
1007    #endif  /* COMPILE_PCRE[8|16|32] */
1008            }
1009          else
1010    #endif /* SUPPORT_UTF */
1011            {
1012    #ifndef EBCDIC
1013            SET_BIT(0xA0);
1014    #endif  /* Not EBCDIC */
1015    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1016            SET_BIT(0xFF);  /* For characters > 255 */
1017    #endif  /* COMPILE_PCRE[16|32] */
1018          }          }
       else SET_BIT(0xA0);  
1019        try_next = FALSE;        try_next = FALSE;
1020        break;        break;
1021    
1022        case OP_ANYNL:        case OP_ANYNL:
1023        case OP_VSPACE:        case OP_VSPACE:
1024        SET_BIT(0x0A);        SET_BIT(CHAR_LF);
1025        SET_BIT(0x0B);        SET_BIT(CHAR_VT);
1026        SET_BIT(0x0C);        SET_BIT(CHAR_FF);
1027        SET_BIT(0x0D);        SET_BIT(CHAR_CR);
1028        if (utf8)  #ifdef SUPPORT_UTF
1029          if (utf)
1030          {          {
1031    #ifdef COMPILE_PCRE8
1032          SET_BIT(0xC2);  /* For U+0085 */          SET_BIT(0xC2);  /* For U+0085 */
1033          SET_BIT(0xE2);  /* For U+2028, U+2029 */          SET_BIT(0xE2);  /* For U+2028, U+2029 */
1034    #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1035            SET_BIT(CHAR_NEL);
1036            SET_BIT(0xFF);  /* For characters > 255 */
1037    #endif  /* COMPILE_PCRE[8|16|32] */
1038            }
1039          else
1040    #endif /* SUPPORT_UTF */
1041            {
1042            SET_BIT(CHAR_NEL);
1043    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1044            SET_BIT(0xFF);  /* For characters > 255 */
1045    #endif
1046          }          }
       else SET_BIT(0x85);  
1047        try_next = FALSE;        try_next = FALSE;
1048        break;        break;
1049    
# Line 990  do Line 1063  do
1063        break;        break;
1064    
1065        /* The cbit_space table has vertical tab as whitespace; we have to        /* The cbit_space table has vertical tab as whitespace; we have to
1066        ensure it is set as not whitespace. */        ensure it is set as not whitespace. Luckily, the code value is the same
1067          (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate bit. */
1068    
1069        case OP_NOT_WHITESPACE:        case OP_NOT_WHITESPACE:
1070        set_nottype_bits(start_bits, cbit_space, table_limit, cd);        set_nottype_bits(start_bits, cbit_space, table_limit, cd);
# Line 998  do Line 1072  do
1072        try_next = FALSE;        try_next = FALSE;
1073        break;        break;
1074    
1075        /* The cbit_space table has vertical tab as whitespace; we have to        /* The cbit_space table has vertical tab as whitespace; we have to not
1076        not set it from the table. */        set it from the table. Luckily, the code value is the same (0x0b) in
1077          ASCII and EBCDIC, so we can just adjust the appropriate bit. */
1078    
1079        case OP_WHITESPACE:        case OP_WHITESPACE:
1080        c = start_bits[1];    /* Save in case it was already set */        c = start_bits[1];    /* Save in case it was already set */
# Line 1028  do Line 1103  do
1103        break;        break;
1104    
1105        case OP_TYPEEXACT:        case OP_TYPEEXACT:
1106        tcode += 3;        tcode += 1 + IMM2_SIZE;
1107        break;        break;
1108    
1109        /* Zero or more repeats of character types set the bits and then        /* Zero or more repeats of character types set the bits and then
# Line 1037  do Line 1112  do
1112        case OP_TYPEUPTO:        case OP_TYPEUPTO:
1113        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
1114        case OP_TYPEPOSUPTO:        case OP_TYPEPOSUPTO:
1115        tcode += 2;               /* Fall through */        tcode += IMM2_SIZE;  /* Fall through */
1116    
1117        case OP_TYPESTAR:        case OP_TYPESTAR:
1118        case OP_TYPEMINSTAR:        case OP_TYPEMINSTAR:
# Line 1053  do Line 1128  do
1128          return SSB_FAIL;          return SSB_FAIL;
1129    
1130          case OP_HSPACE:          case OP_HSPACE:
1131          SET_BIT(0x09);          SET_BIT(CHAR_HT);
1132          SET_BIT(0x20);          SET_BIT(CHAR_SPACE);
1133          if (utf8)  #ifdef SUPPORT_UTF
1134            if (utf)
1135            {            {
1136    #ifdef COMPILE_PCRE8
1137            SET_BIT(0xC2);  /* For U+00A0 */            SET_BIT(0xC2);  /* For U+00A0 */
1138            SET_BIT(0xE1);  /* For U+1680, U+180E */            SET_BIT(0xE1);  /* For U+1680, U+180E */
1139            SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */            SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
1140            SET_BIT(0xE3);  /* For U+3000 */            SET_BIT(0xE3);  /* For U+3000 */
1141    #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1142              SET_BIT(0xA0);
1143              SET_BIT(0xFF);  /* For characters > 255 */
1144    #endif  /* COMPILE_PCRE[8|16|32] */
1145            }            }
1146          else SET_BIT(0xA0);          else
1147    #endif /* SUPPORT_UTF */
1148    #ifndef EBCDIC
1149              SET_BIT(0xA0);
1150    #endif  /* Not EBCDIC */
1151          break;          break;
1152    
1153          case OP_ANYNL:          case OP_ANYNL:
1154          case OP_VSPACE:          case OP_VSPACE:
1155          SET_BIT(0x0A);          SET_BIT(CHAR_LF);
1156          SET_BIT(0x0B);          SET_BIT(CHAR_VT);
1157          SET_BIT(0x0C);          SET_BIT(CHAR_FF);
1158          SET_BIT(0x0D);          SET_BIT(CHAR_CR);
1159          if (utf8)  #ifdef SUPPORT_UTF
1160            if (utf)
1161            {            {
1162    #ifdef COMPILE_PCRE8
1163            SET_BIT(0xC2);  /* For U+0085 */            SET_BIT(0xC2);  /* For U+0085 */
1164            SET_BIT(0xE2);  /* For U+2028, U+2029 */            SET_BIT(0xE2);  /* For U+2028, U+2029 */
1165    #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1166              SET_BIT(CHAR_NEL);
1167              SET_BIT(0xFF);  /* For characters > 255 */
1168    #endif  /* COMPILE_PCRE[8|16|32] */
1169            }            }
1170          else SET_BIT(0x85);          else
1171    #endif /* SUPPORT_UTF */
1172              SET_BIT(CHAR_NEL);
1173          break;          break;
1174    
1175          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
# Line 1088  do Line 1181  do
1181          break;          break;
1182    
1183          /* The cbit_space table has vertical tab as whitespace; we have to          /* The cbit_space table has vertical tab as whitespace; we have to
1184          ensure it gets set as not whitespace. */          ensure it gets set as not whitespace. Luckily, the code value is the
1185            same (0x0b) in ASCII and EBCDIC, so we can just adjust the appropriate
1186            bit. */
1187    
1188          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
1189          set_nottype_bits(start_bits, cbit_space, table_limit, cd);          set_nottype_bits(start_bits, cbit_space, table_limit, cd);
# Line 1096  do Line 1191  do
1191          break;          break;
1192    
1193          /* The cbit_space table has vertical tab as whitespace; we have to          /* The cbit_space table has vertical tab as whitespace; we have to
1194          avoid setting it. */          avoid setting it. Luckily, the code value is the same (0x0b) in ASCII
1195            and EBCDIC, so we can just adjust the appropriate bit. */
1196    
1197          case OP_WHITESPACE:          case OP_WHITESPACE:
1198          c = start_bits[1];    /* Save in case it was already set */          c = start_bits[1];    /* Save in case it was already set */
# Line 1123  do Line 1219  do
1219        character with a value > 255. */        character with a value > 255. */
1220    
1221        case OP_NCLASS:        case OP_NCLASS:
1222  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1223        if (utf8)        if (utf)
1224          {          {
1225          start_bits[24] |= 0xf0;              /* Bits for 0xc4 - 0xc8 */          start_bits[24] |= 0xf0;              /* Bits for 0xc4 - 0xc8 */
1226          memset(start_bits+25, 0xff, 7);      /* Bits for 0xc9 - 0xff */          memset(start_bits+25, 0xff, 7);      /* Bits for 0xc9 - 0xff */
1227          }          }
1228  #endif  #endif
1229    #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1230          SET_BIT(0xFF);                         /* For characters > 255 */
1231    #endif
1232        /* Fall through */        /* Fall through */
1233    
1234        case OP_CLASS:        case OP_CLASS:
1235          {          {
1236            pcre_uint8 *map;
1237          tcode++;          tcode++;
1238            map = (pcre_uint8 *)tcode;
1239    
1240          /* In UTF-8 mode, the bits in a bit map correspond to character          /* In UTF-8 mode, the bits in a bit map correspond to character
1241          values, not to byte values. However, the bit map we are constructing is          values, not to byte values. However, the bit map we are constructing is
# Line 1142  do Line 1243  do
1243          value is > 127. In fact, there are only two possible starting bytes for          value is > 127. In fact, there are only two possible starting bytes for
1244          characters in the range 128 - 255. */          characters in the range 128 - 255. */
1245    
1246  #ifdef SUPPORT_UTF8  #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1247          if (utf8)          if (utf)
1248            {            {
1249            for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];            for (c = 0; c < 16; c++) start_bits[c] |= map[c];
1250            for (c = 128; c < 256; c++)            for (c = 128; c < 256; c++)
1251              {              {
1252              if ((tcode[c/8] && (1 << (c&7))) != 0)              if ((map[c/8] && (1 << (c&7))) != 0)
1253                {                {
1254                int d = (c >> 6) | 0xc0;            /* Set bit for this starter */                int d = (c >> 6) | 0xc0;            /* Set bit for this starter */
1255                start_bits[d/8] |= (1 << (d&7));    /* and then skip on to the */                start_bits[d/8] |= (1 << (d&7));    /* and then skip on to the */
# Line 1156  do Line 1257  do
1257                }                }
1258              }              }
1259            }            }
   
         /* In non-UTF-8 mode, the two bit maps are completely compatible. */  
   
1260          else          else
1261  #endif  #endif
1262            {            {
1263            for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];            /* In non-UTF-8 mode, the two bit maps are completely compatible. */
1264              for (c = 0; c < 32; c++) start_bits[c] |= map[c];
1265            }            }
1266    
1267          /* Advance past the bit map, and act on what follows. For a zero          /* Advance past the bit map, and act on what follows. For a zero
1268          minimum repeat, continue; otherwise stop processing. */          minimum repeat, continue; otherwise stop processing. */
1269    
1270          tcode += 32;          tcode += 32 / sizeof(pcre_uchar);
1271          switch (*tcode)          switch (*tcode)
1272            {            {
1273            case OP_CRSTAR:            case OP_CRSTAR:
# Line 1180  do Line 1279  do
1279    
1280            case OP_CRRANGE:            case OP_CRRANGE:
1281            case OP_CRMINRANGE:            case OP_CRMINRANGE:
1282            if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;            if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE;
1283              else try_next = FALSE;              else try_next = FALSE;
1284            break;            break;
1285    
# Line 1209  return yield; Line 1308  return yield;
1308  *************************************************/  *************************************************/
1309    
1310  /* This function is handed a compiled expression that it must study to produce  /* This function is handed a compiled expression that it must study to produce
1311  information that will speed up the matching. It returns a pcre_extra block  information that will speed up the matching. It returns a pcre[16|32]_extra block
1312  which then gets handed back to pcre_exec().  which then gets handed back to pcre_exec().
1313    
1314  Arguments:  Arguments:
# Line 1218  Arguments: Line 1317  Arguments:
1317    errorptr  points to where to place error messages;    errorptr  points to where to place error messages;
1318              set NULL unless error              set NULL unless error
1319    
1320  Returns:    pointer to a pcre_extra block, with study_data filled in and the  Returns:    pointer to a pcre[16|32]_extra block, with study_data filled in and
1321                appropriate flags set;                the appropriate flags set;
1322              NULL on error or if no optimization possible              NULL on error or if no optimization possible
1323  */  */
1324    
1325    #if defined COMPILE_PCRE8
1326  PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION  PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
1327  pcre_study(const pcre *external_re, int options, const char **errorptr)  pcre_study(const pcre *external_re, int options, const char **errorptr)
1328    #elif defined COMPILE_PCRE16
1329    PCRE_EXP_DEFN pcre16_extra * PCRE_CALL_CONVENTION
1330    pcre16_study(const pcre16 *external_re, int options, const char **errorptr)
1331    #elif defined COMPILE_PCRE32
1332    PCRE_EXP_DEFN pcre32_extra * PCRE_CALL_CONVENTION
1333    pcre32_study(const pcre32 *external_re, int options, const char **errorptr)
1334    #endif
1335  {  {
1336  int min;  int min;
1337  BOOL bits_set = FALSE;  BOOL bits_set = FALSE;
1338  BOOL had_accept = FALSE;  pcre_uint8 start_bits[32];
1339  uschar start_bits[32];  PUBL(extra) *extra = NULL;
 pcre_extra *extra;  
1340  pcre_study_data *study;  pcre_study_data *study;
1341  const uschar *tables;  const pcre_uint8 *tables;
1342  uschar *code;  pcre_uchar *code;
1343  compile_data compile_block;  compile_data compile_block;
1344  const real_pcre *re = (const real_pcre *)external_re;  const REAL_PCRE *re = (const REAL_PCRE *)external_re;
1345    
1346  *errorptr = NULL;  *errorptr = NULL;
1347    
# Line 1245  if (re == NULL || re->magic_number != MA Line 1351  if (re == NULL || re->magic_number != MA
1351    return NULL;    return NULL;
1352    }    }
1353    
1354    if ((re->flags & PCRE_MODE) == 0)
1355      {
1356    #if defined COMPILE_PCRE8
1357      *errorptr = "argument not compiled in 8 bit mode";
1358    #elif defined COMPILE_PCRE16
1359      *errorptr = "argument not compiled in 16 bit mode";
1360    #elif defined COMPILE_PCRE32
1361      *errorptr = "argument not compiled in 32 bit mode";
1362    #endif
1363      return NULL;
1364      }
1365    
1366  if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)  if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
1367    {    {
1368    *errorptr = "unknown or incorrect option bit(s) set";    *errorptr = "unknown or incorrect option bit(s) set";
1369    return NULL;    return NULL;
1370    }    }
1371    
1372  code = (uschar *)re + re->name_table_offset +  code = (pcre_uchar *)re + re->name_table_offset +
1373    (re->name_count * re->name_entry_size);    (re->name_count * re->name_entry_size);
1374    
1375  /* For an anchored pattern, or an unanchored pattern that has a first char, or  /* For an anchored pattern, or an unanchored pattern that has a first char, or
# Line 1266  if ((re->options & PCRE_ANCHORED) == 0 & Line 1384  if ((re->options & PCRE_ANCHORED) == 0 &
1384    /* Set the character tables in the block that is passed around */    /* Set the character tables in the block that is passed around */
1385    
1386    tables = re->tables;    tables = re->tables;
1387    
1388    #if defined COMPILE_PCRE8
1389    if (tables == NULL)    if (tables == NULL)
1390      (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,      (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
1391      (void *)(&tables));      (void *)(&tables));
1392    #elif defined COMPILE_PCRE16
1393      if (tables == NULL)
1394        (void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
1395        (void *)(&tables));
1396    #elif defined COMPILE_PCRE32
1397      if (tables == NULL)
1398        (void)pcre32_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
1399        (void *)(&tables));
1400    #endif
1401    
1402    compile_block.lcc = tables + lcc_offset;    compile_block.lcc = tables + lcc_offset;
1403    compile_block.fcc = tables + fcc_offset;    compile_block.fcc = tables + fcc_offset;
# Line 1277  if ((re->options & PCRE_ANCHORED) == 0 & Line 1406  if ((re->options & PCRE_ANCHORED) == 0 &
1406    
1407    /* See if we can find a fixed set of initial characters for the pattern. */    /* See if we can find a fixed set of initial characters for the pattern. */
1408    
1409    memset(start_bits, 0, 32 * sizeof(uschar));    memset(start_bits, 0, 32 * sizeof(pcre_uint8));
1410    rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,    rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,
1411      &compile_block);      &compile_block);
1412    bits_set = rc == SSB_DONE;    bits_set = rc == SSB_DONE;
1413    if (rc == SSB_UNKNOWN) *errorptr = "internal error: opcode not recognized";    if (rc == SSB_UNKNOWN)
1414        {
1415        *errorptr = "internal error: opcode not recognized";
1416        return NULL;
1417        }
1418    }    }
1419    
1420  /* Find the minimum length of subject string. */  /* Find the minimum length of subject string. */
1421    
1422  switch(min = find_minlength(code, code, re->options, &had_accept, 0))  switch(min = find_minlength(code, code, re->options, 0))
1423    {    {
1424    case -2: *errorptr = "internal error: missing capturing bracket"; break;    case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
1425    case -3: *errorptr = "internal error: opcode not recognized"; break;    case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
1426    default: break;    default: break;
1427    }    }
1428    
1429  /* Return NULL if there's been an error or if no optimization is possible. */  /* If a set of starting bytes has been identified, or if the minimum length is
1430    greater than zero, or if JIT optimization has been requested, or if
1431    PCRE_STUDY_EXTRA_NEEDED is set, get a pcre[16|32]_extra block and a
1432    pcre_study_data block. The study data is put in the latter, which is pointed to
1433    by the former, which may also get additional data set later by the calling
1434    program. At the moment, the size of pcre_study_data is fixed. We nevertheless
1435    save it in a field for returning via the pcre_fullinfo() function so that if it
1436    becomes variable in the future, we don't have to change that code. */
1437    
1438    if (bits_set || min > 0 || (options & (
1439    #ifdef SUPPORT_JIT
1440        PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE |
1441        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE |
1442    #endif
1443        PCRE_STUDY_EXTRA_NEEDED)) != 0)
1444      {
1445      extra = (PUBL(extra) *)(PUBL(malloc))
1446        (sizeof(PUBL(extra)) + sizeof(pcre_study_data));
1447      if (extra == NULL)
1448        {
1449        *errorptr = "failed to get memory";
1450        return NULL;
1451        }
1452    
1453  if (*errorptr != NULL || (!bits_set && min < 0)) return NULL;    study = (pcre_study_data *)((char *)extra + sizeof(PUBL(extra)));
1454      extra->flags = PCRE_EXTRA_STUDY_DATA;
1455      extra->study_data = study;
1456    
1457      study->size = sizeof(pcre_study_data);
1458      study->flags = 0;
1459    
1460      /* Set the start bits always, to avoid unset memory errors if the
1461      study data is written to a file, but set the flag only if any of the bits
1462      are set, to save time looking when none are. */
1463    
1464  /* Get a pcre_extra block and a pcre_study_data block. The study data is put in    if (bits_set)
1465  the latter, which is pointed to by the former, which may also get additional      {
1466  data set later by the calling program. At the moment, the size of      study->flags |= PCRE_STUDY_MAPPED;
1467  pcre_study_data is fixed. We nevertheless save it in a field for returning via      memcpy(study->start_bits, start_bits, sizeof(start_bits));
1468  the pcre_fullinfo() function so that if it becomes variable in the future, we      }
1469  don't have to change that code. */    else memset(study->start_bits, 0, 32 * sizeof(pcre_uint8));
1470    
1471  extra = (pcre_extra *)(pcre_malloc)  #ifdef PCRE_DEBUG
1472    (sizeof(pcre_extra) + sizeof(pcre_study_data));    if (bits_set)
1473        {
1474        pcre_uint8 *ptr = start_bits;
1475        int i;
1476    
1477  if (extra == NULL)      printf("Start bits:\n");
1478    {      for (i = 0; i < 32; i++)
1479    *errorptr = "failed to get memory";        printf("%3d: %02x%s", i * 8, *ptr++, ((i + 1) & 0x7) != 0? " " : "\n");
1480    return NULL;      }
1481    }  #endif
1482    
1483  study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra));    /* Always set the minlength value in the block, because the JIT compiler
1484  extra->flags = PCRE_EXTRA_STUDY_DATA;    makes use of it. However, don't set the bit unless the length is greater than
1485  extra->study_data = study;    zero - the interpretive pcre_exec() and pcre_dfa_exec() needn't waste time
1486      checking the zero case. */
1487    
1488  study->size = sizeof(pcre_study_data);    if (min > 0)
1489  study->flags = 0;      {
1490        study->flags |= PCRE_STUDY_MINLEN;
1491        study->minlength = min;
1492        }
1493      else study->minlength = 0;
1494    
1495  if (bits_set)    /* If JIT support was compiled and requested, attempt the JIT compilation.
1496    {    If no starting bytes were found, and the minimum length is zero, and JIT
1497    study->flags |= PCRE_STUDY_MAPPED;    compilation fails, abandon the extra block and return NULL, unless
1498    memcpy(study->start_bits, start_bits, sizeof(start_bits));    PCRE_STUDY_EXTRA_NEEDED is set. */
1499    }  
1500    #ifdef SUPPORT_JIT
1501      extra->executable_jit = NULL;
1502      if ((options & PCRE_STUDY_JIT_COMPILE) != 0)
1503        PRIV(jit_compile)(re, extra, JIT_COMPILE);
1504      if ((options & PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE) != 0)
1505        PRIV(jit_compile)(re, extra, JIT_PARTIAL_SOFT_COMPILE);
1506      if ((options & PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) != 0)
1507        PRIV(jit_compile)(re, extra, JIT_PARTIAL_HARD_COMPILE);
1508    
1509  if (min >= 0)    if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0 &&
1510    {        (options & PCRE_STUDY_EXTRA_NEEDED) == 0)
1511    study->flags |= PCRE_STUDY_MINLEN;      {
1512    study->minlength = min;  #if defined COMPILE_PCRE8
1513        pcre_free_study(extra);
1514    #elif defined COMPILE_PCRE16
1515        pcre16_free_study(extra);
1516    #elif defined COMPILE_PCRE32
1517        pcre32_free_study(extra);
1518    #endif
1519        extra = NULL;
1520        }
1521    #endif
1522    }    }
1523    
1524  return extra;  return extra;
1525  }  }
1526    
1527    
1528    /*************************************************
1529    *          Free the study data                   *
1530    *************************************************/
1531    
1532    /* This function frees the memory that was obtained by pcre_study().
1533    
1534    Argument:   a pointer to the pcre[16|32]_extra block
1535    Returns:    nothing
1536    */
1537    
1538    #if defined COMPILE_PCRE8
1539    PCRE_EXP_DEFN void
1540    pcre_free_study(pcre_extra *extra)
1541    #elif defined COMPILE_PCRE16
1542    PCRE_EXP_DEFN void
1543    pcre16_free_study(pcre16_extra *extra)
1544    #elif defined COMPILE_PCRE32
1545    PCRE_EXP_DEFN void
1546    pcre32_free_study(pcre32_extra *extra)
1547    #endif
1548    {
1549    if (extra == NULL)
1550      return;
1551    #ifdef SUPPORT_JIT
1552    if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
1553         extra->executable_jit != NULL)
1554      PRIV(jit_free)(extra->executable_jit);
1555    #endif
1556    PUBL(free)(extra);
1557    }
1558    
1559  /* End of pcre_study.c */  /* End of pcre_study.c */

Legend:
Removed from v.657  
changed lines
  Added in v.1146

  ViewVC Help
Powered by ViewVC 1.1.5