/[pcre]/code/trunk/pcre.c
ViewVC logotype

Diff of /code/trunk/pcre.c

Parent Directory | Revision Log | View Patch

revision 11 by nigel, Sat Feb 24 21:38:17 2007 UTC | revision 17 by nigel, Sat Feb 24 21:38:29 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1997 University of Cambridge             Copyright (c) 1998 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 49  the external pcre header. */ Line 49  the external pcre header. */
49  #include "internal.h"  #include "internal.h"
50    
51    
52    /* Allow compilation as C++ source code, should anybody want to do that. */
53    
54    #ifdef __cplusplus
55    #define class pcre_class
56    #endif
57    
58    
59  /* Min and max values for the common repeats; for the maxima, 0 => infinity */  /* Min and max values for the common repeats; for the maxima, 0 => infinity */
60    
61  static char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
62  static char rep_max[] = { 0, 0, 0, 0, 1, 1 };  static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
63    
64  /* Text forms of OP_ values and things, for debugging */  /* Text forms of OP_ values and things, for debugging (not all used) */
65    
66  #ifdef DEBUG  #ifdef DEBUG
67  static const char *OP_names[] = {  static const char *OP_names[] = {
# Line 65  static const char *OP_names[] = { Line 72  static const char *OP_names[] = {
72    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
73    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
74    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
75    "class", "Ref",    "class", "negclass", "Ref",
76    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",
77    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
78  };  };
# Line 76  are simple data values; negative values Line 83  are simple data values; negative values
83  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
84  is invalid. */  is invalid. */
85    
86  static short int escapes[] = {  static const short int escapes[] = {
87      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
88      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
89    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */
# Line 91  static short int escapes[] = { Line 98  static short int escapes[] = {
98    
99  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
100    
101  static BOOL compile_regex(int, int *, uschar **, const uschar **, const char **);  static BOOL
102      compile_regex(int, int *, uschar **, const uschar **, const char **);
103    
104  /* Structure for passing "static" information around between the functions  /* Structure for passing "static" information around between the functions
105  doing the matching, so that they are thread-safe. */  doing the matching, so that they are thread-safe. */
# Line 256  do { Line 264  do {
264        case OP_KETRMIN:        case OP_KETRMIN:
265        return TRUE;        return TRUE;
266    
267          /* Skip over entire bracket groups with zero lower bound */
268    
269          case OP_BRAZERO:
270          case OP_BRAMINZERO:
271          cc++;
272          /* Fall through */
273    
274        /* Skip over assertive subpatterns */        /* Skip over assertive subpatterns */
275    
276        case OP_ASSERT:        case OP_ASSERT:
# Line 270  do { Line 285  do {
285        case OP_EOD:        case OP_EOD:
286        case OP_CIRC:        case OP_CIRC:
287        case OP_DOLL:        case OP_DOLL:
       case OP_BRAZERO:  
       case OP_BRAMINZERO:  
288        case OP_NOT_WORD_BOUNDARY:        case OP_NOT_WORD_BOUNDARY:
289        case OP_WORD_BOUNDARY:        case OP_WORD_BOUNDARY:
290        cc++;        cc++;
# Line 306  do { Line 319  do {
319        /* Check a class or a back reference for a zero minimum */        /* Check a class or a back reference for a zero minimum */
320    
321        case OP_CLASS:        case OP_CLASS:
322          case OP_NEGCLASS:
323        case OP_REF:        case OP_REF:
324        cc += (*cc == OP_REF)? 2 : 33;        cc += (*cc == OP_REF)? 2 : 33;
325    
# Line 670  for (;; ptr++) Line 684  for (;; ptr++)
684    
685      case '[':      case '[':
686      previous = code;      previous = code;
     *code++ = OP_CLASS;  
687    
688      /* If the first character is '^', set the negation flag */      /* If the first character is '^', set the negation flag, and use a
689        different opcode. This only matters if caseless matching is specified at
690        runtime. */
691    
692      if ((c = *(++ptr)) == '^')      if ((c = *(++ptr)) == '^')
693        {        {
694        negate_class = TRUE;        negate_class = TRUE;
695          *code++ = OP_NEGCLASS;
696        c = *(++ptr);        c = *(++ptr);
697        }        }
698      else negate_class = FALSE;      else
699          {
700          negate_class = FALSE;
701          *code++ = OP_CLASS;
702          }
703    
704      /* Keep a count of chars so that we can optimize the case of just a single      /* Keep a count of chars so that we can optimize the case of just a single
705      character. */      character. */
# Line 1015  for (;; ptr++) Line 1035  for (;; ptr++)
1035      /* If previous was a character class or a back reference, we put the repeat      /* If previous was a character class or a back reference, we put the repeat
1036      stuff after it. */      stuff after it. */
1037    
1038      else if (*previous == OP_CLASS || *previous == OP_REF)      else if (*previous == OP_CLASS || *previous == OP_NEGCLASS ||
1039                 *previous == OP_REF)
1040        {        {
1041        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
1042          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
# Line 1287  for (;; ptr++) Line 1308  for (;; ptr++)
1308      the next state. */      the next state. */
1309    
1310      previous[1] = length;      previous[1] = length;
1311      ptr--;      if (length < 255) ptr--;
1312      break;      break;
1313      }      }
1314    }                   /* end of big loop */    }                   /* end of big loop */
# Line 2090  while (code < code_end) Line 2111  while (code < code_end)
2111      goto CLASS_REF_REPEAT;      goto CLASS_REF_REPEAT;
2112    
2113      case OP_CLASS:      case OP_CLASS:
2114        case OP_NEGCLASS:
2115        {        {
2116        int i, min, max;        int i, min, max;
2117    
2118        code++;        if (*code++ == OP_CLASS) printf("    [");
2119        printf("    [");          else printf("   ^[");
2120    
2121        for (i = 0; i < 256; i++)        for (i = 0; i < 256; i++)
2122          {          {
# Line 2714  for (;;) Line 2736  for (;;)
2736      item to see if there is repeat information following. Then obey similar      item to see if there is repeat information following. Then obey similar
2737      code to character type repeats - written out again for speed. If caseless      code to character type repeats - written out again for speed. If caseless
2738      matching was set at runtime but not at compile time, we have to check both      matching was set at runtime but not at compile time, we have to check both
2739      versions of a character. */      versions of a character, and we have to behave differently for positive and
2740        negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are
2741        treated differently. */
2742    
2743      case OP_CLASS:      case OP_CLASS:
2744        case OP_NEGCLASS:
2745        {        {
2746          BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless;
2747        const uschar *data = ecode + 1;  /* Save for matching */        const uschar *data = ecode + 1;  /* Save for matching */
2748        ecode += 33;                     /* Advance past the item */        ecode += 33;                     /* Advance past the item */
2749    
# Line 2746  for (;;) Line 2772  for (;;)
2772          break;          break;
2773    
2774          default:               /* No repeat follows */          default:               /* No repeat follows */
2775          if (eptr >= md->end_subject) return FALSE;          min = max = 1;
2776          c = *eptr++;          break;
         if ((data[c/8] & (1 << (c&7))) != 0) continue;    /* With main loop */  
         if (md->runtime_caseless)  
           {  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  /* With main loop */  
           }  
         return FALSE;  
2777          }          }
2778    
2779        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
# Line 2763  for (;;) Line 2782  for (;;)
2782          {          {
2783          if (eptr >= md->end_subject) return FALSE;          if (eptr >= md->end_subject) return FALSE;
2784          c = *eptr++;          c = *eptr++;
2785          if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2786          if (md->runtime_caseless)          /* Either not runtime caseless, or it was a positive class. For
2787            runtime caseless, continue if either case is in the map. */
2788    
2789            if (!nasty_case)
2790            {            {
2791              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2792              if (md->runtime_caseless)
2793                {
2794                c = pcre_fcc[c];
2795                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2796                }
2797              }
2798    
2799            /* Runtime caseless and it was a negative class. Continue only if
2800            both cases are in the map. */
2801    
2802            else
2803              {
2804              if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2805            c = pcre_fcc[c];            c = pcre_fcc[c];
2806            if ((data[c/8] & (1 << (c&7))) != 0) continue;            if ((data[c/8] & (1 << (c&7))) != 0) continue;
2807            }            }
2808    
2809          return FALSE;          return FALSE;
2810          }          }
2811    
# Line 2787  for (;;) Line 2824  for (;;)
2824            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md)) return TRUE;
2825            if (i >= max || eptr >= md->end_subject) return FALSE;            if (i >= max || eptr >= md->end_subject) return FALSE;
2826            c = *eptr++;            c = *eptr++;
2827            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2828            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2829              runtime caseless, continue if either case is in the map. */
2830    
2831              if (!nasty_case)
2832                {
2833                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2834                if (md->runtime_caseless)
2835                  {
2836                  c = pcre_fcc[c];
2837                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2838                  }
2839                }
2840    
2841              /* Runtime caseless and it was a negative class. Continue only if
2842              both cases are in the map. */
2843    
2844              else
2845              {              {
2846                if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2847              c = pcre_fcc[c];              c = pcre_fcc[c];
2848              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2849              }              }
2850    
2851            return FALSE;            return FALSE;
2852            }            }
2853          /* Control never gets here */          /* Control never gets here */
# Line 2807  for (;;) Line 2862  for (;;)
2862            {            {
2863            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2864            c = *eptr;            c = *eptr;
2865            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2866            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2867              runtime caseless, continue if either case is in the map. */
2868    
2869              if (!nasty_case)
2870              {              {
2871                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2872                if (md->runtime_caseless)
2873                  {
2874                  c = pcre_fcc[c];
2875                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2876                  }
2877                }
2878    
2879              /* Runtime caseless and it was a negative class. Continue only if
2880              both cases are in the map. */
2881    
2882              else
2883                {
2884                if ((data[c/8] & (1 << (c&7))) == 0) break;
2885              c = pcre_fcc[c];              c = pcre_fcc[c];
2886              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2887              }              }
2888    
2889            break;            break;
2890            }            }
2891    
# Line 3389  ocount = offsetcount & (-2); Line 3462  ocount = offsetcount & (-2);
3462  if (re->top_backref > 0 && re->top_backref >= ocount/2)  if (re->top_backref > 0 && re->top_backref >= ocount/2)
3463    {    {
3464    ocount = re->top_backref * 2 + 2;    ocount = re->top_backref * 2 + 2;
3465    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3466    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3467    using_temporary_offsets = TRUE;    using_temporary_offsets = TRUE;
3468    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));

Legend:
Removed from v.11  
changed lines
  Added in v.17

  ViewVC Help
Powered by ViewVC 1.1.5