/[pcre]/code/trunk/pcre.c
ViewVC logotype

Diff of /code/trunk/pcre.c

Parent Directory | Revision Log | View Patch

revision 9 by nigel, Sat Feb 24 21:38:13 2007 UTC | revision 15 by nigel, Sat Feb 24 21:38:25 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1997 University of Cambridge             Copyright (c) 1998 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 49  the external pcre header. */ Line 49  the external pcre header. */
49  #include "internal.h"  #include "internal.h"
50    
51    
52    /* Allow compilation as C++ source code, should anybody want to do that. */
53    
54    #ifdef __cplusplus
55    #define class pcre_class
56    #endif
57    
58    
59  /* Min and max values for the common repeats; for the maxima, 0 => infinity */  /* Min and max values for the common repeats; for the maxima, 0 => infinity */
60    
61  static char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
62  static char rep_max[] = { 0, 0, 0, 0, 1, 1 };  static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
63    
64  /* Text forms of OP_ values and things, for debugging */  /* Text forms of OP_ values and things, for debugging (not all used) */
65    
66  #ifdef DEBUG  #ifdef DEBUG
67  static const char *OP_names[] = {  static const char *OP_names[] = {
# Line 65  static const char *OP_names[] = { Line 72  static const char *OP_names[] = {
72    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
73    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
74    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
75    "class", "Ref",    "class", "negclass", "Ref",
76    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",
77    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
78  };  };
# Line 76  are simple data values; negative values Line 83  are simple data values; negative values
83  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
84  is invalid. */  is invalid. */
85    
86  static short int escapes[] = {  static const short int escapes[] = {
87      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
88      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
89    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */
# Line 91  static short int escapes[] = { Line 98  static short int escapes[] = {
98    
99  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
100    
101  static BOOL compile_regex(int, int *, uschar **, const uschar **, const char **);  static BOOL
102      compile_regex(int, int *, uschar **, const uschar **, const char **);
103    
104  /* Structure for passing "static" information around between the functions  /* Structure for passing "static" information around between the functions
105  doing the matching, so that they are thread-safe. */  doing the matching, so that they are thread-safe. */
# Line 306  do { Line 314  do {
314        /* Check a class or a back reference for a zero minimum */        /* Check a class or a back reference for a zero minimum */
315    
316        case OP_CLASS:        case OP_CLASS:
317          case OP_NEGCLASS:
318        case OP_REF:        case OP_REF:
319        cc += (*cc == OP_REF)? 2 : 33;        cc += (*cc == OP_REF)? 2 : 33;
320    
# Line 670  for (;; ptr++) Line 679  for (;; ptr++)
679    
680      case '[':      case '[':
681      previous = code;      previous = code;
     *code++ = OP_CLASS;  
682    
683      /* If the first character is '^', set the negation flag */      /* If the first character is '^', set the negation flag, and use a
684        different opcode. This only matters if caseless matching is specified at
685        runtime. */
686    
687      if ((c = *(++ptr)) == '^')      if ((c = *(++ptr)) == '^')
688        {        {
689        negate_class = TRUE;        negate_class = TRUE;
690          *code++ = OP_NEGCLASS;
691        c = *(++ptr);        c = *(++ptr);
692        }        }
693      else negate_class = FALSE;      else
694          {
695          negate_class = FALSE;
696          *code++ = OP_CLASS;
697          }
698    
699      /* Keep a count of chars so that we can optimize the case of just a single      /* Keep a count of chars so that we can optimize the case of just a single
700      character. */      character. */
# Line 1015  for (;; ptr++) Line 1030  for (;; ptr++)
1030      /* If previous was a character class or a back reference, we put the repeat      /* If previous was a character class or a back reference, we put the repeat
1031      stuff after it. */      stuff after it. */
1032    
1033      else if (*previous == OP_CLASS || *previous == OP_REF)      else if (*previous == OP_CLASS || *previous == OP_NEGCLASS ||
1034                 *previous == OP_REF)
1035        {        {
1036        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
1037          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
# Line 1287  for (;; ptr++) Line 1303  for (;; ptr++)
1303      the next state. */      the next state. */
1304    
1305      previous[1] = length;      previous[1] = length;
1306      ptr--;      if (length < 255) ptr--;
1307      break;      break;
1308      }      }
1309    }                   /* end of big loop */    }                   /* end of big loop */
# Line 2041  while (code < code_end) Line 2057  while (code < code_end)
2057      case OP_MINUPTO:      case OP_MINUPTO:
2058      if (isprint(c = code[3])) printf("    %c{", c);      if (isprint(c = code[3])) printf("    %c{", c);
2059        else printf("    \\x%02x{", c);        else printf("    \\x%02x{", c);
2060      if (*code != OP_EXACT) printf(",");      if (*code != OP_EXACT) printf("0,");
2061      printf("%d}", (code[1] << 8) + code[2]);      printf("%d}", (code[1] << 8) + code[2]);
2062      if (*code == OP_MINUPTO) printf("?");      if (*code == OP_MINUPTO) printf("?");
2063      code += 3;      code += 3;
# Line 2090  while (code < code_end) Line 2106  while (code < code_end)
2106      goto CLASS_REF_REPEAT;      goto CLASS_REF_REPEAT;
2107    
2108      case OP_CLASS:      case OP_CLASS:
2109        case OP_NEGCLASS:
2110        {        {
2111        int i, min, max;        int i, min, max;
2112    
2113        code++;        if (*code++ == OP_CLASS) printf("    [");
2114        printf("    [");          else printf("   ^[");
2115    
2116        for (i = 0; i < 256; i++)        for (i = 0; i < 256; i++)
2117          {          {
# Line 2714  for (;;) Line 2731  for (;;)
2731      item to see if there is repeat information following. Then obey similar      item to see if there is repeat information following. Then obey similar
2732      code to character type repeats - written out again for speed. If caseless      code to character type repeats - written out again for speed. If caseless
2733      matching was set at runtime but not at compile time, we have to check both      matching was set at runtime but not at compile time, we have to check both
2734      versions of a character. */      versions of a character, and we have to behave differently for positive and
2735        negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are
2736        treated differently. */
2737    
2738      case OP_CLASS:      case OP_CLASS:
2739        case OP_NEGCLASS:
2740        {        {
2741          BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless;
2742        const uschar *data = ecode + 1;  /* Save for matching */        const uschar *data = ecode + 1;  /* Save for matching */
2743        ecode += 33;                     /* Advance past the item */        ecode += 33;                     /* Advance past the item */
2744    
# Line 2746  for (;;) Line 2767  for (;;)
2767          break;          break;
2768    
2769          default:               /* No repeat follows */          default:               /* No repeat follows */
2770          if (eptr >= md->end_subject) return FALSE;          min = max = 1;
2771          c = *eptr++;          break;
         if ((data[c/8] & (1 << (c&7))) != 0) continue;    /* With main loop */  
         if (md->runtime_caseless)  
           {  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  /* With main loop */  
           }  
         return FALSE;  
2772          }          }
2773    
2774        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
# Line 2763  for (;;) Line 2777  for (;;)
2777          {          {
2778          if (eptr >= md->end_subject) return FALSE;          if (eptr >= md->end_subject) return FALSE;
2779          c = *eptr++;          c = *eptr++;
2780          if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2781          if (md->runtime_caseless)          /* Either not runtime caseless, or it was a positive class. For
2782            runtime caseless, continue if either case is in the map. */
2783    
2784            if (!nasty_case)
2785            {            {
2786              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2787              if (md->runtime_caseless)
2788                {
2789                c = pcre_fcc[c];
2790                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2791                }
2792              }
2793    
2794            /* Runtime caseless and it was a negative class. Continue only if
2795            both cases are in the map. */
2796    
2797            else
2798              {
2799              if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2800            c = pcre_fcc[c];            c = pcre_fcc[c];
2801            if ((data[c/8] & (1 << (c&7))) != 0) continue;            if ((data[c/8] & (1 << (c&7))) != 0) continue;
2802            }            }
2803    
2804          return FALSE;          return FALSE;
2805          }          }
2806    
# Line 2787  for (;;) Line 2819  for (;;)
2819            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md)) return TRUE;
2820            if (i >= max || eptr >= md->end_subject) return FALSE;            if (i >= max || eptr >= md->end_subject) return FALSE;
2821            c = *eptr++;            c = *eptr++;
2822            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2823            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2824              runtime caseless, continue if either case is in the map. */
2825    
2826              if (!nasty_case)
2827                {
2828                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2829                if (md->runtime_caseless)
2830                  {
2831                  c = pcre_fcc[c];
2832                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2833                  }
2834                }
2835    
2836              /* Runtime caseless and it was a negative class. Continue only if
2837              both cases are in the map. */
2838    
2839              else
2840              {              {
2841                if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2842              c = pcre_fcc[c];              c = pcre_fcc[c];
2843              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2844              }              }
2845    
2846            return FALSE;            return FALSE;
2847            }            }
2848          /* Control never gets here */          /* Control never gets here */
# Line 2807  for (;;) Line 2857  for (;;)
2857            {            {
2858            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2859            c = *eptr;            c = *eptr;
2860            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2861            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2862              runtime caseless, continue if either case is in the map. */
2863    
2864              if (!nasty_case)
2865                {
2866                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2867                if (md->runtime_caseless)
2868                  {
2869                  c = pcre_fcc[c];
2870                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2871                  }
2872                }
2873    
2874              /* Runtime caseless and it was a negative class. Continue only if
2875              both cases are in the map. */
2876    
2877              else
2878              {              {
2879                if ((data[c/8] & (1 << (c&7))) == 0) break;
2880              c = pcre_fcc[c];              c = pcre_fcc[c];
2881              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2882              }              }
2883    
2884            break;            break;
2885            }            }
2886    
# Line 3299  hide it in a separate function. This is Line 3367  hide it in a separate function. This is
3367  since it's needed only for the extension \X option, and with any luck, a good  since it's needed only for the extension \X option, and with any luck, a good
3368  compiler will spot the tail recursion and compile it efficiently.  compiler will spot the tail recursion and compile it efficiently.
3369    
3370  Arguments:    The block containing the match data  Arguments:
3371  Returns:      The return from setjump()     eptr        pointer in subject
3372       ecode       position in code
3373       offset_top  current top pointer
3374       md          pointer to "static" info for the match
3375    
3376    Returns:       TRUE if matched
3377  */  */
3378    
3379  static int  static BOOL
3380  my_setjmp(match_data *match_block)  match_with_setjmp(const uschar *eptr, const uschar *ecode, int offset_top,
3381      match_data *match_block)
3382  {  {
3383  return setjmp(match_block->fail_env);  return setjmp(match_block->fail_env) == 0 &&
3384          match(eptr, ecode, offset_top, match_block);
3385  }  }
3386    
3387    
# Line 3338  int Line 3413  int
3413  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
3414    const char *subject, int length, int options, int *offsets, int offsetcount)    const char *subject, int length, int options, int *offsets, int offsetcount)
3415  {  {
3416  int resetcount;  int resetcount, ocount;
 int ocount = offsetcount;  
3417  int first_char = -1;  int first_char = -1;
3418  match_data match_block;  match_data match_block;
3419  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
# Line 3347  const uschar *start_match = (const uscha Line 3421  const uschar *start_match = (const uscha
3421  const uschar *end_subject;  const uschar *end_subject;
3422  const real_pcre *re = (const real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
3423  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
3424    BOOL using_temporary_offsets = FALSE;
3425  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3426  BOOL startline = (re->options & PCRE_STARTLINE) != 0;  BOOL startline = (re->options & PCRE_STARTLINE) != 0;
3427    
# Line 3375  match_block.errorcode = PCRE_ERROR_NOMAT Line 3450  match_block.errorcode = PCRE_ERROR_NOMAT
3450    
3451  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
3452  hold, we get a temporary bit of working store to use during the matching.  hold, we get a temporary bit of working store to use during the matching.
3453  Otherwise, we can use the vector supplied, rounding down the size of it to a  Otherwise, we can use the vector supplied, rounding down its size to a multiple
3454  multiple of 2. */  of 2. */
3455    
3456  ocount &= (-2);  ocount = offsetcount & (-2);
3457  if (re->top_backref > 0 && re->top_backref + 1 >= ocount/2)  if (re->top_backref > 0 && re->top_backref >= ocount/2)
3458    {    {
3459    ocount = re->top_backref * 2 + 2;    ocount = re->top_backref * 2 + 2;
3460    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3461    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3462      using_temporary_offsets = TRUE;
3463    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
3464    }    }
3465  else match_block.offset_vector = offsets;  else match_block.offset_vector = offsets;
# Line 3498  do Line 3574  do
3574    it unless PCRE_EXTRA is set, since only in that case is the "cut" operation    it unless PCRE_EXTRA is set, since only in that case is the "cut" operation
3575    enabled. */    enabled. */
3576    
3577    if (((re->options & PCRE_EXTRA) != 0 && my_setjmp(&match_block) != 0) ||    if ((re->options & PCRE_EXTRA) != 0)
3578        !match(start_match, re->code, 2, &match_block))      {
3579      continue;      if (!match_with_setjmp(start_match, re->code, 2, &match_block))
3580          continue;
3581        }
3582      else if (!match(start_match, re->code, 2, &match_block)) continue;
3583    
3584    /* Copy the offset information from temporary store if necessary */    /* Copy the offset information from temporary store if necessary */
3585    
3586    if (ocount != offsetcount)    if (using_temporary_offsets)
3587      {      {
3588      if (offsetcount >= 4)      if (offsetcount >= 4)
3589        {        {
3590        memcpy(offsets + 2, match_block.offset_vector + 2,        memcpy(offsets + 2, match_block.offset_vector + 2,
3591          (offsetcount - 2) * sizeof(int));          (offsetcount - 2) * sizeof(int));
3592        DPRINTF(("Copied offsets; freeing temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
3593        }        }
3594      if (match_block.end_offset_top > offsetcount)      if (match_block.end_offset_top > offsetcount)
3595        match_block.offset_overflow = TRUE;        match_block.offset_overflow = TRUE;
# Line 3534  while (!anchored && Line 3613  while (!anchored &&
3613         match_block.errorcode == PCRE_ERROR_NOMATCH &&         match_block.errorcode == PCRE_ERROR_NOMATCH &&
3614         start_match++ < end_subject);         start_match++ < end_subject);
3615    
3616    if (using_temporary_offsets)
3617      {
3618      DPRINTF(("Freeing temporary memory\n"));
3619      (pcre_free)(match_block.offset_vector);
3620      }
3621    
3622  DPRINTF((">>>> returning %d\n", match_block.errorcode));  DPRINTF((">>>> returning %d\n", match_block.errorcode));
3623    
3624  return match_block.errorcode;  return match_block.errorcode;

Legend:
Removed from v.9  
changed lines
  Added in v.15

  ViewVC Help
Powered by ViewVC 1.1.5