/[pcre]/code/tags/pcre-1.09/pcre.c
ViewVC logotype

Diff of /code/tags/pcre-1.09/pcre.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

code/trunk/pcre.c revision 7 by nigel, Sat Feb 24 21:38:09 2007 UTC code/tags/pcre-1.09/pcre.c revision 22 by nigel, Sat Feb 24 21:38:39 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1997 University of Cambridge             Copyright (c) 1998 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 33  restrictions: Line 33  restrictions:
33    
34  /* #define DEBUG */  /* #define DEBUG */
35    
36    /* Use a macro for debugging printing, 'cause that eliminates the use
37    of #ifdef inline, and there are *still* stupid compilers about that don't like
38    indented pre-processor statements. I suppose it's only been 10 years... */
39    
40    #ifdef DEBUG
41    #define DPRINTF(p) printf p
42    #else
43    #define DPRINTF(p) /*nothing*/
44    #endif
45    
46  /* Include the internals header, which itself includes Standard C headers plus  /* Include the internals header, which itself includes Standard C headers plus
47  the external pcre header. */  the external pcre header. */
# Line 40  the external pcre header. */ Line 49  the external pcre header. */
49  #include "internal.h"  #include "internal.h"
50    
51    
52    /* Allow compilation as C++ source code, should anybody want to do that. */
53    
54    #ifdef __cplusplus
55    #define class pcre_class
56    #endif
57    
58    
59  /* Min and max values for the common repeats; for the maxima, 0 => infinity */  /* Min and max values for the common repeats; for the maxima, 0 => infinity */
60    
61  static char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
62  static char rep_max[] = { 0, 0, 0, 0, 1, 1 };  static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
63    
64  /* Text forms of OP_ values and things, for debugging */  /* Text forms of OP_ values and things, for debugging (not all used) */
65    
66  #ifdef DEBUG  #ifdef DEBUG
67  static const char *OP_names[] = {  static const char *OP_names[] = {
# Line 56  static const char *OP_names[] = { Line 72  static const char *OP_names[] = {
72    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
73    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
74    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
75    "class", "Ref",    "class", "negclass", "Ref",
76    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",
77    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
78  };  };
# Line 67  are simple data values; negative values Line 83  are simple data values; negative values
83  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
84  is invalid. */  is invalid. */
85    
86  static short int escapes[] = {  static const short int escapes[] = {
87      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
88      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
89    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */
# Line 82  static short int escapes[] = { Line 98  static short int escapes[] = {
98    
99  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
100    
101  static BOOL compile_regex(int, int *, uschar **, const uschar **, const char **);  static BOOL
102      compile_regex(int, int *, uschar **, const uschar **, const char **);
103    
104  /* Structure for passing "static" information around between the functions  /* Structure for passing "static" information around between the functions
105  doing the matching, so that they are thread-safe. */  doing the matching, so that they are thread-safe. */
# Line 187  Arguments: Line 204  Arguments:
204  Returns:     nothing  Returns:     nothing
205  */  */
206    
207  static pchars(uschar *p, int length, BOOL is_subject, match_data *md)  static void
208    pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
209  {  {
210  int c;  int c;
211  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 246  do { Line 264  do {
264        case OP_KETRMIN:        case OP_KETRMIN:
265        return TRUE;        return TRUE;
266    
267          /* Skip over entire bracket groups with zero lower bound */
268    
269          case OP_BRAZERO:
270          case OP_BRAMINZERO:
271          cc++;
272          /* Fall through */
273    
274        /* Skip over assertive subpatterns */        /* Skip over assertive subpatterns */
275    
276        case OP_ASSERT:        case OP_ASSERT:
# Line 260  do { Line 285  do {
285        case OP_EOD:        case OP_EOD:
286        case OP_CIRC:        case OP_CIRC:
287        case OP_DOLL:        case OP_DOLL:
       case OP_BRAZERO:  
       case OP_BRAMINZERO:  
288        case OP_NOT_WORD_BOUNDARY:        case OP_NOT_WORD_BOUNDARY:
289        case OP_WORD_BOUNDARY:        case OP_WORD_BOUNDARY:
290        cc++;        cc++;
# Line 296  do { Line 319  do {
319        /* Check a class or a back reference for a zero minimum */        /* Check a class or a back reference for a zero minimum */
320    
321        case OP_CLASS:        case OP_CLASS:
322          case OP_NEGCLASS:
323        case OP_REF:        case OP_REF:
324        cc += (*cc == OP_REF)? 2 : 33;        cc += (*cc == OP_REF)? 2 : 33;
325    
# Line 599  compile_branch(int options, int *bracket Line 623  compile_branch(int options, int *bracket
623  int repeat_type, op_type;  int repeat_type, op_type;
624  int repeat_min, repeat_max;  int repeat_min, repeat_max;
625  int bravalue, length;  int bravalue, length;
626    int greedy_default, greedy_non_default;
627  register int c;  register int c;
628  register uschar *code = *codeptr;  register uschar *code = *codeptr;
629  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
# Line 606  const uschar *oldptr; Line 631  const uschar *oldptr;
631  uschar *previous = NULL;  uschar *previous = NULL;
632  uschar class[32];  uschar class[32];
633    
634    /* Set up the default and non-default settings for greediness */
635    
636    greedy_default = ((options & PCRE_UNGREEDY) != 0);
637    greedy_non_default = greedy_default ^ 1;
638    
639  /* Switch on next character until the end of the branch */  /* Switch on next character until the end of the branch */
640    
641  for (;; ptr++)  for (;; ptr++)
# Line 660  for (;; ptr++) Line 690  for (;; ptr++)
690    
691      case '[':      case '[':
692      previous = code;      previous = code;
     *code++ = OP_CLASS;  
693    
694      /* If the first character is '^', set the negation flag */      /* If the first character is '^', set the negation flag, and use a
695        different opcode. This only matters if caseless matching is specified at
696        runtime. */
697    
698      if ((c = *(++ptr)) == '^')      if ((c = *(++ptr)) == '^')
699        {        {
700        negate_class = TRUE;        negate_class = TRUE;
701          *code++ = OP_NEGCLASS;
702        c = *(++ptr);        c = *(++ptr);
703        }        }
704      else negate_class = FALSE;      else
705          {
706          negate_class = FALSE;
707          *code++ = OP_CLASS;
708          }
709    
710      /* Keep a count of chars so that we can optimize the case of just a single      /* Keep a count of chars so that we can optimize the case of just a single
711      character. */      character. */
# Line 877  for (;; ptr++) Line 913  for (;; ptr++)
913        goto FAILED;        goto FAILED;
914        }        }
915    
916      /* If the next character is '?' this is a minimizing repeat. Advance to the      /* If the next character is '?' this is a minimizing repeat, by default,
917        but if PCRE_UNGREEDY is set, it works the other way round. Advance to the
918      next character. */      next character. */
919    
920      if (ptr[1] == '?') { repeat_type = 1; ptr++; } else repeat_type = 0;      if (ptr[1] == '?')
921          { repeat_type = greedy_non_default; ptr++; }
922        else repeat_type = greedy_default;
923    
924      /* If the maximum is zero then the minimum must also be zero; Perl allows      /* If the maximum is zero then the minimum must also be zero; Perl allows
925      this case, so we do too - by simply omitting the item altogether. */      this case, so we do too - by simply omitting the item altogether. */
# Line 969  for (;; ptr++) Line 1008  for (;; ptr++)
1008          /* If the minimum is 1 and the previous item was a character string,          /* If the minimum is 1 and the previous item was a character string,
1009          we either have to put back the item that got cancelled if the string          we either have to put back the item that got cancelled if the string
1010          length was 1, or add the character back onto the end of a longer          length was 1, or add the character back onto the end of a longer
1011          string. For a character type nothing need be done; it will just get put          string. For a character type nothing need be done; it will just get
1012          back naturally. */          put back naturally. Note that the final character is always going to
1013            get added below. */
1014    
1015          else if (*previous == OP_CHARS)          else if (*previous == OP_CHARS)
1016            {            {
1017            if (code == previous) code += 2; else previous[1]++;            if (code == previous) code += 2; else previous[1]++;
1018            }            }
1019    
1020          /* Insert an UPTO if the max is greater than the min. */          /*  For a single negated character we also have to put back the
1021            item that got cancelled. */
1022    
1023            else if (*previous == OP_NOT) code++;
1024    
1025            /* If the maximum is unlimited, insert an OP_STAR. */
1026    
1027            if (repeat_max < 0)
1028              {
1029              *code++ = c;
1030              *code++ = OP_STAR + repeat_type;
1031              }
1032    
1033            /* Else insert an UPTO if the max is greater than the min. */
1034    
1035          if (repeat_max != repeat_min)          else if (repeat_max != repeat_min)
1036            {            {
1037            *code++ = c;            *code++ = c;
1038            repeat_max -= repeat_min;            repeat_max -= repeat_min;
# Line 997  for (;; ptr++) Line 1050  for (;; ptr++)
1050      /* If previous was a character class or a back reference, we put the repeat      /* If previous was a character class or a back reference, we put the repeat
1051      stuff after it. */      stuff after it. */
1052    
1053      else if (*previous == OP_CLASS || *previous == OP_REF)      else if (*previous == OP_CLASS || *previous == OP_NEGCLASS ||
1054                 *previous == OP_REF)
1055        {        {
1056        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
1057          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
# Line 1023  for (;; ptr++) Line 1077  for (;; ptr++)
1077      else if ((int)*previous >= OP_BRA)      else if ((int)*previous >= OP_BRA)
1078        {        {
1079        int i;        int i;
1080        int length = code - previous;        int len = code - previous;
1081    
1082        if (repeat_max == -1 && could_be_empty(previous))        if (repeat_max == -1 && could_be_empty(previous))
1083          {          {
# Line 1040  for (;; ptr++) Line 1094  for (;; ptr++)
1094          {          {
1095          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1096            {            {
1097            memcpy(code, previous, length);            memcpy(code, previous, len);
1098            code += length;            code += len;
1099            }            }
1100          }          }
1101    
# Line 1053  for (;; ptr++) Line 1107  for (;; ptr++)
1107          {          {
1108          if (repeat_min == 0)          if (repeat_min == 0)
1109            {            {
1110            memmove(previous+1, previous, length);            memmove(previous+1, previous, len);
1111            code++;            code++;
1112            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
1113            }            }
1114    
1115          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1116            {            {
1117            memcpy(code, previous, length);            memcpy(code, previous, len);
1118            code += length;            code += len;
1119            }            }
1120    
1121          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)
1122            {            {
1123            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
1124            memcpy(code, previous, length);            memcpy(code, previous, len);
1125            code += length;            code += len;
1126            }            }
1127          }          }
1128    
# Line 1110  for (;; ptr++) Line 1164  for (;; ptr++)
1164          case 'm':          case 'm':
1165          case 's':          case 's':
1166          case 'x':          case 'x':
1167            case 'U':
1168            case 'X':
1169          ptr++;          ptr++;
1170          while (*ptr != ')') ptr++;          while (*ptr != ')') ptr++;
1171          previous = NULL;          previous = NULL;
# Line 1269  for (;; ptr++) Line 1325  for (;; ptr++)
1325      the next state. */      the next state. */
1326    
1327      previous[1] = length;      previous[1] = length;
1328      ptr--;      if (length < 255) ptr--;
1329      break;      break;
1330      }      }
1331    }                   /* end of big loop */    }                   /* end of big loop */
# Line 1529  if ((options & ~PUBLIC_OPTIONS) != 0) Line 1585  if ((options & ~PUBLIC_OPTIONS) != 0)
1585    return NULL;    return NULL;
1586    }    }
1587    
1588  #ifdef DEBUG  DPRINTF(("------------------------------------------------------------------\n"));
1589  printf("------------------------------------------------------------------\n");  DPRINTF(("%s\n", pattern));
 printf("%s\n", pattern);  
 #endif  
1590    
1591  /* The first thing to do is to make a pass over the pattern to compute the  /* The first thing to do is to make a pass over the pattern to compute the
1592  amount of store required to hold the compiled code. This does not have to be  amount of store required to hold the compiled code. This does not have to be
# Line 1647  while ((c = *(++ptr)) != 0) Line 1701  while ((c = *(++ptr)) != 0)
1701        {        {
1702        if (*ptr == '\\')        if (*ptr == '\\')
1703          {          {
1704          int c = check_escape(&ptr, errorptr, bracount, options, TRUE);          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE);
1705          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1706          if (-c == ESC_b) class_charcount++; else class_charcount = 10;          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
1707          }          }
1708        else class_charcount++;        else class_charcount++;
1709        ptr++;        ptr++;
# Line 1664  while ((c = *(++ptr)) != 0) Line 1718  while ((c = *(++ptr)) != 0)
1718    
1719        /* A repeat needs either 1 or 5 bytes. */        /* A repeat needs either 1 or 5 bytes. */
1720    
1721        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))
1722          {          {
1723          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
1724          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
# Line 1715  while ((c = *(++ptr)) != 0) Line 1769  while ((c = *(++ptr)) != 0)
1769          ptr += 2;          ptr += 2;
1770          break;          break;
1771          }          }
1772        /* Else fall thourh */        /* Else fall through */
1773    
1774        /* Else loop setting valid options until ) is met. Anything else is an        /* Else loop setting valid options until ) is met. Anything else is an
1775        error. */        error. */
# Line 1745  while ((c = *(++ptr)) != 0) Line 1799  while ((c = *(++ptr)) != 0)
1799            length -= spaces;          /* Already counted spaces */            length -= spaces;          /* Already counted spaces */
1800            continue;            continue;
1801            }            }
1802            else if (c == 'X')
1803              {
1804              options |= PCRE_EXTRA;
1805              continue;
1806              }
1807            else if (c == 'U')
1808              {
1809              options |= PCRE_UNGREEDY;
1810              continue;
1811              }
1812          else if (c == ')') break;          else if (c == ')') break;
1813    
1814          *errorptr = ERR12;          *errorptr = ERR12;
# Line 1772  while ((c = *(++ptr)) != 0) Line 1836  while ((c = *(++ptr)) != 0)
1836      continue;      continue;
1837    
1838      /* Handle ket. Look for subsequent max/min; for certain sets of values we      /* Handle ket. Look for subsequent max/min; for certain sets of values we
1839      have to replicate this bracket up to that many times. */      have to replicate this bracket up to that many times. If brastackptr is
1840        0 this is an unmatched bracket which will generate an error, but take care
1841        not to try to access brastack[-1]. */
1842    
1843      case ')':      case ')':
1844      length += 3;      length += 3;
1845        {        {
1846        int min = 1;        int minval = 1;
1847        int max = 1;        int maxval = 1;
1848        int duplength = length - brastack[--brastackptr];        int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0;
1849    
1850        /* Leave ptr at the final char; for read_repeat_counts this happens        /* Leave ptr at the final char; for read_repeat_counts this happens
1851        automatically; for the others we need an increment. */        automatically; for the others we need an increment. */
1852    
1853        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))
1854          {          {
1855          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr);
1856          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1857          }          }
1858        else if (c == '*') { min = 0; max = -1; ptr++; }        else if (c == '*') { minval = 0; maxval = -1; ptr++; }
1859        else if (c == '+') { max = -1; ptr++; }        else if (c == '+') { maxval = -1; ptr++; }
1860        else if (c == '?') { min = 0; ptr++; }        else if (c == '?') { minval = 0; ptr++; }
1861    
1862        /* If there is a minimum > 1 we have to replicate up to min-1 times; if        /* If there is a minimum > 1 we have to replicate up to minval-1 times;
1863        there is a limited maximum we have to replicate up to max-1 times and        if there is a limited maximum we have to replicate up to maxval-1 times
1864        allow for a BRAZERO item before each optional copy, as we also have to        and allow for a BRAZERO item before each optional copy, as we also have
1865        do before the first copy if the minimum is zero. */        to do before the first copy if the minimum is zero. */
1866    
1867        if (min == 0) length++;        if (minval == 0) length++;
1868          else if (min > 1) length += (min - 1) * duplength;          else if (minval > 1) length += (minval - 1) * duplength;
1869        if (max > min) length += (max - min) * (duplength + 1);        if (maxval > minval) length += (maxval - minval) * (duplength + 1);
1870        }        }
   
1871      continue;      continue;
1872    
1873      /* Non-special character. For a run of such characters the length required      /* Non-special character. For a run of such characters the length required
# Line 1863  if (length > 65539) Line 1928  if (length > 65539)
1928    }    }
1929    
1930  /* Compute the size of data block needed and get it, either from malloc or  /* Compute the size of data block needed and get it, either from malloc or
1931  externally provided function. Put in the magic number and the options. */  externally provided function. We specify "code[0]" in the offsetof() expression
1932    rather than just "code", because it has been reported that one broken compiler
1933    fails on "code" because it is also an independent variable. It should make no
1934    difference to the value of the offsetof(). */
1935    
1936  size = length + offsetof(real_pcre, code);  size = length + offsetof(real_pcre, code[0]);
1937  re = (real_pcre *)(pcre_malloc)(size);  re = (real_pcre *)(pcre_malloc)(size);
1938    
1939  if (re == NULL)  if (re == NULL)
# Line 1874  if (re == NULL) Line 1942  if (re == NULL)
1942    return NULL;    return NULL;
1943    }    }
1944    
1945    /* Put in the magic number and the options. */
1946    
1947  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
1948  re->options = options;  re->options = options;
1949    
# Line 1924  if ((options & PCRE_ANCHORED) == 0) Line 1994  if ((options & PCRE_ANCHORED) == 0)
1994      re->options |= PCRE_ANCHORED;      re->options |= PCRE_ANCHORED;
1995    else    else
1996      {      {
1997      int c = find_firstchar(re->code);      int ch = find_firstchar(re->code);
1998      if (c >= 0)      if (ch >= 0)
1999        {        {
2000        re->first_char = c;        re->first_char = ch;
2001        re->options |= PCRE_FIRSTSET;        re->options |= PCRE_FIRSTSET;
2002        }        }
2003      else if (is_startline(re->code))      else if (is_startline(re->code))
# Line 1944  printf("Length = %d top_bracket = %d top Line 2014  printf("Length = %d top_bracket = %d top
2014    
2015  if (re->options != 0)  if (re->options != 0)
2016    {    {
2017    printf("%s%s%s%s%s%s%s\n",    printf("%s%s%s%s%s%s%s%s\n",
2018      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
2019      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
2020      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
2021      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
2022      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
2023      ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",      ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
2024      ((re->options & PCRE_EXTRA) != 0)? "extra " : "");      ((re->options & PCRE_EXTRA) != 0)? "extra " : "",
2025        ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");
2026    }    }
2027    
2028  if ((re->options & PCRE_FIRSTSET) != 0)  if ((re->options & PCRE_FIRSTSET) != 0)
# Line 2019  while (code < code_end) Line 2090  while (code < code_end)
2090      case OP_MINUPTO:      case OP_MINUPTO:
2091      if (isprint(c = code[3])) printf("    %c{", c);      if (isprint(c = code[3])) printf("    %c{", c);
2092        else printf("    \\x%02x{", c);        else printf("    \\x%02x{", c);
2093      if (*code != OP_EXACT) printf(",");      if (*code != OP_EXACT) printf("0,");
2094      printf("%d}", (code[1] << 8) + code[2]);      printf("%d}", (code[1] << 8) + code[2]);
2095      if (*code == OP_MINUPTO) printf("?");      if (*code == OP_MINUPTO) printf("?");
2096      code += 3;      code += 3;
# Line 2064  while (code < code_end) Line 2135  while (code < code_end)
2135    
2136      case OP_REF:      case OP_REF:
2137      printf("    \\%d", *(++code));      printf("    \\%d", *(++code));
2138      break;      code ++;
2139        goto CLASS_REF_REPEAT;
2140    
2141      case OP_CLASS:      case OP_CLASS:
2142        case OP_NEGCLASS:
2143        {        {
2144        int i, min, max;        int i, min, max;
2145    
2146        code++;        if (*code++ == OP_CLASS) printf("    [");
2147        printf("    [");          else printf("   ^[");
2148    
2149        for (i = 0; i < 256; i++)        for (i = 0; i < 256; i++)
2150          {          {
# Line 2094  while (code < code_end) Line 2167  while (code < code_end)
2167        printf("]");        printf("]");
2168        code += 32;        code += 32;
2169    
2170          CLASS_REF_REPEAT:
2171    
2172        switch(*code)        switch(*code)
2173          {          {
2174          case OP_CRSTAR:          case OP_CRSTAR:
# Line 2278  for (;;) Line 2353  for (;;)
2353      int number = (*ecode - OP_BRA) << 1;      int number = (*ecode - OP_BRA) << 1;
2354      int save_offset1 = 0, save_offset2 = 0;      int save_offset1 = 0, save_offset2 = 0;
2355    
2356      #ifdef DEBUG      DPRINTF(("start bracket %d\n", number/2));
     printf("start bracket %d\n", number/2);  
     #endif  
2357    
2358      if (number > 0 && number < md->offset_end)      if (number > 0 && number < md->offset_end)
2359        {        {
# Line 2288  for (;;) Line 2361  for (;;)
2361        save_offset2 = md->offset_vector[number+1];        save_offset2 = md->offset_vector[number+1];
2362        md->offset_vector[number] = eptr - md->start_subject;        md->offset_vector[number] = eptr - md->start_subject;
2363    
2364        #ifdef DEBUG        DPRINTF(("saving %d %d\n", save_offset1, save_offset2));
       printf("saving %d %d\n", save_offset1, save_offset2);  
       #endif  
2365        }        }
2366    
2367      /* Recurse for all the alternatives. */      /* Recurse for all the alternatives. */
# Line 2302  for (;;) Line 2373  for (;;)
2373        }        }
2374      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
2375    
2376      #ifdef DEBUG      DPRINTF(("bracket %d failed\n", number/2));
     printf("bracket %d failed\n", number/2);  
     #endif  
2377    
2378      if (number > 0 && number < md->offset_end)      if (number > 0 && number < md->offset_end)
2379        {        {
# Line 2443  for (;;) Line 2512  for (;;)
2512    
2513        number = (*prev - OP_BRA) << 1;        number = (*prev - OP_BRA) << 1;
2514    
2515        #ifdef DEBUG        DPRINTF(("end bracket %d\n", number/2));
       printf("end bracket %d\n", number/2);  
       #endif  
2516    
2517        if (number > 0)        if (number > 0)
2518          {          {
# Line 2697  for (;;) Line 2764  for (;;)
2764      item to see if there is repeat information following. Then obey similar      item to see if there is repeat information following. Then obey similar
2765      code to character type repeats - written out again for speed. If caseless      code to character type repeats - written out again for speed. If caseless
2766      matching was set at runtime but not at compile time, we have to check both      matching was set at runtime but not at compile time, we have to check both
2767      versions of a character. */      versions of a character, and we have to behave differently for positive and
2768        negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are
2769        treated differently. */
2770    
2771      case OP_CLASS:      case OP_CLASS:
2772        case OP_NEGCLASS:
2773        {        {
2774          BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless;
2775        const uschar *data = ecode + 1;  /* Save for matching */        const uschar *data = ecode + 1;  /* Save for matching */
2776        ecode += 33;                     /* Advance past the item */        ecode += 33;                     /* Advance past the item */
2777    
# Line 2729  for (;;) Line 2800  for (;;)
2800          break;          break;
2801    
2802          default:               /* No repeat follows */          default:               /* No repeat follows */
2803          if (eptr >= md->end_subject) return FALSE;          min = max = 1;
2804          c = *eptr++;          break;
         if ((data[c/8] & (1 << (c&7))) != 0) continue;    /* With main loop */  
         if (md->runtime_caseless)  
           {  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  /* With main loop */  
           }  
         return FALSE;  
2805          }          }
2806    
2807        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
# Line 2746  for (;;) Line 2810  for (;;)
2810          {          {
2811          if (eptr >= md->end_subject) return FALSE;          if (eptr >= md->end_subject) return FALSE;
2812          c = *eptr++;          c = *eptr++;
2813          if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2814          if (md->runtime_caseless)          /* Either not runtime caseless, or it was a positive class. For
2815            runtime caseless, continue if either case is in the map. */
2816    
2817            if (!nasty_case)
2818              {
2819              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2820              if (md->runtime_caseless)
2821                {
2822                c = pcre_fcc[c];
2823                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2824                }
2825              }
2826    
2827            /* Runtime caseless and it was a negative class. Continue only if
2828            both cases are in the map. */
2829    
2830            else
2831            {            {
2832              if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2833            c = pcre_fcc[c];            c = pcre_fcc[c];
2834            if ((data[c/8] & (1 << (c&7))) != 0) continue;            if ((data[c/8] & (1 << (c&7))) != 0) continue;
2835            }            }
2836    
2837          return FALSE;          return FALSE;
2838          }          }
2839    
# Line 2770  for (;;) Line 2852  for (;;)
2852            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md)) return TRUE;
2853            if (i >= max || eptr >= md->end_subject) return FALSE;            if (i >= max || eptr >= md->end_subject) return FALSE;
2854            c = *eptr++;            c = *eptr++;
2855            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2856            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2857              runtime caseless, continue if either case is in the map. */
2858    
2859              if (!nasty_case)
2860                {
2861                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2862                if (md->runtime_caseless)
2863                  {
2864                  c = pcre_fcc[c];
2865                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2866                  }
2867                }
2868    
2869              /* Runtime caseless and it was a negative class. Continue only if
2870              both cases are in the map. */
2871    
2872              else
2873              {              {
2874                if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2875              c = pcre_fcc[c];              c = pcre_fcc[c];
2876              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2877              }              }
2878    
2879            return FALSE;            return FALSE;
2880            }            }
2881          /* Control never gets here */          /* Control never gets here */
# Line 2790  for (;;) Line 2890  for (;;)
2890            {            {
2891            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2892            c = *eptr;            c = *eptr;
2893            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2894            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2895              runtime caseless, continue if either case is in the map. */
2896    
2897              if (!nasty_case)
2898              {              {
2899                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2900                if (md->runtime_caseless)
2901                  {
2902                  c = pcre_fcc[c];
2903                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2904                  }
2905                }
2906    
2907              /* Runtime caseless and it was a negative class. Continue only if
2908              both cases are in the map. */
2909    
2910              else
2911                {
2912                if ((data[c/8] & (1 << (c&7))) == 0) break;
2913              c = pcre_fcc[c];              c = pcre_fcc[c];
2914              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2915              }              }
2916    
2917            break;            break;
2918            }            }
2919    
# Line 2813  for (;;) Line 2931  for (;;)
2931        register int length = ecode[1];        register int length = ecode[1];
2932        ecode += 2;        ecode += 2;
2933    
2934        #ifdef DEBUG  #ifdef DEBUG    /* Sigh. Some compilers never learn. */
2935        if (eptr >= md->end_subject)        if (eptr >= md->end_subject)
2936          printf("matching subject <null> against pattern ");          printf("matching subject <null> against pattern ");
2937        else        else
# Line 2824  for (;;) Line 2942  for (;;)
2942          }          }
2943        pchars(ecode, length, FALSE, md);        pchars(ecode, length, FALSE, md);
2944        printf("\n");        printf("\n");
2945        #endif  #endif
2946    
2947        if (length > md->end_subject - eptr) return FALSE;        if (length > md->end_subject - eptr) return FALSE;
2948        if (md->caseless)        if (md->caseless)
# Line 2881  for (;;) Line 2999  for (;;)
2999      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
3000      characters and work backwards. */      characters and work backwards. */
3001    
3002      #ifdef DEBUG      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
3003      printf("matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
3004    
3005      if (md->caseless)      if (md->caseless)
3006        {        {
# Line 2949  for (;;) Line 3065  for (;;)
3065      /* Match a negated single character */      /* Match a negated single character */
3066    
3067      case OP_NOT:      case OP_NOT:
3068      if (eptr > md->end_subject) return FALSE;      if (eptr >= md->end_subject) return FALSE;
3069      ecode++;      ecode++;
3070      if (md->caseless)      if (md->caseless)
3071        {        {
# Line 3008  for (;;) Line 3124  for (;;)
3124      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
3125      characters and work backwards. */      characters and work backwards. */
3126    
3127      #ifdef DEBUG      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3128      printf("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
3129    
3130      if (md->caseless)      if (md->caseless)
3131        {        {
# Line 3261  for (;;) Line 3375  for (;;)
3375      /* There's been some horrible disaster. */      /* There's been some horrible disaster. */
3376    
3377      default:      default:
3378      #ifdef DEBUG      DPRINTF(("Unknown opcode %d\n", *ecode));
     printf("Unknown opcode %d\n", *ecode);  
     #endif  
3379      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
3380      return FALSE;      return FALSE;
3381      }      }
# Line 3279  for (;;) Line 3391  for (;;)
3391    
3392    
3393  /*************************************************  /*************************************************
3394    *         Segregate setjmp()                     *
3395    *************************************************/
3396    
3397    /* The -Wall option of gcc gives warnings for all local variables when setjmp()
3398    is used, even if the coding conforms to the rules of ANSI C. To avoid this, we
3399    hide it in a separate function. This is called only when PCRE_EXTRA is set,
3400    since it's needed only for the extension \X option, and with any luck, a good
3401    compiler will spot the tail recursion and compile it efficiently.
3402    
3403    Arguments:
3404       eptr        pointer in subject
3405       ecode       position in code
3406       offset_top  current top pointer
3407       match_block pointer to "static" info for the match
3408    
3409    Returns:       TRUE if matched
3410    */
3411    
3412    static BOOL
3413    match_with_setjmp(const uschar *eptr, const uschar *ecode, int offset_top,
3414      match_data *match_block)
3415    {
3416    return setjmp(match_block->fail_env) == 0 &&
3417          match(eptr, ecode, offset_top, match_block);
3418    }
3419    
3420    
3421    
3422    /*************************************************
3423  *         Execute a Regular Expression           *  *         Execute a Regular Expression           *
3424  *************************************************/  *************************************************/
3425    
# Line 3305  int Line 3446  int
3446  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
3447    const char *subject, int length, int options, int *offsets, int offsetcount)    const char *subject, int length, int options, int *offsets, int offsetcount)
3448  {  {
3449  int resetcount;  int resetcount, ocount;
 int ocount = offsetcount;  
3450  int first_char = -1;  int first_char = -1;
3451  match_data match_block;  match_data match_block;
3452  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3453  const uschar *start_match = (uschar *)subject;  const uschar *start_match = (const uschar *)subject;
3454  const uschar *end_subject;  const uschar *end_subject;
3455  const real_pcre *re = (const real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
3456  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
3457    BOOL using_temporary_offsets = FALSE;
3458  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3459  BOOL startline = (re->options & PCRE_STARTLINE) != 0;  BOOL startline = (re->options & PCRE_STARTLINE) != 0;
3460    
# Line 3342  match_block.errorcode = PCRE_ERROR_NOMAT Line 3483  match_block.errorcode = PCRE_ERROR_NOMAT
3483    
3484  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
3485  hold, we get a temporary bit of working store to use during the matching.  hold, we get a temporary bit of working store to use during the matching.
3486  Otherwise, we can use the vector supplied, rounding down the size of it to a  Otherwise, we can use the vector supplied, rounding down its size to a multiple
3487  multiple of 2. */  of 2. */
3488    
3489  ocount &= (-2);  ocount = offsetcount & (-2);
3490  if (re->top_backref > 0 && re->top_backref + 1 >= ocount/2)  if (re->top_backref > 0 && re->top_backref >= ocount/2)
3491    {    {
3492    ocount = re->top_backref * 2 + 2;    ocount = re->top_backref * 2 + 2;
3493    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3494    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3495    #ifdef DEBUG    using_temporary_offsets = TRUE;
3496    printf("Got memory to hold back references\n");    DPRINTF(("Got memory to hold back references\n"));
   #endif  
3497    }    }
3498  else match_block.offset_vector = offsets;  else match_block.offset_vector = offsets;
3499    
# Line 3406  if (!anchored) Line 3546  if (!anchored)
3546    
3547  do  do
3548    {    {
3549      int rc;
3550    register int *iptr = match_block.offset_vector;    register int *iptr = match_block.offset_vector;
3551    register int *iend = iptr + resetcount;    register int *iend = iptr + resetcount;
3552    
# Line 3447  do Line 3588  do
3588        }        }
3589      }      }
3590    
3591    #ifdef DEBUG  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
3592    printf(">>>> Match against: ");    printf(">>>> Match against: ");
3593    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, &match_block);
3594    printf("\n");    printf("\n");
3595    #endif  #endif
3596    
3597    /* When a match occurs, substrings will be set for all internal extractions;    /* When a match occurs, substrings will be set for all internal extractions;
3598    we just need to set up the whole thing as substring 0 before returning. If    we just need to set up the whole thing as substring 0 before returning. If
# Line 3461  do Line 3602  do
3602    if certain parts of the pattern were not used.    if certain parts of the pattern were not used.
3603    
3604    Before starting the match, we have to set up a longjmp() target to enable    Before starting the match, we have to set up a longjmp() target to enable
3605    the "cut" operation to fail a match completely without backtracking. */    the "cut" operation to fail a match completely without backtracking. This
3606      is done in a separate function to avoid compiler warnings. We need not do
3607      it unless PCRE_EXTRA is set, since only in that case is the "cut" operation
3608      enabled. */
3609    
3610    if (setjmp(match_block.fail_env) == 0 &&    if ((re->options & PCRE_EXTRA) != 0)
       match(start_match, re->code, 2, &match_block))  
3611      {      {
3612      int rc;      if (!match_with_setjmp(start_match, re->code, 2, &match_block))
3613          continue;
3614      if (ocount != offsetcount)      }
3615        {    else if (!match(start_match, re->code, 2, &match_block)) continue;
       if (offsetcount >= 4)  
         {  
         memcpy(offsets + 2, match_block.offset_vector + 2,  
           (offsetcount - 2) * sizeof(int));  
         #ifdef DEBUG  
         printf("Copied offsets; freeing temporary memory\n");  
         #endif  
         }  
       if (match_block.end_offset_top > offsetcount)  
         match_block.offset_overflow = TRUE;  
3616    
3617        #ifdef DEBUG    /* Copy the offset information from temporary store if necessary */
       printf("Freeing temporary memory\n");  
       #endif  
3618    
3619        (pcre_free)(match_block.offset_vector);    if (using_temporary_offsets)
3620        {
3621        if (offsetcount >= 4)
3622          {
3623          memcpy(offsets + 2, match_block.offset_vector + 2,
3624            (offsetcount - 2) * sizeof(int));
3625          DPRINTF(("Copied offsets from temporary memory\n"));
3626        }        }
3627        if (match_block.end_offset_top > offsetcount)
3628          match_block.offset_overflow = TRUE;
3629    
3630      rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;      DPRINTF(("Freeing temporary memory\n"));
3631        (pcre_free)(match_block.offset_vector);
3632        }
3633    
3634      if (match_block.offset_end < 2) rc = 0; else    rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
       {  
       offsets[0] = start_match - match_block.start_subject;  
       offsets[1] = match_block.end_match_ptr - match_block.start_subject;  
       }  
3635    
3636      #ifdef DEBUG    if (match_block.offset_end < 2) rc = 0; else
3637      printf(">>>> returning %d\n", rc);      {
3638      #endif      offsets[0] = start_match - match_block.start_subject;
3639      return rc;      offsets[1] = match_block.end_match_ptr - match_block.start_subject;
3640      }      }
3641    
3642      DPRINTF((">>>> returning %d\n", rc));
3643      return rc;
3644    }    }
3645  while (!anchored &&  while (!anchored &&
3646         match_block.errorcode == PCRE_ERROR_NOMATCH &&         match_block.errorcode == PCRE_ERROR_NOMATCH &&
3647         start_match++ < end_subject);         start_match++ < end_subject);
3648    
3649  #ifdef DEBUG  if (using_temporary_offsets)
3650  printf(">>>> returning %d\n", match_block.errorcode);    {
3651  #endif    DPRINTF(("Freeing temporary memory\n"));
3652      (pcre_free)(match_block.offset_vector);
3653      }
3654    
3655    DPRINTF((">>>> returning %d\n", match_block.errorcode));
3656    
3657  return match_block.errorcode;  return match_block.errorcode;
3658  }  }

Legend:
Removed from v.7  
changed lines
  Added in v.22

  ViewVC Help
Powered by ViewVC 1.1.5