/[pcre]/code/tags/pcre-1.04/pcre.c
ViewVC logotype

Diff of /code/tags/pcre-1.04/pcre.c

Parent Directory | Revision Log | View Patch

revision 5 by nigel, Sat Feb 24 21:38:05 2007 UTC revision 9 by nigel, Sat Feb 24 21:38:13 2007 UTC
# Line 33  restrictions: Line 33  restrictions:
33    
34  /* #define DEBUG */  /* #define DEBUG */
35    
36    /* Use a macro for debugging printing, 'cause that eliminates the use
37    of #ifdef inline, and there are *still* stupid compilers about that don't like
38    indented pre-processor statements. I suppose it's only been 10 years... */
39    
40    #ifdef DEBUG
41    #define DPRINTF(p) printf p
42    #else
43    #define DPRINTF(p) /*nothing*/
44    #endif
45    
46  /* Include the internals header, which itself includes Standard C headers plus  /* Include the internals header, which itself includes Standard C headers plus
47  the external pcre header. */  the external pcre header. */
# Line 48  static char rep_max[] = { 0, 0, 0, 0, 1, Line 57  static char rep_max[] = { 0, 0, 0, 0, 1,
57  /* Text forms of OP_ values and things, for debugging */  /* Text forms of OP_ values and things, for debugging */
58    
59  #ifdef DEBUG  #ifdef DEBUG
60  static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d",  static const char *OP_names[] = {
61      "End", "\\A", "\\B", "\\b", "\\D", "\\d",
62    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",
63    "not",    "not",
64    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
# Line 81  static short int escapes[] = { Line 91  static short int escapes[] = {
91    
92  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
93    
94  static BOOL compile_regex(int, int *,uschar **,uschar **,char **);  static BOOL compile_regex(int, int *, uschar **, const uschar **, const char **);
95    
96  /* Structure for passing "static" information around between the functions  /* Structure for passing "static" information around between the functions
97  doing the matching, so that they are thread-safe. */  doing the matching, so that they are thread-safe. */
# Line 98  typedef struct match_data { Line 108  typedef struct match_data {
108    BOOL   noteol;                /* NOTEOL flag */    BOOL   noteol;                /* NOTEOL flag */
109    BOOL   dotall;                /* Dot matches any char */    BOOL   dotall;                /* Dot matches any char */
110    BOOL   endonly;               /* Dollar not before final \n */    BOOL   endonly;               /* Dollar not before final \n */
111    uschar *start_subject;        /* Start of the subject string */    const uschar *start_subject;  /* Start of the subject string */
112    uschar *end_subject;          /* End of the subject string */    const uschar *end_subject;    /* End of the subject string */
113    jmp_buf fail_env;             /* Environment for longjmp() break out */    jmp_buf fail_env;             /* Environment for longjmp() break out */
114    uschar *end_match_ptr;        /* Subject position at end match */    const uschar *end_match_ptr;  /* Subject position at end match */
115    int     end_offset_top;       /* Highwater mark at end of match */    int     end_offset_top;       /* Highwater mark at end of match */
116  } match_data;  } match_data;
117    
# Line 126  void  (*pcre_free)(void *) = free; Line 136  void  (*pcre_free)(void *) = free;
136  *          Return version string                 *  *          Return version string                 *
137  *************************************************/  *************************************************/
138    
139  char *  const char *
140  pcre_version(void)  pcre_version(void)
141  {  {
142  return PCRE_VERSION;  return PCRE_VERSION;
# Line 156  Returns:        number of identifying ex Line 166  Returns:        number of identifying ex
166  int  int
167  pcre_info(const pcre *external_re, int *optptr, int *first_char)  pcre_info(const pcre *external_re, int *optptr, int *first_char)
168  {  {
169  real_pcre *re = (real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
170  if (re == NULL) return PCRE_ERROR_NULL;  if (re == NULL) return PCRE_ERROR_NULL;
171  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
172  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);
# Line 186  Arguments: Line 196  Arguments:
196  Returns:     nothing  Returns:     nothing
197  */  */
198    
199  static pchars(uschar *p, int length, BOOL is_subject, match_data *md)  static void
200    pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
201  {  {
202  int c;  int c;
203  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 360  Returns:     zero or positive => a data Line 371  Returns:     zero or positive => a data
371  */  */
372    
373  static int  static int
374  check_escape(uschar **ptrptr, char **errorptr, int bracount, int options,  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
375    BOOL isclass)    int options, BOOL isclass)
376  {  {
377  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
378  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */
379  int i;  int i;
380    
# Line 382  else if ((i = escapes[c - '0']) != 0) c Line 393  else if ((i = escapes[c - '0']) != 0) c
393    
394  else  else
395    {    {
396    uschar *oldptr;    const uschar *oldptr;
397    switch (c)    switch (c)
398      {      {
399      /* The handling of escape sequences consisting of a string of digits      /* The handling of escape sequences consisting of a string of digits
# Line 502  Returns:    TRUE or FALSE Line 513  Returns:    TRUE or FALSE
513  */  */
514    
515  static BOOL  static BOOL
516  is_counted_repeat(uschar *p)  is_counted_repeat(const uschar *p)
517  {  {
518  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;
519  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;
# Line 537  Returns:     pointer to '}' on success; Line 548  Returns:     pointer to '}' on success;
548               current ptr on error, with errorptr set               current ptr on error, with errorptr set
549  */  */
550    
551  static uschar *  static const uschar *
552  read_repeat_counts(uschar *p, int *minp, int *maxp, char **errorptr)  read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)
553  {  {
554  int min = 0;  int min = 0;
555  int max = -1;  int max = -1;
# Line 592  Returns:     TRUE on success Line 603  Returns:     TRUE on success
603  */  */
604    
605  static BOOL  static BOOL
606  compile_branch(int options, int *brackets, uschar **codeptr, uschar **ptrptr,  compile_branch(int options, int *brackets, uschar **codeptr,
607    char **errorptr)    const uschar **ptrptr, const char **errorptr)
608  {  {
609  int repeat_type, op_type;  int repeat_type, op_type;
610  int repeat_min, repeat_max;  int repeat_min, repeat_max;
611  int bravalue, length;  int bravalue, length;
612  register int c;  register int c;
613  register uschar *code = *codeptr;  register uschar *code = *codeptr;
614  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
615    const uschar *oldptr;
616  uschar *previous = NULL;  uschar *previous = NULL;
 uschar *oldptr;  
617  uschar class[32];  uschar class[32];
618    
619  /* Switch on next character until the end of the branch */  /* Switch on next character until the end of the branch */
# Line 697  for (;; ptr++) Line 708  for (;; ptr++)
708        /* Backslash may introduce a single character, or it may introduce one        /* Backslash may introduce a single character, or it may introduce one
709        of the specials, which just set a flag. Escaped items are checked for        of the specials, which just set a flag. Escaped items are checked for
710        validity in the pre-compiling pass. The sequence \b is a special case.        validity in the pre-compiling pass. The sequence \b is a special case.
711        Inside a class (and only there) it is treated as backslash. Elsewhere        Inside a class (and only there) it is treated as backspace. Elsewhere
712        it marks a word boundary. Other escapes have preset maps ready to        it marks a word boundary. Other escapes have preset maps ready to
713        or into the one we are building. We assume they have more than one        or into the one we are building. We assume they have more than one
714        character in them, so set class_count bigger than one. */        character in them, so set class_count bigger than one. */
# Line 976  for (;; ptr++) Line 987  for (;; ptr++)
987            if (code == previous) code += 2; else previous[1]++;            if (code == previous) code += 2; else previous[1]++;
988            }            }
989    
990          /* Insert an UPTO if the max is greater than the min. */          /* If the maximum is unlimited, insert an OP_STAR. */
991    
992            if (repeat_max < 0)
993              {
994              *code++ = c;
995              *code++ = OP_STAR + repeat_type;
996              }
997    
998            /* Else insert an UPTO if the max is greater than the min. */
999    
1000          if (repeat_max != repeat_min)          else if (repeat_max != repeat_min)
1001            {            {
1002            *code++ = c;            *code++ = c;
1003            repeat_max -= repeat_min;            repeat_max -= repeat_min;
# Line 1022  for (;; ptr++) Line 1041  for (;; ptr++)
1041      else if ((int)*previous >= OP_BRA)      else if ((int)*previous >= OP_BRA)
1042        {        {
1043        int i;        int i;
1044        int length = code - previous;        int len = code - previous;
1045    
1046        if (repeat_max == -1 && could_be_empty(previous))        if (repeat_max == -1 && could_be_empty(previous))
1047          {          {
# Line 1039  for (;; ptr++) Line 1058  for (;; ptr++)
1058          {          {
1059          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1060            {            {
1061            memcpy(code, previous, length);            memcpy(code, previous, len);
1062            code += length;            code += len;
1063            }            }
1064          }          }
1065    
# Line 1052  for (;; ptr++) Line 1071  for (;; ptr++)
1071          {          {
1072          if (repeat_min == 0)          if (repeat_min == 0)
1073            {            {
1074            memmove(previous+1, previous, length);            memmove(previous+1, previous, len);
1075            code++;            code++;
1076            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
1077            }            }
1078    
1079          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1080            {            {
1081            memcpy(code, previous, length);            memcpy(code, previous, len);
1082            code += length;            code += len;
1083            }            }
1084    
1085          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)
1086            {            {
1087            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
1088            memcpy(code, previous, length);            memcpy(code, previous, len);
1089            code += length;            code += len;
1090            }            }
1091          }          }
1092    
# Line 1214  for (;; ptr++) Line 1233  for (;; ptr++)
1233        continue;        continue;
1234        }        }
1235    
1236      /* Reset and fall through */      /* Data character: reset and fall through */
1237    
1238      ptr = oldptr;      ptr = oldptr;
1239      c = '\\';      c = '\\';
# Line 1305  Returns:    TRUE on success Line 1324  Returns:    TRUE on success
1324  */  */
1325    
1326  static BOOL  static BOOL
1327  compile_regex(int options, int *brackets, uschar **codeptr, uschar **ptrptr,  compile_regex(int options, int *brackets, uschar **codeptr,
1328    char **errorptr)    const uschar **ptrptr, const char **errorptr)
1329  {  {
1330  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
1331  uschar *code = *codeptr;  uschar *code = *codeptr;
1332  uschar *start_bracket = code;  uschar *start_bracket = code;
1333    
# Line 1374  Returns:   TRUE or FALSE Line 1393  Returns:   TRUE or FALSE
1393  */  */
1394    
1395  static BOOL  static BOOL
1396  is_anchored(register uschar *code, BOOL multiline)  is_anchored(register const uschar *code, BOOL multiline)
1397  {  {
1398  do {  do {
1399     int op = (int)code[3];     int op = (int)code[3];
# Line 1403  Returns:   TRUE or FALSE Line 1422  Returns:   TRUE or FALSE
1422  */  */
1423    
1424  static BOOL  static BOOL
1425  is_startline(uschar *code)  is_startline(const uschar *code)
1426  {  {
1427  do {  do {
1428     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)
# Line 1488  Returns:       pointer to compiled data Line 1507  Returns:       pointer to compiled data
1507  */  */
1508    
1509  pcre *  pcre *
1510  pcre_compile(const char *pattern, int options, char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
1511    int *erroroffset)    int *erroroffset)
1512  {  {
1513  real_pcre *re;  real_pcre *re;
# Line 1498  int runlength; Line 1517  int runlength;
1517  int c, size;  int c, size;
1518  int bracount = 0;  int bracount = 0;
1519  int brastack[200];  int brastack[200];
 int brastackptr = 0;  
1520  int top_backref = 0;  int top_backref = 0;
1521  uschar *code, *ptr;  unsigned int brastackptr = 0;
1522    uschar *code;
1523    const uschar *ptr;
1524    
1525  #ifdef DEBUG  #ifdef DEBUG
1526  uschar *code_base, *code_end;  uschar *code_base, *code_end;
# Line 1527  if ((options & ~PUBLIC_OPTIONS) != 0) Line 1547  if ((options & ~PUBLIC_OPTIONS) != 0)
1547    return NULL;    return NULL;
1548    }    }
1549    
1550  #ifdef DEBUG  DPRINTF(("------------------------------------------------------------------\n"));
1551  printf("------------------------------------------------------------------\n");  DPRINTF(("%s\n", pattern));
 printf("%s\n", pattern);  
 #endif  
1552    
1553  /* The first thing to do is to make a pass over the pattern to compute the  /* The first thing to do is to make a pass over the pattern to compute the
1554  amount of store required to hold the compiled code. This does not have to be  amount of store required to hold the compiled code. This does not have to be
# Line 1539  internal flag settings. Make an attempt Line 1557  internal flag settings. Make an attempt
1557  if an "extended" flag setting appears late in the pattern. We can't be so  if an "extended" flag setting appears late in the pattern. We can't be so
1558  clever for #-comments. */  clever for #-comments. */
1559    
1560  ptr = (uschar *)(pattern - 1);  ptr = (const uschar *)(pattern - 1);
1561  while ((c = *(++ptr)) != 0)  while ((c = *(++ptr)) != 0)
1562    {    {
1563    int min, max;    int min, max;
# Line 1566  while ((c = *(++ptr)) != 0) Line 1584  while ((c = *(++ptr)) != 0)
1584    
1585      case '\\':      case '\\':
1586        {        {
1587        uschar *save_ptr = ptr;        const uschar *save_ptr = ptr;
1588        c = check_escape(&ptr, errorptr, bracount, options, FALSE);        c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1589        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1590        if (c >= 0)        if (c >= 0)
# Line 1645  while ((c = *(++ptr)) != 0) Line 1663  while ((c = *(++ptr)) != 0)
1663        {        {
1664        if (*ptr == '\\')        if (*ptr == '\\')
1665          {          {
1666          int c = check_escape(&ptr, errorptr, bracount, options, TRUE);          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE);
1667          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1668          if (-c == ESC_b) class_charcount++; else class_charcount = 10;          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
1669          }          }
1670        else class_charcount++;        else class_charcount++;
1671        ptr++;        ptr++;
# Line 1662  while ((c = *(++ptr)) != 0) Line 1680  while ((c = *(++ptr)) != 0)
1680    
1681        /* A repeat needs either 1 or 5 bytes. */        /* A repeat needs either 1 or 5 bytes. */
1682    
1683        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))
1684          {          {
1685          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
1686          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
# Line 1770  while ((c = *(++ptr)) != 0) Line 1788  while ((c = *(++ptr)) != 0)
1788      continue;      continue;
1789    
1790      /* Handle ket. Look for subsequent max/min; for certain sets of values we      /* Handle ket. Look for subsequent max/min; for certain sets of values we
1791      have to replicate this bracket up to that many times. */      have to replicate this bracket up to that many times. If brastackptr is
1792        0 this is an unmatched bracket which will generate an error, but take care
1793        not to try to access brastack[-1]. */
1794    
1795      case ')':      case ')':
1796      length += 3;      length += 3;
1797        {        {
1798        int min = 1;        int minval = 1;
1799        int max = 1;        int maxval = 1;
1800        int duplength = length - brastack[--brastackptr];        int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0;
1801    
1802        /* Leave ptr at the final char; for read_repeat_counts this happens        /* Leave ptr at the final char; for read_repeat_counts this happens
1803        automatically; for the others we need an increment. */        automatically; for the others we need an increment. */
1804    
1805        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))
1806          {          {
1807          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr);
1808          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1809          }          }
1810        else if (c == '*') { min = 0; max = -1; ptr++; }        else if (c == '*') { minval = 0; maxval = -1; ptr++; }
1811        else if (c == '+') { max = -1; ptr++; }        else if (c == '+') { maxval = -1; ptr++; }
1812        else if (c == '?') { min = 0; ptr++; }        else if (c == '?') { minval = 0; ptr++; }
1813    
1814        /* If there is a minimum > 1 we have to replicate up to min-1 times; if        /* If there is a minimum > 1 we have to replicate up to minval-1 times;
1815        there is a limited maximum we have to replicate up to max-1 times and        if there is a limited maximum we have to replicate up to maxval-1 times
1816        allow for a BRAZERO item before each optional copy, as we also have to        and allow for a BRAZERO item before each optional copy, as we also have
1817        do before the first copy if the minimum is zero. */        to do before the first copy if the minimum is zero. */
1818    
1819        if (min == 0) length++;        if (minval == 0) length++;
1820          else if (min > 1) length += (min - 1) * duplength;          else if (minval > 1) length += (minval - 1) * duplength;
1821        if (max > min) length += (max - min) * (duplength + 1);        if (maxval > minval) length += (maxval - minval) * (duplength + 1);
1822        }        }
   
1823      continue;      continue;
1824    
1825      /* Non-special character. For a run of such characters the length required      /* Non-special character. For a run of such characters the length required
# Line 1831  while ((c = *(++ptr)) != 0) Line 1850  while ((c = *(++ptr)) != 0)
1850    
1851        if (c == '\\')        if (c == '\\')
1852          {          {
1853          uschar *saveptr = ptr;          const uschar *saveptr = ptr;
1854          c = check_escape(&ptr, errorptr, bracount, options, FALSE);          c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1855          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1856          if (c < 0) { ptr = saveptr; break; }          if (c < 0) { ptr = saveptr; break; }
# Line 1861  if (length > 65539) Line 1880  if (length > 65539)
1880    }    }
1881    
1882  /* Compute the size of data block needed and get it, either from malloc or  /* Compute the size of data block needed and get it, either from malloc or
1883  externally provided function. Put in the magic number and the options. */  externally provided function. We specify "code[0]" in the offsetof() expression
1884    rather than just "code", because it has been reported that one broken compiler
1885    fails on "code" because it is also an independent variable. It should make no
1886    difference to the value of the offsetof(). */
1887    
1888  size = length + offsetof(real_pcre, code);  size = length + offsetof(real_pcre, code[0]);
1889  re = (real_pcre *)(pcre_malloc)(size);  re = (real_pcre *)(pcre_malloc)(size);
1890    
1891  if (re == NULL)  if (re == NULL)
# Line 1872  if (re == NULL) Line 1894  if (re == NULL)
1894    return NULL;    return NULL;
1895    }    }
1896    
1897    /* Put in the magic number and the options. */
1898    
1899  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
1900  re->options = options;  re->options = options;
1901    
# Line 1879  re->options = options; Line 1903  re->options = options;
1903  error, *errorptr will be set non-NULL, so we don't need to look at the result  error, *errorptr will be set non-NULL, so we don't need to look at the result
1904  of the function here. */  of the function here. */
1905    
1906  ptr = (uschar *)pattern;  ptr = (const uschar *)pattern;
1907  code = re->code;  code = re->code;
1908  *code = OP_BRA;  *code = OP_BRA;
1909  bracount = 0;  bracount = 0;
# Line 1906  if (*errorptr != NULL) Line 1930  if (*errorptr != NULL)
1930    {    {
1931    (pcre_free)(re);    (pcre_free)(re);
1932    PCRE_ERROR_RETURN:    PCRE_ERROR_RETURN:
1933    *erroroffset = ptr - (uschar *)pattern;    *erroroffset = ptr - (const uschar *)pattern;
1934    return NULL;    return NULL;
1935    }    }
1936    
# Line 1922  if ((options & PCRE_ANCHORED) == 0) Line 1946  if ((options & PCRE_ANCHORED) == 0)
1946      re->options |= PCRE_ANCHORED;      re->options |= PCRE_ANCHORED;
1947    else    else
1948      {      {
1949      int c = find_firstchar(re->code);      int ch = find_firstchar(re->code);
1950      if (c >= 0)      if (ch >= 0)
1951        {        {
1952        re->first_char = c;        re->first_char = ch;
1953        re->options |= PCRE_FIRSTSET;        re->options |= PCRE_FIRSTSET;
1954        }        }
1955      else if (is_startline(re->code))      else if (is_startline(re->code))
# Line 2062  while (code < code_end) Line 2086  while (code < code_end)
2086    
2087      case OP_REF:      case OP_REF:
2088      printf("    \\%d", *(++code));      printf("    \\%d", *(++code));
2089      break;      code ++;
2090        goto CLASS_REF_REPEAT;
2091    
2092      case OP_CLASS:      case OP_CLASS:
2093        {        {
# Line 2092  while (code < code_end) Line 2117  while (code < code_end)
2117        printf("]");        printf("]");
2118        code += 32;        code += 32;
2119    
2120          CLASS_REF_REPEAT:
2121    
2122        switch(*code)        switch(*code)
2123          {          {
2124          case OP_CRSTAR:          case OP_CRSTAR:
# Line 2204  Returns:      TRUE if matched Line 2231  Returns:      TRUE if matched
2231  */  */
2232    
2233  static BOOL  static BOOL
2234  match_ref(int number, register uschar *eptr, int length, match_data *md)  match_ref(int number, register const uschar *eptr, int length, match_data *md)
2235  {  {
2236  uschar *p = md->start_subject + md->offset_vector[number];  const uschar *p = md->start_subject + md->offset_vector[number];
2237    
2238  #ifdef DEBUG  #ifdef DEBUG
2239  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 2253  Returns:       TRUE if matched Line 2280  Returns:       TRUE if matched
2280  */  */
2281    
2282  static BOOL  static BOOL
2283  match(register uschar *eptr, register uschar *ecode, int offset_top,  match(register const uschar *eptr, register const uschar *ecode, int offset_top,
2284    match_data *md)    match_data *md)
2285  {  {
2286  for (;;)  for (;;)
# Line 2261  for (;;) Line 2288  for (;;)
2288    int min, max, ctype;    int min, max, ctype;
2289    register int i;    register int i;
2290    register int c;    register int c;
2291    BOOL minimize;    BOOL minimize = FALSE;
2292    
2293    /* Opening bracket. Check the alternative branches in turn, failing if none    /* Opening bracket. Check the alternative branches in turn, failing if none
2294    match. We have to set the start offset if required and there is space    match. We have to set the start offset if required and there is space
# Line 2274  for (;;) Line 2301  for (;;)
2301    if ((int)*ecode >= OP_BRA)    if ((int)*ecode >= OP_BRA)
2302      {      {
2303      int number = (*ecode - OP_BRA) << 1;      int number = (*ecode - OP_BRA) << 1;
2304      int save_offset1, save_offset2;      int save_offset1 = 0, save_offset2 = 0;
2305    
2306      #ifdef DEBUG      DPRINTF(("start bracket %d\n", number/2));
     printf("start bracket %d\n", number/2);  
     #endif  
2307    
2308      if (number > 0 && number < md->offset_end)      if (number > 0 && number < md->offset_end)
2309        {        {
# Line 2286  for (;;) Line 2311  for (;;)
2311        save_offset2 = md->offset_vector[number+1];        save_offset2 = md->offset_vector[number+1];
2312        md->offset_vector[number] = eptr - md->start_subject;        md->offset_vector[number] = eptr - md->start_subject;
2313    
2314        #ifdef DEBUG        DPRINTF(("saving %d %d\n", save_offset1, save_offset2));
       printf("saving %d %d\n", save_offset1, save_offset2);  
       #endif  
2315        }        }
2316    
2317      /* Recurse for all the alternatives. */      /* Recurse for all the alternatives. */
# Line 2300  for (;;) Line 2323  for (;;)
2323        }        }
2324      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
2325    
2326      #ifdef DEBUG      DPRINTF(("bracket %d failed\n", number/2));
     printf("bracket %d failed\n", number/2);  
     #endif  
2327    
2328      if (number > 0 && number < md->offset_end)      if (number > 0 && number < md->offset_end)
2329        {        {
# Line 2401  for (;;) Line 2422  for (;;)
2422    
2423      case OP_BRAZERO:      case OP_BRAZERO:
2424        {        {
2425        uschar *next = ecode+1;        const uschar *next = ecode+1;
2426        if (match(eptr, next, offset_top, md)) return TRUE;        if (match(eptr, next, offset_top, md)) return TRUE;
2427        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2428        ecode = next + 3;        ecode = next + 3;
# Line 2410  for (;;) Line 2431  for (;;)
2431    
2432      case OP_BRAMINZERO:      case OP_BRAMINZERO:
2433        {        {
2434        uschar *next = ecode+1;        const uschar *next = ecode+1;
2435        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2436        if (match(eptr, next+3, offset_top, md)) return TRUE;        if (match(eptr, next+3, offset_top, md)) return TRUE;
2437        ecode++;        ecode++;
# Line 2426  for (;;) Line 2447  for (;;)
2447      case OP_KETRMAX:      case OP_KETRMAX:
2448        {        {
2449        int number;        int number;
2450        uschar *prev = ecode - (ecode[1] << 8) - ecode[2];        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
2451    
2452        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)
2453          {          {
# Line 2441  for (;;) Line 2462  for (;;)
2462    
2463        number = (*prev - OP_BRA) << 1;        number = (*prev - OP_BRA) << 1;
2464    
2465        #ifdef DEBUG        DPRINTF(("end bracket %d\n", number/2));
       printf("end bracket %d\n", number/2);  
       #endif  
2466    
2467        if (number > 0)        if (number > 0)
2468          {          {
# Line 2675  for (;;) Line 2694  for (;;)
2694    
2695        else        else
2696          {          {
2697          uschar *pp = eptr;          const uschar *pp = eptr;
2698          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2699            {            {
2700            if (!match_ref(number, eptr, length, md)) break;            if (!match_ref(number, eptr, length, md)) break;
# Line 2699  for (;;) Line 2718  for (;;)
2718    
2719      case OP_CLASS:      case OP_CLASS:
2720        {        {
2721        uschar *data = ecode + 1;  /* Save for matching */        const uschar *data = ecode + 1;  /* Save for matching */
2722        ecode += 33;               /* Advance past the item */        ecode += 33;                     /* Advance past the item */
2723    
2724        switch (*ecode)        switch (*ecode)
2725          {          {
# Line 2783  for (;;) Line 2802  for (;;)
2802    
2803        else        else
2804          {          {
2805          uschar *pp = eptr;          const uschar *pp = eptr;
2806          for (i = min; i < max; eptr++, i++)          for (i = min; i < max; eptr++, i++)
2807            {            {
2808            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
# Line 2811  for (;;) Line 2830  for (;;)
2830        register int length = ecode[1];        register int length = ecode[1];
2831        ecode += 2;        ecode += 2;
2832    
2833        #ifdef DEBUG  #ifdef DEBUG    /* Sigh. Some compilers never learn. */
2834        if (eptr >= md->end_subject)        if (eptr >= md->end_subject)
2835          printf("matching subject <null> against pattern ");          printf("matching subject <null> against pattern ");
2836        else        else
# Line 2822  for (;;) Line 2841  for (;;)
2841          }          }
2842        pchars(ecode, length, FALSE, md);        pchars(ecode, length, FALSE, md);
2843        printf("\n");        printf("\n");
2844        #endif  #endif
2845    
2846        if (length > md->end_subject - eptr) return FALSE;        if (length > md->end_subject - eptr) return FALSE;
2847        if (md->caseless)        if (md->caseless)
# Line 2879  for (;;) Line 2898  for (;;)
2898      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
2899      characters and work backwards. */      characters and work backwards. */
2900    
2901      #ifdef DEBUG      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
2902      printf("matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
2903    
2904      if (md->caseless)      if (md->caseless)
2905        {        {
# Line 2901  for (;;) Line 2918  for (;;)
2918          }          }
2919        else        else
2920          {          {
2921          uschar *pp = eptr;          const uschar *pp = eptr;
2922          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2923            {            {
2924            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;
# Line 2931  for (;;) Line 2948  for (;;)
2948          }          }
2949        else        else
2950          {          {
2951          uschar *pp = eptr;          const uschar *pp = eptr;
2952          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2953            {            {
2954            if (eptr >= md->end_subject || c != *eptr) break;            if (eptr >= md->end_subject || c != *eptr) break;
# Line 2947  for (;;) Line 2964  for (;;)
2964      /* Match a negated single character */      /* Match a negated single character */
2965    
2966      case OP_NOT:      case OP_NOT:
2967      if (eptr > md->end_subject) return FALSE;      if (eptr >= md->end_subject) return FALSE;
2968      ecode++;      ecode++;
2969      if (md->caseless)      if (md->caseless)
2970        {        {
# Line 3006  for (;;) Line 3023  for (;;)
3023      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
3024      characters and work backwards. */      characters and work backwards. */
3025    
3026      #ifdef DEBUG      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3027      printf("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
3028    
3029      if (md->caseless)      if (md->caseless)
3030        {        {
# Line 3028  for (;;) Line 3043  for (;;)
3043          }          }
3044        else        else
3045          {          {
3046          uschar *pp = eptr;          const uschar *pp = eptr;
3047          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3048            {            {
3049            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;
# Line 3058  for (;;) Line 3073  for (;;)
3073          }          }
3074        else        else
3075          {          {
3076          uschar *pp = eptr;          const uschar *pp = eptr;
3077          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3078            {            {
3079            if (eptr >= md->end_subject || c == *eptr) break;            if (eptr >= md->end_subject || c == *eptr) break;
# Line 3175  for (;;) Line 3190  for (;;)
3190    
3191      else      else
3192        {        {
3193        uschar *pp = eptr;        const uschar *pp = eptr;
3194        switch(ctype)        switch(ctype)
3195          {          {
3196          case OP_ANY:          case OP_ANY:
# Line 3259  for (;;) Line 3274  for (;;)
3274      /* There's been some horrible disaster. */      /* There's been some horrible disaster. */
3275    
3276      default:      default:
3277      #ifdef DEBUG      DPRINTF(("Unknown opcode %d\n", *ecode));
     printf("Unknown opcode %d\n", *ecode);  
     #endif  
3278      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
3279      return FALSE;      return FALSE;
3280      }      }
# Line 3277  for (;;) Line 3290  for (;;)
3290    
3291    
3292  /*************************************************  /*************************************************
3293    *         Segregate setjmp()                     *
3294    *************************************************/
3295    
3296    /* The -Wall option of gcc gives warnings for all local variables when setjmp()
3297    is used, even if the coding conforms to the rules of ANSI C. To avoid this, we
3298    hide it in a separate function. This is called only when PCRE_EXTRA is set,
3299    since it's needed only for the extension \X option, and with any luck, a good
3300    compiler will spot the tail recursion and compile it efficiently.
3301    
3302    Arguments:    The block containing the match data
3303    Returns:      The return from setjmp()
3304    */
3305    
3306    static int
3307    my_setjmp(match_data *match_block)
3308    {
3309    return setjmp(match_block->fail_env);
3310    }
3311    
3312    
3313    
3314    /*************************************************
3315  *         Execute a Regular Expression           *  *         Execute a Regular Expression           *
3316  *************************************************/  *************************************************/
3317    
# Line 3307  int resetcount; Line 3342  int resetcount;
3342  int ocount = offsetcount;  int ocount = offsetcount;
3343  int first_char = -1;  int first_char = -1;
3344  match_data match_block;  match_data match_block;
3345  uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3346  uschar *start_match = (uschar *)subject;  const uschar *start_match = (const uschar *)subject;
3347  uschar *end_subject;  const uschar *end_subject;
3348  real_pcre *re = (real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
3349  real_pcre_extra *extra = (real_pcre_extra *)external_extra;  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
3350  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3351  BOOL startline = (re->options & PCRE_STARTLINE) != 0;  BOOL startline = (re->options & PCRE_STARTLINE) != 0;
3352    
# Line 3321  if (re == NULL || subject == NULL || Line 3356  if (re == NULL || subject == NULL ||
3356     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3357  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
3358    
3359  match_block.start_subject = (uschar *)subject;  match_block.start_subject = (const uschar *)subject;
3360  match_block.end_subject = match_block.start_subject + length;  match_block.end_subject = match_block.start_subject + length;
3361  end_subject = match_block.end_subject;  end_subject = match_block.end_subject;
3362    
# Line 3349  if (re->top_backref > 0 && re->top_backr Line 3384  if (re->top_backref > 0 && re->top_backr
3384    ocount = re->top_backref * 2 + 2;    ocount = re->top_backref * 2 + 2;
3385    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));
3386    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3387    #ifdef DEBUG    DPRINTF(("Got memory to hold back references\n"));
   printf("Got memory to hold back references\n");  
   #endif  
3388    }    }
3389  else match_block.offset_vector = offsets;  else match_block.offset_vector = offsets;
3390    
# Line 3404  if (!anchored) Line 3437  if (!anchored)
3437    
3438  do  do
3439    {    {
3440      int rc;
3441    register int *iptr = match_block.offset_vector;    register int *iptr = match_block.offset_vector;
3442    register int *iend = iptr + resetcount;    register int *iend = iptr + resetcount;
3443    
# Line 3445  do Line 3479  do
3479        }        }
3480      }      }
3481    
3482    #ifdef DEBUG  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
3483    printf(">>>> Match against: ");    printf(">>>> Match against: ");
3484    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, &match_block);
3485    printf("\n");    printf("\n");
3486    #endif  #endif
3487    
3488    /* When a match occurs, substrings will be set for all internal extractions;    /* When a match occurs, substrings will be set for all internal extractions;
3489    we just need to set up the whole thing as substring 0 before returning. If    we just need to set up the whole thing as substring 0 before returning. If
# Line 3459  do Line 3493  do
3493    if certain parts of the pattern were not used.    if certain parts of the pattern were not used.
3494    
3495    Before starting the match, we have to set up a longjmp() target to enable    Before starting the match, we have to set up a longjmp() target to enable
3496    the "cut" operation to fail a match completely without backtracking. */    the "cut" operation to fail a match completely without backtracking. This
3497      is done in a separate function to avoid compiler warnings. We need not do
3498    if (setjmp(match_block.fail_env) == 0 &&    it unless PCRE_EXTRA is set, since only in that case is the "cut" operation
3499        match(start_match, re->code, 2, &match_block))    enabled. */
     {  
     int rc;  
3500    
3501      if (ocount != offsetcount)    if (((re->options & PCRE_EXTRA) != 0 && my_setjmp(&match_block) != 0) ||
3502        {        !match(start_match, re->code, 2, &match_block))
3503        if (offsetcount >= 4)      continue;
         {  
         memcpy(offsets + 2, match_block.offset_vector + 2,  
           (offsetcount - 2) * sizeof(int));  
         #ifdef DEBUG  
         printf("Copied offsets; freeing temporary memory\n");  
         #endif  
         }  
       if (match_block.end_offset_top > offsetcount)  
         match_block.offset_overflow = TRUE;  
3504    
3505        #ifdef DEBUG    /* Copy the offset information from temporary store if necessary */
       printf("Freeing temporary memory\n");  
       #endif  
3506    
3507        (pcre_free)(match_block.offset_vector);    if (ocount != offsetcount)
3508        {
3509        if (offsetcount >= 4)
3510          {
3511          memcpy(offsets + 2, match_block.offset_vector + 2,
3512            (offsetcount - 2) * sizeof(int));
3513          DPRINTF(("Copied offsets; freeing temporary memory\n"));
3514        }        }
3515        if (match_block.end_offset_top > offsetcount)
3516          match_block.offset_overflow = TRUE;
3517    
3518      rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;      DPRINTF(("Freeing temporary memory\n"));
3519        (pcre_free)(match_block.offset_vector);
3520        }
3521    
3522      if (match_block.offset_end < 2) rc = 0; else    rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
       {  
       offsets[0] = start_match - match_block.start_subject;  
       offsets[1] = match_block.end_match_ptr - match_block.start_subject;  
       }  
3523    
3524      #ifdef DEBUG    if (match_block.offset_end < 2) rc = 0; else
3525      printf(">>>> returning %d\n", rc);      {
3526      #endif      offsets[0] = start_match - match_block.start_subject;
3527      return rc;      offsets[1] = match_block.end_match_ptr - match_block.start_subject;
3528      }      }
3529    
3530      DPRINTF((">>>> returning %d\n", rc));
3531      return rc;
3532    }    }
3533  while (!anchored &&  while (!anchored &&
3534         match_block.errorcode == PCRE_ERROR_NOMATCH &&         match_block.errorcode == PCRE_ERROR_NOMATCH &&
3535         start_match++ < end_subject);         start_match++ < end_subject);
3536    
3537  #ifdef DEBUG  DPRINTF((">>>> returning %d\n", match_block.errorcode));
 printf(">>>> returning %d\n", match_block.errorcode);  
 #endif  
3538    
3539  return match_block.errorcode;  return match_block.errorcode;
3540  }  }

Legend:
Removed from v.5  
changed lines
  Added in v.9

  ViewVC Help
Powered by ViewVC 1.1.5