/[pcre]/code/tags/pcre-1.04/pcre.c
ViewVC logotype

Diff of /code/tags/pcre-1.04/pcre.c

Parent Directory | Revision Log | View Patch

revision 3 by nigel, Sat Feb 24 21:38:01 2007 UTC revision 11 by nigel, Sat Feb 24 21:38:17 2007 UTC
# Line 33  restrictions: Line 33  restrictions:
33    
34  /* #define DEBUG */  /* #define DEBUG */
35    
36    /* Use a macro for debugging printing, 'cause that eliminates the use
37    of #ifdef inline, and there are *still* stupid compilers about that don't like
38    indented pre-processor statements. I suppose it's only been 10 years... */
39    
40    #ifdef DEBUG
41    #define DPRINTF(p) printf p
42    #else
43    #define DPRINTF(p) /*nothing*/
44    #endif
45    
46  /* Include the internals header, which itself includes Standard C headers plus  /* Include the internals header, which itself includes Standard C headers plus
47  the external pcre header. */  the external pcre header. */
# Line 48  static char rep_max[] = { 0, 0, 0, 0, 1, Line 57  static char rep_max[] = { 0, 0, 0, 0, 1,
57  /* Text forms of OP_ values and things, for debugging */  /* Text forms of OP_ values and things, for debugging */
58    
59  #ifdef DEBUG  #ifdef DEBUG
60  static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d",  static const char *OP_names[] = {
61      "End", "\\A", "\\B", "\\b", "\\D", "\\d",
62    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",
63    "not",    "not",
64    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
# Line 81  static short int escapes[] = { Line 91  static short int escapes[] = {
91    
92  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
93    
94  static BOOL compile_regex(int, int *,uschar **,uschar **,char **);  static BOOL compile_regex(int, int *, uschar **, const uschar **, const char **);
95    
96  /* Structure for passing "static" information around between the functions  /* Structure for passing "static" information around between the functions
97  doing the matching, so that they are thread-safe. */  doing the matching, so that they are thread-safe. */
# Line 98  typedef struct match_data { Line 108  typedef struct match_data {
108    BOOL   noteol;                /* NOTEOL flag */    BOOL   noteol;                /* NOTEOL flag */
109    BOOL   dotall;                /* Dot matches any char */    BOOL   dotall;                /* Dot matches any char */
110    BOOL   endonly;               /* Dollar not before final \n */    BOOL   endonly;               /* Dollar not before final \n */
111    uschar *start_subject;        /* Start of the subject string */    const uschar *start_subject;  /* Start of the subject string */
112    uschar *end_subject;          /* End of the subject string */    const uschar *end_subject;    /* End of the subject string */
113    jmp_buf fail_env;             /* Environment for longjmp() break out */    jmp_buf fail_env;             /* Environment for longjmp() break out */
114    uschar *end_match_ptr;        /* Subject position at end match */    const uschar *end_match_ptr;  /* Subject position at end match */
115    int     end_offset_top;       /* Highwater mark at end of match */    int     end_offset_top;       /* Highwater mark at end of match */
116  } match_data;  } match_data;
117    
# Line 126  void  (*pcre_free)(void *) = free; Line 136  void  (*pcre_free)(void *) = free;
136  *          Return version string                 *  *          Return version string                 *
137  *************************************************/  *************************************************/
138    
139  char *  const char *
140  pcre_version(void)  pcre_version(void)
141  {  {
142  return PCRE_VERSION;  return PCRE_VERSION;
# Line 156  Returns:        number of identifying ex Line 166  Returns:        number of identifying ex
166  int  int
167  pcre_info(const pcre *external_re, int *optptr, int *first_char)  pcre_info(const pcre *external_re, int *optptr, int *first_char)
168  {  {
169  real_pcre *re = (real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
170  if (re == NULL) return PCRE_ERROR_NULL;  if (re == NULL) return PCRE_ERROR_NULL;
171  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
172  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);
# Line 186  Arguments: Line 196  Arguments:
196  Returns:     nothing  Returns:     nothing
197  */  */
198    
199  static pchars(uschar *p, int length, BOOL is_subject, match_data *md)  static void
200    pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
201  {  {
202  int c;  int c;
203  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 228  do { Line 239  do {
239      /* Test an embedded subpattern; if it could not be empty, break the      /* Test an embedded subpattern; if it could not be empty, break the
240      loop. Otherwise carry on in the branch. */      loop. Otherwise carry on in the branch. */
241    
242      if ((int)(*cc) >= OP_BRA)      if ((int)(*cc) >= OP_BRA || (int)(*cc) == OP_ONCE)
243        {        {
244        if (!could_be_empty(cc)) break;        if (!could_be_empty(cc)) break;
245        do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);        do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
# Line 272  do { Line 283  do {
283        case OP_MINSTAR:        case OP_MINSTAR:
284        case OP_QUERY:        case OP_QUERY:
285        case OP_MINQUERY:        case OP_MINQUERY:
286          case OP_NOTSTAR:
287          case OP_NOTMINSTAR:
288          case OP_NOTQUERY:
289          case OP_NOTMINQUERY:
290        case OP_TYPESTAR:        case OP_TYPESTAR:
291        case OP_TYPEMINSTAR:        case OP_TYPEMINSTAR:
292        case OP_TYPEQUERY:        case OP_TYPEQUERY:
# Line 292  do { Line 307  do {
307    
308        case OP_CLASS:        case OP_CLASS:
309        case OP_REF:        case OP_REF:
310        cc += (*cc == OP_REF)? 2 : 4 + 2 * cc[2] + cc[3];        cc += (*cc == OP_REF)? 2 : 33;
311    
312        switch (*cc)        switch (*cc)
313          {          {
# Line 356  Returns:     zero or positive => a data Line 371  Returns:     zero or positive => a data
371  */  */
372    
373  static int  static int
374  check_escape(uschar **ptrptr, char **errorptr, int bracount, int options,  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
375    BOOL isclass)    int options, BOOL isclass)
376  {  {
377  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
378  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */
379  int i;  int i;
380    
# Line 378  else if ((i = escapes[c - '0']) != 0) c Line 393  else if ((i = escapes[c - '0']) != 0) c
393    
394  else  else
395    {    {
396    uschar *oldptr;    const uschar *oldptr;
397    switch (c)    switch (c)
398      {      {
399      /* The handling of escape sequences consisting of a string of digits      /* The handling of escape sequences consisting of a string of digits
# Line 498  Returns:    TRUE or FALSE Line 513  Returns:    TRUE or FALSE
513  */  */
514    
515  static BOOL  static BOOL
516  is_counted_repeat(uschar *p)  is_counted_repeat(const uschar *p)
517  {  {
518  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;
519  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;
# Line 533  Returns:     pointer to '}' on success; Line 548  Returns:     pointer to '}' on success;
548               current ptr on error, with errorptr set               current ptr on error, with errorptr set
549  */  */
550    
551  static uschar *  static const uschar *
552  read_repeat_counts(uschar *p, int *minp, int *maxp, char **errorptr)  read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)
553  {  {
554  int min = 0;  int min = 0;
555  int max = -1;  int max = -1;
# Line 588  Returns:     TRUE on success Line 603  Returns:     TRUE on success
603  */  */
604    
605  static BOOL  static BOOL
606  compile_branch(int options, int *brackets, uschar **codeptr, uschar **ptrptr,  compile_branch(int options, int *brackets, uschar **codeptr,
607    char **errorptr)    const uschar **ptrptr, const char **errorptr)
608  {  {
609  int repeat_type, op_type;  int repeat_type, op_type;
610  int repeat_min, repeat_max;  int repeat_min, repeat_max;
611  int bravalue, length;  int bravalue, length;
612  register int c;  register int c;
613  register uschar *code = *codeptr;  register uschar *code = *codeptr;
614  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
615    const uschar *oldptr;
616  uschar *previous = NULL;  uschar *previous = NULL;
 uschar *oldptr;  
617  uschar class[32];  uschar class[32];
618    
619  /* Switch on next character until the end of the branch */  /* Switch on next character until the end of the branch */
# Line 693  for (;; ptr++) Line 708  for (;; ptr++)
708        /* Backslash may introduce a single character, or it may introduce one        /* Backslash may introduce a single character, or it may introduce one
709        of the specials, which just set a flag. Escaped items are checked for        of the specials, which just set a flag. Escaped items are checked for
710        validity in the pre-compiling pass. The sequence \b is a special case.        validity in the pre-compiling pass. The sequence \b is a special case.
711        Inside a class (and only there) it is treated as backslash. Elsewhere        Inside a class (and only there) it is treated as backspace. Elsewhere
712        it marks a word boundary. Other escapes have preset maps ready to        it marks a word boundary. Other escapes have preset maps ready to
713        or into the one we are building. We assume they have more than one        or into the one we are building. We assume they have more than one
714        character in them, so set class_count bigger than one. */        character in them, so set class_count bigger than one. */
# Line 972  for (;; ptr++) Line 987  for (;; ptr++)
987            if (code == previous) code += 2; else previous[1]++;            if (code == previous) code += 2; else previous[1]++;
988            }            }
989    
990          /* Insert an UPTO if the max is greater than the min. */          /* If the maximum is unlimited, insert an OP_STAR. */
991    
992            if (repeat_max < 0)
993              {
994              *code++ = c;
995              *code++ = OP_STAR + repeat_type;
996              }
997    
998            /* Else insert an UPTO if the max is greater than the min. */
999    
1000          if (repeat_max != repeat_min)          else if (repeat_max != repeat_min)
1001            {            {
1002            *code++ = c;            *code++ = c;
1003            repeat_max -= repeat_min;            repeat_max -= repeat_min;
# Line 1018  for (;; ptr++) Line 1041  for (;; ptr++)
1041      else if ((int)*previous >= OP_BRA)      else if ((int)*previous >= OP_BRA)
1042        {        {
1043        int i;        int i;
1044        int length = code - previous;        int len = code - previous;
1045    
1046        if (repeat_max == -1 && could_be_empty(previous))        if (repeat_max == -1 && could_be_empty(previous))
1047          {          {
# Line 1035  for (;; ptr++) Line 1058  for (;; ptr++)
1058          {          {
1059          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1060            {            {
1061            memcpy(code, previous, length);            memcpy(code, previous, len);
1062            code += length;            code += len;
1063            }            }
1064          }          }
1065    
# Line 1048  for (;; ptr++) Line 1071  for (;; ptr++)
1071          {          {
1072          if (repeat_min == 0)          if (repeat_min == 0)
1073            {            {
1074            memmove(previous+1, previous, length);            memmove(previous+1, previous, len);
1075            code++;            code++;
1076            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
1077            }            }
1078    
1079          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1080            {            {
1081            memcpy(code, previous, length);            memcpy(code, previous, len);
1082            code += length;            code += len;
1083            }            }
1084    
1085          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)
1086            {            {
1087            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
1088            memcpy(code, previous, length);            memcpy(code, previous, len);
1089            code += length;            code += len;
1090            }            }
1091          }          }
1092    
# Line 1210  for (;; ptr++) Line 1233  for (;; ptr++)
1233        continue;        continue;
1234        }        }
1235    
1236      /* Reset and fall through */      /* Data character: reset and fall through */
1237    
1238      ptr = oldptr;      ptr = oldptr;
1239      c = '\\';      c = '\\';
# Line 1301  Returns:    TRUE on success Line 1324  Returns:    TRUE on success
1324  */  */
1325    
1326  static BOOL  static BOOL
1327  compile_regex(int options, int *brackets, uschar **codeptr, uschar **ptrptr,  compile_regex(int options, int *brackets, uschar **codeptr,
1328    char **errorptr)    const uschar **ptrptr, const char **errorptr)
1329  {  {
1330  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
1331  uschar *code = *codeptr;  uschar *code = *codeptr;
1332  uschar *start_bracket = code;  uschar *start_bracket = code;
1333    
# Line 1370  Returns:   TRUE or FALSE Line 1393  Returns:   TRUE or FALSE
1393  */  */
1394    
1395  static BOOL  static BOOL
1396  is_anchored(register uschar *code, BOOL multiline)  is_anchored(register const uschar *code, BOOL multiline)
1397  {  {
1398  do {  do {
1399     int op = (int)code[3];     int op = (int)code[3];
# Line 1399  Returns:   TRUE or FALSE Line 1422  Returns:   TRUE or FALSE
1422  */  */
1423    
1424  static BOOL  static BOOL
1425  is_startline(uschar *code)  is_startline(const uschar *code)
1426  {  {
1427  do {  do {
1428     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)
# Line 1484  Returns:       pointer to compiled data Line 1507  Returns:       pointer to compiled data
1507  */  */
1508    
1509  pcre *  pcre *
1510  pcre_compile(const char *pattern, int options, char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
1511    int *erroroffset)    int *erroroffset)
1512  {  {
1513  real_pcre *re;  real_pcre *re;
# Line 1494  int runlength; Line 1517  int runlength;
1517  int c, size;  int c, size;
1518  int bracount = 0;  int bracount = 0;
1519  int brastack[200];  int brastack[200];
 int brastackptr = 0;  
1520  int top_backref = 0;  int top_backref = 0;
1521  uschar *code, *ptr;  unsigned int brastackptr = 0;
1522    uschar *code;
1523    const uschar *ptr;
1524    
1525  #ifdef DEBUG  #ifdef DEBUG
1526  uschar *code_base, *code_end;  uschar *code_base, *code_end;
# Line 1523  if ((options & ~PUBLIC_OPTIONS) != 0) Line 1547  if ((options & ~PUBLIC_OPTIONS) != 0)
1547    return NULL;    return NULL;
1548    }    }
1549    
1550  #ifdef DEBUG  DPRINTF(("------------------------------------------------------------------\n"));
1551  printf("------------------------------------------------------------------\n");  DPRINTF(("%s\n", pattern));
 printf("%s\n", pattern);  
 #endif  
1552    
1553  /* The first thing to do is to make a pass over the pattern to compute the  /* The first thing to do is to make a pass over the pattern to compute the
1554  amount of store required to hold the compiled code. This does not have to be  amount of store required to hold the compiled code. This does not have to be
# Line 1535  internal flag settings. Make an attempt Line 1557  internal flag settings. Make an attempt
1557  if an "extended" flag setting appears late in the pattern. We can't be so  if an "extended" flag setting appears late in the pattern. We can't be so
1558  clever for #-comments. */  clever for #-comments. */
1559    
1560  ptr = (uschar *)(pattern - 1);  ptr = (const uschar *)(pattern - 1);
1561  while ((c = *(++ptr)) != 0)  while ((c = *(++ptr)) != 0)
1562    {    {
1563    int min, max;    int min, max;
# Line 1562  while ((c = *(++ptr)) != 0) Line 1584  while ((c = *(++ptr)) != 0)
1584    
1585      case '\\':      case '\\':
1586        {        {
1587        uschar *save_ptr = ptr;        const uschar *save_ptr = ptr;
1588        c = check_escape(&ptr, errorptr, bracount, options, FALSE);        c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1589        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1590        if (c >= 0)        if (c >= 0)
# Line 1641  while ((c = *(++ptr)) != 0) Line 1663  while ((c = *(++ptr)) != 0)
1663        {        {
1664        if (*ptr == '\\')        if (*ptr == '\\')
1665          {          {
1666          int c = check_escape(&ptr, errorptr, bracount, options, TRUE);          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE);
1667          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1668          if (-c == ESC_b) class_charcount++; else class_charcount = 10;          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
1669          }          }
1670        else class_charcount++;        else class_charcount++;
1671        ptr++;        ptr++;
# Line 1658  while ((c = *(++ptr)) != 0) Line 1680  while ((c = *(++ptr)) != 0)
1680    
1681        /* A repeat needs either 1 or 5 bytes. */        /* A repeat needs either 1 or 5 bytes. */
1682    
1683        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))
1684          {          {
1685          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
1686          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
# Line 1766  while ((c = *(++ptr)) != 0) Line 1788  while ((c = *(++ptr)) != 0)
1788      continue;      continue;
1789    
1790      /* Handle ket. Look for subsequent max/min; for certain sets of values we      /* Handle ket. Look for subsequent max/min; for certain sets of values we
1791      have to replicate this bracket up to that many times. */      have to replicate this bracket up to that many times. If brastackptr is
1792        0 this is an unmatched bracket which will generate an error, but take care
1793        not to try to access brastack[-1]. */
1794    
1795      case ')':      case ')':
1796      length += 3;      length += 3;
1797        {        {
1798        int min = 1;        int minval = 1;
1799        int max = 1;        int maxval = 1;
1800        int duplength = length - brastack[--brastackptr];        int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0;
1801    
1802        /* Leave ptr at the final char; for read_repeat_counts this happens        /* Leave ptr at the final char; for read_repeat_counts this happens
1803        automatically; for the others we need an increment. */        automatically; for the others we need an increment. */
1804    
1805        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))
1806          {          {
1807          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr);
1808          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1809          }          }
1810        else if (c == '*') { min = 0; max = -1; ptr++; }        else if (c == '*') { minval = 0; maxval = -1; ptr++; }
1811        else if (c == '+') { max = -1; ptr++; }        else if (c == '+') { maxval = -1; ptr++; }
1812        else if (c == '?') { min = 0; ptr++; }        else if (c == '?') { minval = 0; ptr++; }
1813    
1814        /* If there is a minimum > 1 we have to replicate up to min-1 times; if        /* If there is a minimum > 1 we have to replicate up to minval-1 times;
1815        there is a limited maximum we have to replicate up to max-1 times and        if there is a limited maximum we have to replicate up to maxval-1 times
1816        allow for a BRAZERO item before each optional copy, as we also have to        and allow for a BRAZERO item before each optional copy, as we also have
1817        do before the first copy if the minimum is zero. */        to do before the first copy if the minimum is zero. */
1818    
1819        if (min == 0) length++;        if (minval == 0) length++;
1820          else if (min > 1) length += (min - 1) * duplength;          else if (minval > 1) length += (minval - 1) * duplength;
1821        if (max > min) length += (max - min) * (duplength + 1);        if (maxval > minval) length += (maxval - minval) * (duplength + 1);
1822        }        }
   
1823      continue;      continue;
1824    
1825      /* Non-special character. For a run of such characters the length required      /* Non-special character. For a run of such characters the length required
# Line 1827  while ((c = *(++ptr)) != 0) Line 1850  while ((c = *(++ptr)) != 0)
1850    
1851        if (c == '\\')        if (c == '\\')
1852          {          {
1853          uschar *saveptr = ptr;          const uschar *saveptr = ptr;
1854          c = check_escape(&ptr, errorptr, bracount, options, FALSE);          c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1855          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1856          if (c < 0) { ptr = saveptr; break; }          if (c < 0) { ptr = saveptr; break; }
# Line 1857  if (length > 65539) Line 1880  if (length > 65539)
1880    }    }
1881    
1882  /* Compute the size of data block needed and get it, either from malloc or  /* Compute the size of data block needed and get it, either from malloc or
1883  externally provided function. Put in the magic number and the options. */  externally provided function. We specify "code[0]" in the offsetof() expression
1884    rather than just "code", because it has been reported that one broken compiler
1885    fails on "code" because it is also an independent variable. It should make no
1886    difference to the value of the offsetof(). */
1887    
1888  size = length + offsetof(real_pcre, code);  size = length + offsetof(real_pcre, code[0]);
1889  re = (real_pcre *)(pcre_malloc)(size);  re = (real_pcre *)(pcre_malloc)(size);
1890    
1891  if (re == NULL)  if (re == NULL)
# Line 1868  if (re == NULL) Line 1894  if (re == NULL)
1894    return NULL;    return NULL;
1895    }    }
1896    
1897    /* Put in the magic number and the options. */
1898    
1899  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
1900  re->options = options;  re->options = options;
1901    
# Line 1875  re->options = options; Line 1903  re->options = options;
1903  error, *errorptr will be set non-NULL, so we don't need to look at the result  error, *errorptr will be set non-NULL, so we don't need to look at the result
1904  of the function here. */  of the function here. */
1905    
1906  ptr = (uschar *)pattern;  ptr = (const uschar *)pattern;
1907  code = re->code;  code = re->code;
1908  *code = OP_BRA;  *code = OP_BRA;
1909  bracount = 0;  bracount = 0;
# Line 1902  if (*errorptr != NULL) Line 1930  if (*errorptr != NULL)
1930    {    {
1931    (pcre_free)(re);    (pcre_free)(re);
1932    PCRE_ERROR_RETURN:    PCRE_ERROR_RETURN:
1933    *erroroffset = ptr - (uschar *)pattern;    *erroroffset = ptr - (const uschar *)pattern;
1934    return NULL;    return NULL;
1935    }    }
1936    
# Line 1918  if ((options & PCRE_ANCHORED) == 0) Line 1946  if ((options & PCRE_ANCHORED) == 0)
1946      re->options |= PCRE_ANCHORED;      re->options |= PCRE_ANCHORED;
1947    else    else
1948      {      {
1949      int c = find_firstchar(re->code);      int ch = find_firstchar(re->code);
1950      if (c >= 0)      if (ch >= 0)
1951        {        {
1952        re->first_char = c;        re->first_char = ch;
1953        re->options |= PCRE_FIRSTSET;        re->options |= PCRE_FIRSTSET;
1954        }        }
1955      else if (is_startline(re->code))      else if (is_startline(re->code))
# Line 2013  while (code < code_end) Line 2041  while (code < code_end)
2041      case OP_MINUPTO:      case OP_MINUPTO:
2042      if (isprint(c = code[3])) printf("    %c{", c);      if (isprint(c = code[3])) printf("    %c{", c);
2043        else printf("    \\x%02x{", c);        else printf("    \\x%02x{", c);
2044      if (*code != OP_EXACT) printf(",");      if (*code != OP_EXACT) printf("0,");
2045      printf("%d}", (code[1] << 8) + code[2]);      printf("%d}", (code[1] << 8) + code[2]);
2046      if (*code == OP_MINUPTO) printf("?");      if (*code == OP_MINUPTO) printf("?");
2047      code += 3;      code += 3;
# Line 2058  while (code < code_end) Line 2086  while (code < code_end)
2086    
2087      case OP_REF:      case OP_REF:
2088      printf("    \\%d", *(++code));      printf("    \\%d", *(++code));
2089      break;      code ++;
2090        goto CLASS_REF_REPEAT;
2091    
2092      case OP_CLASS:      case OP_CLASS:
2093        {        {
# Line 2088  while (code < code_end) Line 2117  while (code < code_end)
2117        printf("]");        printf("]");
2118        code += 32;        code += 32;
2119    
2120          CLASS_REF_REPEAT:
2121    
2122        switch(*code)        switch(*code)
2123          {          {
2124          case OP_CRSTAR:          case OP_CRSTAR:
# Line 2200  Returns:      TRUE if matched Line 2231  Returns:      TRUE if matched
2231  */  */
2232    
2233  static BOOL  static BOOL
2234  match_ref(int number, register uschar *eptr, int length, match_data *md)  match_ref(int number, register const uschar *eptr, int length, match_data *md)
2235  {  {
2236  uschar *p = md->start_subject + md->offset_vector[number];  const uschar *p = md->start_subject + md->offset_vector[number];
2237    
2238  #ifdef DEBUG  #ifdef DEBUG
2239  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 2249  Returns:       TRUE if matched Line 2280  Returns:       TRUE if matched
2280  */  */
2281    
2282  static BOOL  static BOOL
2283  match(register uschar *eptr, register uschar *ecode, int offset_top,  match(register const uschar *eptr, register const uschar *ecode, int offset_top,
2284    match_data *md)    match_data *md)
2285  {  {
2286  for (;;)  for (;;)
# Line 2257  for (;;) Line 2288  for (;;)
2288    int min, max, ctype;    int min, max, ctype;
2289    register int i;    register int i;
2290    register int c;    register int c;
2291    BOOL minimize;    BOOL minimize = FALSE;
2292    
2293    /* Opening bracket. Check the alternative branches in turn, failing if none    /* Opening bracket. Check the alternative branches in turn, failing if none
2294    match. We have to set the start offset if required and there is space    match. We have to set the start offset if required and there is space
# Line 2270  for (;;) Line 2301  for (;;)
2301    if ((int)*ecode >= OP_BRA)    if ((int)*ecode >= OP_BRA)
2302      {      {
2303      int number = (*ecode - OP_BRA) << 1;      int number = (*ecode - OP_BRA) << 1;
2304      int save_offset1, save_offset2;      int save_offset1 = 0, save_offset2 = 0;
2305    
2306      #ifdef DEBUG      DPRINTF(("start bracket %d\n", number/2));
     printf("start bracket %d\n", number/2);  
     #endif  
2307    
2308      if (number > 0 && number < md->offset_end)      if (number > 0 && number < md->offset_end)
2309        {        {
# Line 2282  for (;;) Line 2311  for (;;)
2311        save_offset2 = md->offset_vector[number+1];        save_offset2 = md->offset_vector[number+1];
2312        md->offset_vector[number] = eptr - md->start_subject;        md->offset_vector[number] = eptr - md->start_subject;
2313    
2314        #ifdef DEBUG        DPRINTF(("saving %d %d\n", save_offset1, save_offset2));
       printf("saving %d %d\n", save_offset1, save_offset2);  
       #endif  
2315        }        }
2316    
2317      /* Recurse for all the alternatives. */      /* Recurse for all the alternatives. */
# Line 2296  for (;;) Line 2323  for (;;)
2323        }        }
2324      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
2325    
2326      #ifdef DEBUG      DPRINTF(("bracket %d failed\n", number/2));
     printf("bracket %d failed\n", number/2);  
     #endif  
2327    
2328      if (number > 0 && number < md->offset_end)      if (number > 0 && number < md->offset_end)
2329        {        {
# Line 2360  for (;;) Line 2385  for (;;)
2385    
2386      /* "Once" brackets are like assertion brackets except that after a match,      /* "Once" brackets are like assertion brackets except that after a match,
2387      the point in the subject string is not moved back. Thus there can never be      the point in the subject string is not moved back. Thus there can never be
2388      a back into the brackets. Check the alternative branches in turn - the      a move back into the brackets. Check the alternative branches in turn - the
2389      matching won't pass the KET for this kind of subpattern. If any one branch      matching won't pass the KET for this kind of subpattern. If any one branch
2390      matches, we carry on, leaving the subject pointer. */      matches, we carry on, leaving the subject pointer. */
2391    
# Line 2397  for (;;) Line 2422  for (;;)
2422    
2423      case OP_BRAZERO:      case OP_BRAZERO:
2424        {        {
2425        uschar *next = ecode+1;        const uschar *next = ecode+1;
2426        if (match(eptr, next, offset_top, md)) return TRUE;        if (match(eptr, next, offset_top, md)) return TRUE;
2427        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2428        ecode = next + 3;        ecode = next + 3;
# Line 2406  for (;;) Line 2431  for (;;)
2431    
2432      case OP_BRAMINZERO:      case OP_BRAMINZERO:
2433        {        {
2434        uschar *next = ecode+1;        const uschar *next = ecode+1;
2435        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2436        if (match(eptr, next+3, offset_top, md)) return TRUE;        if (match(eptr, next+3, offset_top, md)) return TRUE;
2437        ecode++;        ecode++;
# Line 2422  for (;;) Line 2447  for (;;)
2447      case OP_KETRMAX:      case OP_KETRMAX:
2448        {        {
2449        int number;        int number;
2450        uschar *prev = ecode - (ecode[1] << 8) - ecode[2];        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
2451    
2452        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)
2453          {          {
# Line 2437  for (;;) Line 2462  for (;;)
2462    
2463        number = (*prev - OP_BRA) << 1;        number = (*prev - OP_BRA) << 1;
2464    
2465        #ifdef DEBUG        DPRINTF(("end bracket %d\n", number/2));
       printf("end bracket %d\n", number/2);  
       #endif  
2466    
2467        if (number > 0)        if (number > 0)
2468          {          {
# Line 2671  for (;;) Line 2694  for (;;)
2694    
2695        else        else
2696          {          {
2697          uschar *pp = eptr;          const uschar *pp = eptr;
2698          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2699            {            {
2700            if (!match_ref(number, eptr, length, md)) break;            if (!match_ref(number, eptr, length, md)) break;
# Line 2695  for (;;) Line 2718  for (;;)
2718    
2719      case OP_CLASS:      case OP_CLASS:
2720        {        {
2721        uschar *data = ecode + 1;  /* Save for matching */        const uschar *data = ecode + 1;  /* Save for matching */
2722        ecode += 33;               /* Advance past the item */        ecode += 33;                     /* Advance past the item */
2723    
2724        switch (*ecode)        switch (*ecode)
2725          {          {
# Line 2779  for (;;) Line 2802  for (;;)
2802    
2803        else        else
2804          {          {
2805          uschar *pp = eptr;          const uschar *pp = eptr;
2806          for (i = min; i < max; eptr++, i++)          for (i = min; i < max; eptr++, i++)
2807            {            {
2808            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
# Line 2807  for (;;) Line 2830  for (;;)
2830        register int length = ecode[1];        register int length = ecode[1];
2831        ecode += 2;        ecode += 2;
2832    
2833        #ifdef DEBUG  #ifdef DEBUG    /* Sigh. Some compilers never learn. */
2834        if (eptr >= md->end_subject)        if (eptr >= md->end_subject)
2835          printf("matching subject <null> against pattern ");          printf("matching subject <null> against pattern ");
2836        else        else
# Line 2818  for (;;) Line 2841  for (;;)
2841          }          }
2842        pchars(ecode, length, FALSE, md);        pchars(ecode, length, FALSE, md);
2843        printf("\n");        printf("\n");
2844        #endif  #endif
2845    
2846        if (length > md->end_subject - eptr) return FALSE;        if (length > md->end_subject - eptr) return FALSE;
2847        if (md->caseless)        if (md->caseless)
# Line 2875  for (;;) Line 2898  for (;;)
2898      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
2899      characters and work backwards. */      characters and work backwards. */
2900    
2901      #ifdef DEBUG      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
2902      printf("matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
2903    
2904      if (md->caseless)      if (md->caseless)
2905        {        {
# Line 2897  for (;;) Line 2918  for (;;)
2918          }          }
2919        else        else
2920          {          {
2921          uschar *pp = eptr;          const uschar *pp = eptr;
2922          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2923            {            {
2924            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;
# Line 2927  for (;;) Line 2948  for (;;)
2948          }          }
2949        else        else
2950          {          {
2951          uschar *pp = eptr;          const uschar *pp = eptr;
2952          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2953            {            {
2954            if (eptr >= md->end_subject || c != *eptr) break;            if (eptr >= md->end_subject || c != *eptr) break;
# Line 2943  for (;;) Line 2964  for (;;)
2964      /* Match a negated single character */      /* Match a negated single character */
2965    
2966      case OP_NOT:      case OP_NOT:
2967      if (eptr > md->end_subject) return FALSE;      if (eptr >= md->end_subject) return FALSE;
2968      ecode++;      ecode++;
2969      if (md->caseless)      if (md->caseless)
2970        {        {
# Line 3002  for (;;) Line 3023  for (;;)
3023      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
3024      characters and work backwards. */      characters and work backwards. */
3025    
3026      #ifdef DEBUG      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3027      printf("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
3028    
3029      if (md->caseless)      if (md->caseless)
3030        {        {
# Line 3024  for (;;) Line 3043  for (;;)
3043          }          }
3044        else        else
3045          {          {
3046          uschar *pp = eptr;          const uschar *pp = eptr;
3047          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3048            {            {
3049            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;
# Line 3054  for (;;) Line 3073  for (;;)
3073          }          }
3074        else        else
3075          {          {
3076          uschar *pp = eptr;          const uschar *pp = eptr;
3077          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3078            {            {
3079            if (eptr >= md->end_subject || c == *eptr) break;            if (eptr >= md->end_subject || c == *eptr) break;
# Line 3171  for (;;) Line 3190  for (;;)
3190    
3191      else      else
3192        {        {
3193        uschar *pp = eptr;        const uschar *pp = eptr;
3194        switch(ctype)        switch(ctype)
3195          {          {
3196          case OP_ANY:          case OP_ANY:
# Line 3255  for (;;) Line 3274  for (;;)
3274      /* There's been some horrible disaster. */      /* There's been some horrible disaster. */
3275    
3276      default:      default:
3277      #ifdef DEBUG      DPRINTF(("Unknown opcode %d\n", *ecode));
     printf("Unknown opcode %d\n", *ecode);  
     #endif  
3278      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
3279      return FALSE;      return FALSE;
3280      }      }
# Line 3273  for (;;) Line 3290  for (;;)
3290    
3291    
3292  /*************************************************  /*************************************************
3293    *         Segregate setjmp()                     *
3294    *************************************************/
3295    
3296    /* The -Wall option of gcc gives warnings for all local variables when setjmp()
3297    is used, even if the coding conforms to the rules of ANSI C. To avoid this, we
3298    hide it in a separate function. This is called only when PCRE_EXTRA is set,
3299    since it's needed only for the extension \X option, and with any luck, a good
3300    compiler will spot the tail recursion and compile it efficiently.
3301    
3302    Arguments:
3303       eptr        pointer in subject
3304       ecode       position in code
3305       offset_top  current top pointer
3306       md          pointer to "static" info for the match
3307    
3308    Returns:       TRUE if matched
3309    */
3310    
3311    static BOOL
3312    match_with_setjmp(const uschar *eptr, const uschar *ecode, int offset_top,
3313      match_data *match_block)
3314    {
3315    return setjmp(match_block->fail_env) == 0 &&
3316          match(eptr, ecode, offset_top, match_block);
3317    }
3318    
3319    
3320    
3321    /*************************************************
3322  *         Execute a Regular Expression           *  *         Execute a Regular Expression           *
3323  *************************************************/  *************************************************/
3324    
# Line 3299  int Line 3345  int
3345  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
3346    const char *subject, int length, int options, int *offsets, int offsetcount)    const char *subject, int length, int options, int *offsets, int offsetcount)
3347  {  {
3348  int resetcount;  int resetcount, ocount;
 int ocount = offsetcount;  
3349  int first_char = -1;  int first_char = -1;
3350  match_data match_block;  match_data match_block;
3351  uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3352  uschar *start_match = (uschar *)subject;  const uschar *start_match = (const uschar *)subject;
3353  uschar *end_subject;  const uschar *end_subject;
3354  real_pcre *re = (real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
3355  real_pcre_extra *extra = (real_pcre_extra *)external_extra;  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
3356    BOOL using_temporary_offsets = FALSE;
3357  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3358  BOOL startline = (re->options & PCRE_STARTLINE) != 0;  BOOL startline = (re->options & PCRE_STARTLINE) != 0;
3359    
# Line 3317  if (re == NULL || subject == NULL || Line 3363  if (re == NULL || subject == NULL ||
3363     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3364  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
3365    
3366  match_block.start_subject = (uschar *)subject;  match_block.start_subject = (const uschar *)subject;
3367  match_block.end_subject = match_block.start_subject + length;  match_block.end_subject = match_block.start_subject + length;
3368  end_subject = match_block.end_subject;  end_subject = match_block.end_subject;
3369    
# Line 3336  match_block.errorcode = PCRE_ERROR_NOMAT Line 3382  match_block.errorcode = PCRE_ERROR_NOMAT
3382    
3383  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
3384  hold, we get a temporary bit of working store to use during the matching.  hold, we get a temporary bit of working store to use during the matching.
3385  Otherwise, we can use the vector supplied, rounding down the size of it to a  Otherwise, we can use the vector supplied, rounding down its size to a multiple
3386  multiple of 2. */  of 2. */
3387    
3388  ocount &= (-2);  ocount = offsetcount & (-2);
3389  if (re->top_backref > 0 && re->top_backref + 1 >= ocount/2)  if (re->top_backref > 0 && re->top_backref >= ocount/2)
3390    {    {
3391    ocount = re->top_backref * 2 + 2;    ocount = re->top_backref * 2 + 2;
3392    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));
3393    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3394    #ifdef DEBUG    using_temporary_offsets = TRUE;
3395    printf("Got memory to hold back references\n");    DPRINTF(("Got memory to hold back references\n"));
   #endif  
3396    }    }
3397  else match_block.offset_vector = offsets;  else match_block.offset_vector = offsets;
3398    
# Line 3400  if (!anchored) Line 3445  if (!anchored)
3445    
3446  do  do
3447    {    {
3448      int rc;
3449    register int *iptr = match_block.offset_vector;    register int *iptr = match_block.offset_vector;
3450    register int *iend = iptr + resetcount;    register int *iend = iptr + resetcount;
3451    
# Line 3441  do Line 3487  do
3487        }        }
3488      }      }
3489    
3490    #ifdef DEBUG  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
3491    printf(">>>> Match against: ");    printf(">>>> Match against: ");
3492    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, &match_block);
3493    printf("\n");    printf("\n");
3494    #endif  #endif
3495    
3496    /* When a match occurs, substrings will be set for all internal extractions;    /* When a match occurs, substrings will be set for all internal extractions;
3497    we just need to set up the whole thing as substring 0 before returning. If    we just need to set up the whole thing as substring 0 before returning. If
# Line 3455  do Line 3501  do
3501    if certain parts of the pattern were not used.    if certain parts of the pattern were not used.
3502    
3503    Before starting the match, we have to set up a longjmp() target to enable    Before starting the match, we have to set up a longjmp() target to enable
3504    the "cut" operation to fail a match completely without backtracking. */    the "cut" operation to fail a match completely without backtracking. This
3505      is done in a separate function to avoid compiler warnings. We need not do
3506      it unless PCRE_EXTRA is set, since only in that case is the "cut" operation
3507      enabled. */
3508    
3509    if (setjmp(match_block.fail_env) == 0 &&    if ((re->options & PCRE_EXTRA) != 0)
       match(start_match, re->code, 2, &match_block))  
3510      {      {
3511      int rc;      if (!match_with_setjmp(start_match, re->code, 2, &match_block))
3512          continue;
3513      if (ocount != offsetcount)      }
3514        {    else if (!match(start_match, re->code, 2, &match_block)) continue;
       if (offsetcount >= 4)  
         {  
         memcpy(offsets + 2, match_block.offset_vector + 2,  
           (offsetcount - 2) * sizeof(int));  
         #ifdef DEBUG  
         printf("Copied offsets; freeing temporary memory\n");  
         #endif  
         }  
       if (match_block.end_offset_top > offsetcount)  
         match_block.offset_overflow = TRUE;  
3515    
3516        #ifdef DEBUG    /* Copy the offset information from temporary store if necessary */
       printf("Freeing temporary memory\n");  
       #endif  
3517    
3518        (pcre_free)(match_block.offset_vector);    if (using_temporary_offsets)
3519        {
3520        if (offsetcount >= 4)
3521          {
3522          memcpy(offsets + 2, match_block.offset_vector + 2,
3523            (offsetcount - 2) * sizeof(int));
3524          DPRINTF(("Copied offsets from temporary memory\n"));
3525        }        }
3526        if (match_block.end_offset_top > offsetcount)
3527          match_block.offset_overflow = TRUE;
3528    
3529      rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;      DPRINTF(("Freeing temporary memory\n"));
3530        (pcre_free)(match_block.offset_vector);
3531        }
3532    
3533      if (match_block.offset_end < 2) rc = 0; else    rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
       {  
       offsets[0] = start_match - match_block.start_subject;  
       offsets[1] = match_block.end_match_ptr - match_block.start_subject;  
       }  
3534    
3535      #ifdef DEBUG    if (match_block.offset_end < 2) rc = 0; else
3536      printf(">>>> returning %d\n", rc);      {
3537      #endif      offsets[0] = start_match - match_block.start_subject;
3538      return rc;      offsets[1] = match_block.end_match_ptr - match_block.start_subject;
3539      }      }
3540    
3541      DPRINTF((">>>> returning %d\n", rc));
3542      return rc;
3543    }    }
3544  while (!anchored &&  while (!anchored &&
3545         match_block.errorcode == PCRE_ERROR_NOMATCH &&         match_block.errorcode == PCRE_ERROR_NOMATCH &&
3546         start_match++ < end_subject);         start_match++ < end_subject);
3547    
3548  #ifdef DEBUG  if (using_temporary_offsets)
3549  printf(">>>> returning %d\n", match_block.errorcode);    {
3550  #endif    DPRINTF(("Freeing temporary memory\n"));
3551      (pcre_free)(match_block.offset_vector);
3552      }
3553    
3554    DPRINTF((">>>> returning %d\n", match_block.errorcode));
3555    
3556  return match_block.errorcode;  return match_block.errorcode;
3557  }  }

Legend:
Removed from v.3  
changed lines
  Added in v.11

  ViewVC Help
Powered by ViewVC 1.1.5