/[pcre]/code/tags/pcre-2.00/pcre.c
ViewVC logotype

Diff of /code/tags/pcre-2.00/pcre.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 3 by nigel, Sat Feb 24 21:38:01 2007 UTC revision 13 by nigel, Sat Feb 24 21:38:21 2007 UTC
# Line 33  restrictions: Line 33  restrictions:
33    
34  /* #define DEBUG */  /* #define DEBUG */
35    
36    /* Use a macro for debugging printing, 'cause that eliminates the use
37    of #ifdef inline, and there are *still* stupid compilers about that don't like
38    indented pre-processor statements. I suppose it's only been 10 years... */
39    
40    #ifdef DEBUG
41    #define DPRINTF(p) printf p
42    #else
43    #define DPRINTF(p) /*nothing*/
44    #endif
45    
46  /* Include the internals header, which itself includes Standard C headers plus  /* Include the internals header, which itself includes Standard C headers plus
47  the external pcre header. */  the external pcre header. */
# Line 45  the external pcre header. */ Line 54  the external pcre header. */
54  static char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static char rep_min[] = { 0, 0, 1, 1, 0, 0 };
55  static char rep_max[] = { 0, 0, 0, 0, 1, 1 };  static char rep_max[] = { 0, 0, 0, 0, 1, 1 };
56    
57  /* Text forms of OP_ values and things, for debugging */  /* Text forms of OP_ values and things, for debugging (not all used) */
58    
59  #ifdef DEBUG  #ifdef DEBUG
60  static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d",  static const char *OP_names[] = {
61      "End", "\\A", "\\B", "\\b", "\\D", "\\d",
62    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",
63    "not",    "not",
64    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
65    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
66    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
67    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
68    "class", "Ref",    "class", "negclass", "Ref",
69    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",
70    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
71  };  };
# Line 81  static short int escapes[] = { Line 91  static short int escapes[] = {
91    
92  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
93    
94  static BOOL compile_regex(int, int *,uschar **,uschar **,char **);  static BOOL
95      compile_regex(int, int *, uschar **, const uschar **, const char **);
96    
97  /* Structure for passing "static" information around between the functions  /* Structure for passing "static" information around between the functions
98  doing the matching, so that they are thread-safe. */  doing the matching, so that they are thread-safe. */
# Line 98  typedef struct match_data { Line 109  typedef struct match_data {
109    BOOL   noteol;                /* NOTEOL flag */    BOOL   noteol;                /* NOTEOL flag */
110    BOOL   dotall;                /* Dot matches any char */    BOOL   dotall;                /* Dot matches any char */
111    BOOL   endonly;               /* Dollar not before final \n */    BOOL   endonly;               /* Dollar not before final \n */
112    uschar *start_subject;        /* Start of the subject string */    const uschar *start_subject;  /* Start of the subject string */
113    uschar *end_subject;          /* End of the subject string */    const uschar *end_subject;    /* End of the subject string */
114    jmp_buf fail_env;             /* Environment for longjmp() break out */    jmp_buf fail_env;             /* Environment for longjmp() break out */
115    uschar *end_match_ptr;        /* Subject position at end match */    const uschar *end_match_ptr;  /* Subject position at end match */
116    int     end_offset_top;       /* Highwater mark at end of match */    int     end_offset_top;       /* Highwater mark at end of match */
117  } match_data;  } match_data;
118    
# Line 126  void  (*pcre_free)(void *) = free; Line 137  void  (*pcre_free)(void *) = free;
137  *          Return version string                 *  *          Return version string                 *
138  *************************************************/  *************************************************/
139    
140  char *  const char *
141  pcre_version(void)  pcre_version(void)
142  {  {
143  return PCRE_VERSION;  return PCRE_VERSION;
# Line 156  Returns:        number of identifying ex Line 167  Returns:        number of identifying ex
167  int  int
168  pcre_info(const pcre *external_re, int *optptr, int *first_char)  pcre_info(const pcre *external_re, int *optptr, int *first_char)
169  {  {
170  real_pcre *re = (real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
171  if (re == NULL) return PCRE_ERROR_NULL;  if (re == NULL) return PCRE_ERROR_NULL;
172  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
173  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);
# Line 186  Arguments: Line 197  Arguments:
197  Returns:     nothing  Returns:     nothing
198  */  */
199    
200  static pchars(uschar *p, int length, BOOL is_subject, match_data *md)  static void
201    pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
202  {  {
203  int c;  int c;
204  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 228  do { Line 240  do {
240      /* Test an embedded subpattern; if it could not be empty, break the      /* Test an embedded subpattern; if it could not be empty, break the
241      loop. Otherwise carry on in the branch. */      loop. Otherwise carry on in the branch. */
242    
243      if ((int)(*cc) >= OP_BRA)      if ((int)(*cc) >= OP_BRA || (int)(*cc) == OP_ONCE)
244        {        {
245        if (!could_be_empty(cc)) break;        if (!could_be_empty(cc)) break;
246        do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);        do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
# Line 272  do { Line 284  do {
284        case OP_MINSTAR:        case OP_MINSTAR:
285        case OP_QUERY:        case OP_QUERY:
286        case OP_MINQUERY:        case OP_MINQUERY:
287          case OP_NOTSTAR:
288          case OP_NOTMINSTAR:
289          case OP_NOTQUERY:
290          case OP_NOTMINQUERY:
291        case OP_TYPESTAR:        case OP_TYPESTAR:
292        case OP_TYPEMINSTAR:        case OP_TYPEMINSTAR:
293        case OP_TYPEQUERY:        case OP_TYPEQUERY:
# Line 291  do { Line 307  do {
307        /* Check a class or a back reference for a zero minimum */        /* Check a class or a back reference for a zero minimum */
308    
309        case OP_CLASS:        case OP_CLASS:
310          case OP_NEGCLASS:
311        case OP_REF:        case OP_REF:
312        cc += (*cc == OP_REF)? 2 : 4 + 2 * cc[2] + cc[3];        cc += (*cc == OP_REF)? 2 : 33;
313    
314        switch (*cc)        switch (*cc)
315          {          {
# Line 356  Returns:     zero or positive => a data Line 373  Returns:     zero or positive => a data
373  */  */
374    
375  static int  static int
376  check_escape(uschar **ptrptr, char **errorptr, int bracount, int options,  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
377    BOOL isclass)    int options, BOOL isclass)
378  {  {
379  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
380  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */
381  int i;  int i;
382    
# Line 378  else if ((i = escapes[c - '0']) != 0) c Line 395  else if ((i = escapes[c - '0']) != 0) c
395    
396  else  else
397    {    {
398    uschar *oldptr;    const uschar *oldptr;
399    switch (c)    switch (c)
400      {      {
401      /* The handling of escape sequences consisting of a string of digits      /* The handling of escape sequences consisting of a string of digits
# Line 498  Returns:    TRUE or FALSE Line 515  Returns:    TRUE or FALSE
515  */  */
516    
517  static BOOL  static BOOL
518  is_counted_repeat(uschar *p)  is_counted_repeat(const uschar *p)
519  {  {
520  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;
521  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;
# Line 533  Returns:     pointer to '}' on success; Line 550  Returns:     pointer to '}' on success;
550               current ptr on error, with errorptr set               current ptr on error, with errorptr set
551  */  */
552    
553  static uschar *  static const uschar *
554  read_repeat_counts(uschar *p, int *minp, int *maxp, char **errorptr)  read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)
555  {  {
556  int min = 0;  int min = 0;
557  int max = -1;  int max = -1;
# Line 588  Returns:     TRUE on success Line 605  Returns:     TRUE on success
605  */  */
606    
607  static BOOL  static BOOL
608  compile_branch(int options, int *brackets, uschar **codeptr, uschar **ptrptr,  compile_branch(int options, int *brackets, uschar **codeptr,
609    char **errorptr)    const uschar **ptrptr, const char **errorptr)
610  {  {
611  int repeat_type, op_type;  int repeat_type, op_type;
612  int repeat_min, repeat_max;  int repeat_min, repeat_max;
613  int bravalue, length;  int bravalue, length;
614  register int c;  register int c;
615  register uschar *code = *codeptr;  register uschar *code = *codeptr;
616  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
617    const uschar *oldptr;
618  uschar *previous = NULL;  uschar *previous = NULL;
 uschar *oldptr;  
619  uschar class[32];  uschar class[32];
620    
621  /* Switch on next character until the end of the branch */  /* Switch on next character until the end of the branch */
# Line 655  for (;; ptr++) Line 672  for (;; ptr++)
672    
673      case '[':      case '[':
674      previous = code;      previous = code;
     *code++ = OP_CLASS;  
675    
676      /* If the first character is '^', set the negation flag */      /* If the first character is '^', set the negation flag, and use a
677        different opcode. This only matters if caseless matching is specified at
678        runtime. */
679    
680      if ((c = *(++ptr)) == '^')      if ((c = *(++ptr)) == '^')
681        {        {
682        negate_class = TRUE;        negate_class = TRUE;
683          *code++ = OP_NEGCLASS;
684        c = *(++ptr);        c = *(++ptr);
685        }        }
686      else negate_class = FALSE;      else
687          {
688          negate_class = FALSE;
689          *code++ = OP_CLASS;
690          }
691    
692      /* Keep a count of chars so that we can optimize the case of just a single      /* Keep a count of chars so that we can optimize the case of just a single
693      character. */      character. */
# Line 693  for (;; ptr++) Line 716  for (;; ptr++)
716        /* Backslash may introduce a single character, or it may introduce one        /* Backslash may introduce a single character, or it may introduce one
717        of the specials, which just set a flag. Escaped items are checked for        of the specials, which just set a flag. Escaped items are checked for
718        validity in the pre-compiling pass. The sequence \b is a special case.        validity in the pre-compiling pass. The sequence \b is a special case.
719        Inside a class (and only there) it is treated as backslash. Elsewhere        Inside a class (and only there) it is treated as backspace. Elsewhere
720        it marks a word boundary. Other escapes have preset maps ready to        it marks a word boundary. Other escapes have preset maps ready to
721        or into the one we are building. We assume they have more than one        or into the one we are building. We assume they have more than one
722        character in them, so set class_count bigger than one. */        character in them, so set class_count bigger than one. */
# Line 972  for (;; ptr++) Line 995  for (;; ptr++)
995            if (code == previous) code += 2; else previous[1]++;            if (code == previous) code += 2; else previous[1]++;
996            }            }
997    
998          /* Insert an UPTO if the max is greater than the min. */          /* If the maximum is unlimited, insert an OP_STAR. */
999    
1000          if (repeat_max != repeat_min)          if (repeat_max < 0)
1001              {
1002              *code++ = c;
1003              *code++ = OP_STAR + repeat_type;
1004              }
1005    
1006            /* Else insert an UPTO if the max is greater than the min. */
1007    
1008            else if (repeat_max != repeat_min)
1009            {            {
1010            *code++ = c;            *code++ = c;
1011            repeat_max -= repeat_min;            repeat_max -= repeat_min;
# Line 992  for (;; ptr++) Line 1023  for (;; ptr++)
1023      /* If previous was a character class or a back reference, we put the repeat      /* If previous was a character class or a back reference, we put the repeat
1024      stuff after it. */      stuff after it. */
1025    
1026      else if (*previous == OP_CLASS || *previous == OP_REF)      else if (*previous == OP_CLASS || *previous == OP_NEGCLASS ||
1027                 *previous == OP_REF)
1028        {        {
1029        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
1030          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
# Line 1018  for (;; ptr++) Line 1050  for (;; ptr++)
1050      else if ((int)*previous >= OP_BRA)      else if ((int)*previous >= OP_BRA)
1051        {        {
1052        int i;        int i;
1053        int length = code - previous;        int len = code - previous;
1054    
1055        if (repeat_max == -1 && could_be_empty(previous))        if (repeat_max == -1 && could_be_empty(previous))
1056          {          {
# Line 1035  for (;; ptr++) Line 1067  for (;; ptr++)
1067          {          {
1068          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1069            {            {
1070            memcpy(code, previous, length);            memcpy(code, previous, len);
1071            code += length;            code += len;
1072            }            }
1073          }          }
1074    
# Line 1048  for (;; ptr++) Line 1080  for (;; ptr++)
1080          {          {
1081          if (repeat_min == 0)          if (repeat_min == 0)
1082            {            {
1083            memmove(previous+1, previous, length);            memmove(previous+1, previous, len);
1084            code++;            code++;
1085            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
1086            }            }
1087    
1088          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1089            {            {
1090            memcpy(code, previous, length);            memcpy(code, previous, len);
1091            code += length;            code += len;
1092            }            }
1093    
1094          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)
1095            {            {
1096            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
1097            memcpy(code, previous, length);            memcpy(code, previous, len);
1098            code += length;            code += len;
1099            }            }
1100          }          }
1101    
# Line 1210  for (;; ptr++) Line 1242  for (;; ptr++)
1242        continue;        continue;
1243        }        }
1244    
1245      /* Reset and fall through */      /* Data character: reset and fall through */
1246    
1247      ptr = oldptr;      ptr = oldptr;
1248      c = '\\';      c = '\\';
# Line 1301  Returns:    TRUE on success Line 1333  Returns:    TRUE on success
1333  */  */
1334    
1335  static BOOL  static BOOL
1336  compile_regex(int options, int *brackets, uschar **codeptr, uschar **ptrptr,  compile_regex(int options, int *brackets, uschar **codeptr,
1337    char **errorptr)    const uschar **ptrptr, const char **errorptr)
1338  {  {
1339  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
1340  uschar *code = *codeptr;  uschar *code = *codeptr;
1341  uschar *start_bracket = code;  uschar *start_bracket = code;
1342    
# Line 1370  Returns:   TRUE or FALSE Line 1402  Returns:   TRUE or FALSE
1402  */  */
1403    
1404  static BOOL  static BOOL
1405  is_anchored(register uschar *code, BOOL multiline)  is_anchored(register const uschar *code, BOOL multiline)
1406  {  {
1407  do {  do {
1408     int op = (int)code[3];     int op = (int)code[3];
# Line 1399  Returns:   TRUE or FALSE Line 1431  Returns:   TRUE or FALSE
1431  */  */
1432    
1433  static BOOL  static BOOL
1434  is_startline(uschar *code)  is_startline(const uschar *code)
1435  {  {
1436  do {  do {
1437     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)
# Line 1484  Returns:       pointer to compiled data Line 1516  Returns:       pointer to compiled data
1516  */  */
1517    
1518  pcre *  pcre *
1519  pcre_compile(const char *pattern, int options, char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
1520    int *erroroffset)    int *erroroffset)
1521  {  {
1522  real_pcre *re;  real_pcre *re;
# Line 1494  int runlength; Line 1526  int runlength;
1526  int c, size;  int c, size;
1527  int bracount = 0;  int bracount = 0;
1528  int brastack[200];  int brastack[200];
 int brastackptr = 0;  
1529  int top_backref = 0;  int top_backref = 0;
1530  uschar *code, *ptr;  unsigned int brastackptr = 0;
1531    uschar *code;
1532    const uschar *ptr;
1533    
1534  #ifdef DEBUG  #ifdef DEBUG
1535  uschar *code_base, *code_end;  uschar *code_base, *code_end;
# Line 1523  if ((options & ~PUBLIC_OPTIONS) != 0) Line 1556  if ((options & ~PUBLIC_OPTIONS) != 0)
1556    return NULL;    return NULL;
1557    }    }
1558    
1559  #ifdef DEBUG  DPRINTF(("------------------------------------------------------------------\n"));
1560  printf("------------------------------------------------------------------\n");  DPRINTF(("%s\n", pattern));
 printf("%s\n", pattern);  
 #endif  
1561    
1562  /* The first thing to do is to make a pass over the pattern to compute the  /* The first thing to do is to make a pass over the pattern to compute the
1563  amount of store required to hold the compiled code. This does not have to be  amount of store required to hold the compiled code. This does not have to be
# Line 1535  internal flag settings. Make an attempt Line 1566  internal flag settings. Make an attempt
1566  if an "extended" flag setting appears late in the pattern. We can't be so  if an "extended" flag setting appears late in the pattern. We can't be so
1567  clever for #-comments. */  clever for #-comments. */
1568    
1569  ptr = (uschar *)(pattern - 1);  ptr = (const uschar *)(pattern - 1);
1570  while ((c = *(++ptr)) != 0)  while ((c = *(++ptr)) != 0)
1571    {    {
1572    int min, max;    int min, max;
# Line 1562  while ((c = *(++ptr)) != 0) Line 1593  while ((c = *(++ptr)) != 0)
1593    
1594      case '\\':      case '\\':
1595        {        {
1596        uschar *save_ptr = ptr;        const uschar *save_ptr = ptr;
1597        c = check_escape(&ptr, errorptr, bracount, options, FALSE);        c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1598        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1599        if (c >= 0)        if (c >= 0)
# Line 1641  while ((c = *(++ptr)) != 0) Line 1672  while ((c = *(++ptr)) != 0)
1672        {        {
1673        if (*ptr == '\\')        if (*ptr == '\\')
1674          {          {
1675          int c = check_escape(&ptr, errorptr, bracount, options, TRUE);          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE);
1676          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1677          if (-c == ESC_b) class_charcount++; else class_charcount = 10;          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
1678          }          }
1679        else class_charcount++;        else class_charcount++;
1680        ptr++;        ptr++;
# Line 1658  while ((c = *(++ptr)) != 0) Line 1689  while ((c = *(++ptr)) != 0)
1689    
1690        /* A repeat needs either 1 or 5 bytes. */        /* A repeat needs either 1 or 5 bytes. */
1691    
1692        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))
1693          {          {
1694          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
1695          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
# Line 1766  while ((c = *(++ptr)) != 0) Line 1797  while ((c = *(++ptr)) != 0)
1797      continue;      continue;
1798    
1799      /* Handle ket. Look for subsequent max/min; for certain sets of values we      /* Handle ket. Look for subsequent max/min; for certain sets of values we
1800      have to replicate this bracket up to that many times. */      have to replicate this bracket up to that many times. If brastackptr is
1801        0 this is an unmatched bracket which will generate an error, but take care
1802        not to try to access brastack[-1]. */
1803    
1804      case ')':      case ')':
1805      length += 3;      length += 3;
1806        {        {
1807        int min = 1;        int minval = 1;
1808        int max = 1;        int maxval = 1;
1809        int duplength = length - brastack[--brastackptr];        int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0;
1810    
1811        /* Leave ptr at the final char; for read_repeat_counts this happens        /* Leave ptr at the final char; for read_repeat_counts this happens
1812        automatically; for the others we need an increment. */        automatically; for the others we need an increment. */
1813    
1814        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))
1815          {          {
1816          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr);
1817          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1818          }          }
1819        else if (c == '*') { min = 0; max = -1; ptr++; }        else if (c == '*') { minval = 0; maxval = -1; ptr++; }
1820        else if (c == '+') { max = -1; ptr++; }        else if (c == '+') { maxval = -1; ptr++; }
1821        else if (c == '?') { min = 0; ptr++; }        else if (c == '?') { minval = 0; ptr++; }
1822    
1823        /* If there is a minimum > 1 we have to replicate up to min-1 times; if        /* If there is a minimum > 1 we have to replicate up to minval-1 times;
1824        there is a limited maximum we have to replicate up to max-1 times and        if there is a limited maximum we have to replicate up to maxval-1 times
1825        allow for a BRAZERO item before each optional copy, as we also have to        and allow for a BRAZERO item before each optional copy, as we also have
1826        do before the first copy if the minimum is zero. */        to do before the first copy if the minimum is zero. */
1827    
1828        if (min == 0) length++;        if (minval == 0) length++;
1829          else if (min > 1) length += (min - 1) * duplength;          else if (minval > 1) length += (minval - 1) * duplength;
1830        if (max > min) length += (max - min) * (duplength + 1);        if (maxval > minval) length += (maxval - minval) * (duplength + 1);
1831        }        }
   
1832      continue;      continue;
1833    
1834      /* Non-special character. For a run of such characters the length required      /* Non-special character. For a run of such characters the length required
# Line 1827  while ((c = *(++ptr)) != 0) Line 1859  while ((c = *(++ptr)) != 0)
1859    
1860        if (c == '\\')        if (c == '\\')
1861          {          {
1862          uschar *saveptr = ptr;          const uschar *saveptr = ptr;
1863          c = check_escape(&ptr, errorptr, bracount, options, FALSE);          c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1864          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1865          if (c < 0) { ptr = saveptr; break; }          if (c < 0) { ptr = saveptr; break; }
# Line 1857  if (length > 65539) Line 1889  if (length > 65539)
1889    }    }
1890    
1891  /* Compute the size of data block needed and get it, either from malloc or  /* Compute the size of data block needed and get it, either from malloc or
1892  externally provided function. Put in the magic number and the options. */  externally provided function. We specify "code[0]" in the offsetof() expression
1893    rather than just "code", because it has been reported that one broken compiler
1894    fails on "code" because it is also an independent variable. It should make no
1895    difference to the value of the offsetof(). */
1896    
1897  size = length + offsetof(real_pcre, code);  size = length + offsetof(real_pcre, code[0]);
1898  re = (real_pcre *)(pcre_malloc)(size);  re = (real_pcre *)(pcre_malloc)(size);
1899    
1900  if (re == NULL)  if (re == NULL)
# Line 1868  if (re == NULL) Line 1903  if (re == NULL)
1903    return NULL;    return NULL;
1904    }    }
1905    
1906    /* Put in the magic number and the options. */
1907    
1908  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
1909  re->options = options;  re->options = options;
1910    
# Line 1875  re->options = options; Line 1912  re->options = options;
1912  error, *errorptr will be set non-NULL, so we don't need to look at the result  error, *errorptr will be set non-NULL, so we don't need to look at the result
1913  of the function here. */  of the function here. */
1914    
1915  ptr = (uschar *)pattern;  ptr = (const uschar *)pattern;
1916  code = re->code;  code = re->code;
1917  *code = OP_BRA;  *code = OP_BRA;
1918  bracount = 0;  bracount = 0;
# Line 1902  if (*errorptr != NULL) Line 1939  if (*errorptr != NULL)
1939    {    {
1940    (pcre_free)(re);    (pcre_free)(re);
1941    PCRE_ERROR_RETURN:    PCRE_ERROR_RETURN:
1942    *erroroffset = ptr - (uschar *)pattern;    *erroroffset = ptr - (const uschar *)pattern;
1943    return NULL;    return NULL;
1944    }    }
1945    
# Line 1918  if ((options & PCRE_ANCHORED) == 0) Line 1955  if ((options & PCRE_ANCHORED) == 0)
1955      re->options |= PCRE_ANCHORED;      re->options |= PCRE_ANCHORED;
1956    else    else
1957      {      {
1958      int c = find_firstchar(re->code);      int ch = find_firstchar(re->code);
1959      if (c >= 0)      if (ch >= 0)
1960        {        {
1961        re->first_char = c;        re->first_char = ch;
1962        re->options |= PCRE_FIRSTSET;        re->options |= PCRE_FIRSTSET;
1963        }        }
1964      else if (is_startline(re->code))      else if (is_startline(re->code))
# Line 2013  while (code < code_end) Line 2050  while (code < code_end)
2050      case OP_MINUPTO:      case OP_MINUPTO:
2051      if (isprint(c = code[3])) printf("    %c{", c);      if (isprint(c = code[3])) printf("    %c{", c);
2052        else printf("    \\x%02x{", c);        else printf("    \\x%02x{", c);
2053      if (*code != OP_EXACT) printf(",");      if (*code != OP_EXACT) printf("0,");
2054      printf("%d}", (code[1] << 8) + code[2]);      printf("%d}", (code[1] << 8) + code[2]);
2055      if (*code == OP_MINUPTO) printf("?");      if (*code == OP_MINUPTO) printf("?");
2056      code += 3;      code += 3;
# Line 2058  while (code < code_end) Line 2095  while (code < code_end)
2095    
2096      case OP_REF:      case OP_REF:
2097      printf("    \\%d", *(++code));      printf("    \\%d", *(++code));
2098      break;      code ++;
2099        goto CLASS_REF_REPEAT;
2100    
2101      case OP_CLASS:      case OP_CLASS:
2102        case OP_NEGCLASS:
2103        {        {
2104        int i, min, max;        int i, min, max;
2105    
2106        code++;        if (*code++ == OP_CLASS) printf("    [");
2107        printf("    [");          else printf("   ^[");
2108    
2109        for (i = 0; i < 256; i++)        for (i = 0; i < 256; i++)
2110          {          {
# Line 2088  while (code < code_end) Line 2127  while (code < code_end)
2127        printf("]");        printf("]");
2128        code += 32;        code += 32;
2129    
2130          CLASS_REF_REPEAT:
2131    
2132        switch(*code)        switch(*code)
2133          {          {
2134          case OP_CRSTAR:          case OP_CRSTAR:
# Line 2200  Returns:      TRUE if matched Line 2241  Returns:      TRUE if matched
2241  */  */
2242    
2243  static BOOL  static BOOL
2244  match_ref(int number, register uschar *eptr, int length, match_data *md)  match_ref(int number, register const uschar *eptr, int length, match_data *md)
2245  {  {
2246  uschar *p = md->start_subject + md->offset_vector[number];  const uschar *p = md->start_subject + md->offset_vector[number];
2247    
2248  #ifdef DEBUG  #ifdef DEBUG
2249  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 2249  Returns:       TRUE if matched Line 2290  Returns:       TRUE if matched
2290  */  */
2291    
2292  static BOOL  static BOOL
2293  match(register uschar *eptr, register uschar *ecode, int offset_top,  match(register const uschar *eptr, register const uschar *ecode, int offset_top,
2294    match_data *md)    match_data *md)
2295  {  {
2296  for (;;)  for (;;)
# Line 2257  for (;;) Line 2298  for (;;)
2298    int min, max, ctype;    int min, max, ctype;
2299    register int i;    register int i;
2300    register int c;    register int c;
2301    BOOL minimize;    BOOL minimize = FALSE;
2302    
2303    /* Opening bracket. Check the alternative branches in turn, failing if none    /* Opening bracket. Check the alternative branches in turn, failing if none
2304    match. We have to set the start offset if required and there is space    match. We have to set the start offset if required and there is space
# Line 2270  for (;;) Line 2311  for (;;)
2311    if ((int)*ecode >= OP_BRA)    if ((int)*ecode >= OP_BRA)
2312      {      {
2313      int number = (*ecode - OP_BRA) << 1;      int number = (*ecode - OP_BRA) << 1;
2314      int save_offset1, save_offset2;      int save_offset1 = 0, save_offset2 = 0;
2315    
2316      #ifdef DEBUG      DPRINTF(("start bracket %d\n", number/2));
     printf("start bracket %d\n", number/2);  
     #endif  
2317    
2318      if (number > 0 && number < md->offset_end)      if (number > 0 && number < md->offset_end)
2319        {        {
# Line 2282  for (;;) Line 2321  for (;;)
2321        save_offset2 = md->offset_vector[number+1];        save_offset2 = md->offset_vector[number+1];
2322        md->offset_vector[number] = eptr - md->start_subject;        md->offset_vector[number] = eptr - md->start_subject;
2323    
2324        #ifdef DEBUG        DPRINTF(("saving %d %d\n", save_offset1, save_offset2));
       printf("saving %d %d\n", save_offset1, save_offset2);  
       #endif  
2325        }        }
2326    
2327      /* Recurse for all the alternatives. */      /* Recurse for all the alternatives. */
# Line 2296  for (;;) Line 2333  for (;;)
2333        }        }
2334      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
2335    
2336      #ifdef DEBUG      DPRINTF(("bracket %d failed\n", number/2));
     printf("bracket %d failed\n", number/2);  
     #endif  
2337    
2338      if (number > 0 && number < md->offset_end)      if (number > 0 && number < md->offset_end)
2339        {        {
# Line 2360  for (;;) Line 2395  for (;;)
2395    
2396      /* "Once" brackets are like assertion brackets except that after a match,      /* "Once" brackets are like assertion brackets except that after a match,
2397      the point in the subject string is not moved back. Thus there can never be      the point in the subject string is not moved back. Thus there can never be
2398      a back into the brackets. Check the alternative branches in turn - the      a move back into the brackets. Check the alternative branches in turn - the
2399      matching won't pass the KET for this kind of subpattern. If any one branch      matching won't pass the KET for this kind of subpattern. If any one branch
2400      matches, we carry on, leaving the subject pointer. */      matches, we carry on, leaving the subject pointer. */
2401    
# Line 2397  for (;;) Line 2432  for (;;)
2432    
2433      case OP_BRAZERO:      case OP_BRAZERO:
2434        {        {
2435        uschar *next = ecode+1;        const uschar *next = ecode+1;
2436        if (match(eptr, next, offset_top, md)) return TRUE;        if (match(eptr, next, offset_top, md)) return TRUE;
2437        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2438        ecode = next + 3;        ecode = next + 3;
# Line 2406  for (;;) Line 2441  for (;;)
2441    
2442      case OP_BRAMINZERO:      case OP_BRAMINZERO:
2443        {        {
2444        uschar *next = ecode+1;        const uschar *next = ecode+1;
2445        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2446        if (match(eptr, next+3, offset_top, md)) return TRUE;        if (match(eptr, next+3, offset_top, md)) return TRUE;
2447        ecode++;        ecode++;
# Line 2422  for (;;) Line 2457  for (;;)
2457      case OP_KETRMAX:      case OP_KETRMAX:
2458        {        {
2459        int number;        int number;
2460        uschar *prev = ecode - (ecode[1] << 8) - ecode[2];        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
2461    
2462        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)
2463          {          {
# Line 2437  for (;;) Line 2472  for (;;)
2472    
2473        number = (*prev - OP_BRA) << 1;        number = (*prev - OP_BRA) << 1;
2474    
2475        #ifdef DEBUG        DPRINTF(("end bracket %d\n", number/2));
       printf("end bracket %d\n", number/2);  
       #endif  
2476    
2477        if (number > 0)        if (number > 0)
2478          {          {
# Line 2671  for (;;) Line 2704  for (;;)
2704    
2705        else        else
2706          {          {
2707          uschar *pp = eptr;          const uschar *pp = eptr;
2708          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2709            {            {
2710            if (!match_ref(number, eptr, length, md)) break;            if (!match_ref(number, eptr, length, md)) break;
# Line 2691  for (;;) Line 2724  for (;;)
2724      item to see if there is repeat information following. Then obey similar      item to see if there is repeat information following. Then obey similar
2725      code to character type repeats - written out again for speed. If caseless      code to character type repeats - written out again for speed. If caseless
2726      matching was set at runtime but not at compile time, we have to check both      matching was set at runtime but not at compile time, we have to check both
2727      versions of a character. */      versions of a character, and we have to behave differently for positive and
2728        negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are
2729        treated differently. */
2730    
2731      case OP_CLASS:      case OP_CLASS:
2732        case OP_NEGCLASS:
2733        {        {
2734        uschar *data = ecode + 1;  /* Save for matching */        BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless;
2735        ecode += 33;               /* Advance past the item */        const uschar *data = ecode + 1;  /* Save for matching */
2736          ecode += 33;                     /* Advance past the item */
2737    
2738        switch (*ecode)        switch (*ecode)
2739          {          {
# Line 2723  for (;;) Line 2760  for (;;)
2760          break;          break;
2761    
2762          default:               /* No repeat follows */          default:               /* No repeat follows */
2763          if (eptr >= md->end_subject) return FALSE;          min = max = 1;
2764          c = *eptr++;          break;
         if ((data[c/8] & (1 << (c&7))) != 0) continue;    /* With main loop */  
         if (md->runtime_caseless)  
           {  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  /* With main loop */  
           }  
         return FALSE;  
2765          }          }
2766    
2767        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
# Line 2740  for (;;) Line 2770  for (;;)
2770          {          {
2771          if (eptr >= md->end_subject) return FALSE;          if (eptr >= md->end_subject) return FALSE;
2772          c = *eptr++;          c = *eptr++;
2773          if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2774          if (md->runtime_caseless)          /* Either not runtime caseless, or it was a positive class. For
2775            runtime caseless, continue if either case is in the map. */
2776    
2777            if (!nasty_case)
2778              {
2779              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2780              if (md->runtime_caseless)
2781                {
2782                c = pcre_fcc[c];
2783                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2784                }
2785              }
2786    
2787            /* Runtime caseless and it was a negative class. Continue only if
2788            both cases are in the map. */
2789    
2790            else
2791            {            {
2792              if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2793            c = pcre_fcc[c];            c = pcre_fcc[c];
2794            if ((data[c/8] & (1 << (c&7))) != 0) continue;            if ((data[c/8] & (1 << (c&7))) != 0) continue;
2795            }            }
2796    
2797          return FALSE;          return FALSE;
2798          }          }
2799    
# Line 2764  for (;;) Line 2812  for (;;)
2812            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md)) return TRUE;
2813            if (i >= max || eptr >= md->end_subject) return FALSE;            if (i >= max || eptr >= md->end_subject) return FALSE;
2814            c = *eptr++;            c = *eptr++;
2815            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2816            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2817              runtime caseless, continue if either case is in the map. */
2818    
2819              if (!nasty_case)
2820                {
2821                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2822                if (md->runtime_caseless)
2823                  {
2824                  c = pcre_fcc[c];
2825                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2826                  }
2827                }
2828    
2829              /* Runtime caseless and it was a negative class. Continue only if
2830              both cases are in the map. */
2831    
2832              else
2833              {              {
2834                if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2835              c = pcre_fcc[c];              c = pcre_fcc[c];
2836              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2837              }              }
2838    
2839            return FALSE;            return FALSE;
2840            }            }
2841          /* Control never gets here */          /* Control never gets here */
# Line 2779  for (;;) Line 2845  for (;;)
2845    
2846        else        else
2847          {          {
2848          uschar *pp = eptr;          const uschar *pp = eptr;
2849          for (i = min; i < max; eptr++, i++)          for (i = min; i < max; eptr++, i++)
2850            {            {
2851            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2852            c = *eptr;            c = *eptr;
2853            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2854            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2855              runtime caseless, continue if either case is in the map. */
2856    
2857              if (!nasty_case)
2858                {
2859                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2860                if (md->runtime_caseless)
2861                  {
2862                  c = pcre_fcc[c];
2863                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2864                  }
2865                }
2866    
2867              /* Runtime caseless and it was a negative class. Continue only if
2868              both cases are in the map. */
2869    
2870              else
2871              {              {
2872                if ((data[c/8] & (1 << (c&7))) == 0) break;
2873              c = pcre_fcc[c];              c = pcre_fcc[c];
2874              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2875              }              }
2876    
2877            break;            break;
2878            }            }
2879    
# Line 2807  for (;;) Line 2891  for (;;)
2891        register int length = ecode[1];        register int length = ecode[1];
2892        ecode += 2;        ecode += 2;
2893    
2894        #ifdef DEBUG  #ifdef DEBUG    /* Sigh. Some compilers never learn. */
2895        if (eptr >= md->end_subject)        if (eptr >= md->end_subject)
2896          printf("matching subject <null> against pattern ");          printf("matching subject <null> against pattern ");
2897        else        else
# Line 2818  for (;;) Line 2902  for (;;)
2902          }          }
2903        pchars(ecode, length, FALSE, md);        pchars(ecode, length, FALSE, md);
2904        printf("\n");        printf("\n");
2905        #endif  #endif
2906    
2907        if (length > md->end_subject - eptr) return FALSE;        if (length > md->end_subject - eptr) return FALSE;
2908        if (md->caseless)        if (md->caseless)
# Line 2875  for (;;) Line 2959  for (;;)
2959      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
2960      characters and work backwards. */      characters and work backwards. */
2961    
2962      #ifdef DEBUG      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
2963      printf("matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
2964    
2965      if (md->caseless)      if (md->caseless)
2966        {        {
# Line 2897  for (;;) Line 2979  for (;;)
2979          }          }
2980        else        else
2981          {          {
2982          uschar *pp = eptr;          const uschar *pp = eptr;
2983          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2984            {            {
2985            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;
# Line 2927  for (;;) Line 3009  for (;;)
3009          }          }
3010        else        else
3011          {          {
3012          uschar *pp = eptr;          const uschar *pp = eptr;
3013          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3014            {            {
3015            if (eptr >= md->end_subject || c != *eptr) break;            if (eptr >= md->end_subject || c != *eptr) break;
# Line 2943  for (;;) Line 3025  for (;;)
3025      /* Match a negated single character */      /* Match a negated single character */
3026    
3027      case OP_NOT:      case OP_NOT:
3028      if (eptr > md->end_subject) return FALSE;      if (eptr >= md->end_subject) return FALSE;
3029      ecode++;      ecode++;
3030      if (md->caseless)      if (md->caseless)
3031        {        {
# Line 3002  for (;;) Line 3084  for (;;)
3084      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
3085      characters and work backwards. */      characters and work backwards. */
3086    
3087      #ifdef DEBUG      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3088      printf("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
3089    
3090      if (md->caseless)      if (md->caseless)
3091        {        {
# Line 3024  for (;;) Line 3104  for (;;)
3104          }          }
3105        else        else
3106          {          {
3107          uschar *pp = eptr;          const uschar *pp = eptr;
3108          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3109            {            {
3110            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;
# Line 3054  for (;;) Line 3134  for (;;)
3134          }          }
3135        else        else
3136          {          {
3137          uschar *pp = eptr;          const uschar *pp = eptr;
3138          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3139            {            {
3140            if (eptr >= md->end_subject || c == *eptr) break;            if (eptr >= md->end_subject || c == *eptr) break;
# Line 3171  for (;;) Line 3251  for (;;)
3251    
3252      else      else
3253        {        {
3254        uschar *pp = eptr;        const uschar *pp = eptr;
3255        switch(ctype)        switch(ctype)
3256          {          {
3257          case OP_ANY:          case OP_ANY:
# Line 3255  for (;;) Line 3335  for (;;)
3335      /* There's been some horrible disaster. */      /* There's been some horrible disaster. */
3336    
3337      default:      default:
3338      #ifdef DEBUG      DPRINTF(("Unknown opcode %d\n", *ecode));
     printf("Unknown opcode %d\n", *ecode);  
     #endif  
3339      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
3340      return FALSE;      return FALSE;
3341      }      }
# Line 3273  for (;;) Line 3351  for (;;)
3351    
3352    
3353  /*************************************************  /*************************************************
3354    *         Segregate setjmp()                     *
3355    *************************************************/
3356    
3357    /* The -Wall option of gcc gives warnings for all local variables when setjmp()
3358    is used, even if the coding conforms to the rules of ANSI C. To avoid this, we
3359    hide it in a separate function. This is called only when PCRE_EXTRA is set,
3360    since it's needed only for the extension \X option, and with any luck, a good
3361    compiler will spot the tail call and compile it efficiently.
3362    
3363    Arguments:
3364       eptr        pointer in subject
3365       ecode       position in code
3366       offset_top  current top pointer
3367       match_block pointer to "static" info for the match
3368    
3369    Returns:       TRUE if matched
3370    */
3371    
3372    static BOOL
3373    match_with_setjmp(const uschar *eptr, const uschar *ecode, int offset_top,
3374      match_data *match_block)
3375    {
3376    return setjmp(match_block->fail_env) == 0 &&
3377          match(eptr, ecode, offset_top, match_block);
3378    }
3379    
3380    
3381    
3382    /*************************************************
3383  *         Execute a Regular Expression           *  *         Execute a Regular Expression           *
3384  *************************************************/  *************************************************/
3385    
# Line 3299  int Line 3406  int
3406  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
3407    const char *subject, int length, int options, int *offsets, int offsetcount)    const char *subject, int length, int options, int *offsets, int offsetcount)
3408  {  {
3409  int resetcount;  int resetcount, ocount;
 int ocount = offsetcount;  
3410  int first_char = -1;  int first_char = -1;
3411  match_data match_block;  match_data match_block;
3412  uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3413  uschar *start_match = (uschar *)subject;  const uschar *start_match = (const uschar *)subject;
3414  uschar *end_subject;  const uschar *end_subject;
3415  real_pcre *re = (real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
3416  real_pcre_extra *extra = (real_pcre_extra *)external_extra;  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
3417    BOOL using_temporary_offsets = FALSE;
3418  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3419  BOOL startline = (re->options & PCRE_STARTLINE) != 0;  BOOL startline = (re->options & PCRE_STARTLINE) != 0;
3420    
# Line 3317  if (re == NULL || subject == NULL || Line 3424  if (re == NULL || subject == NULL ||
3424     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3425  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
3426    
3427  match_block.start_subject = (uschar *)subject;  match_block.start_subject = (const uschar *)subject;
3428  match_block.end_subject = match_block.start_subject + length;  match_block.end_subject = match_block.start_subject + length;
3429  end_subject = match_block.end_subject;  end_subject = match_block.end_subject;
3430    
# Line 3336  match_block.errorcode = PCRE_ERROR_NOMAT Line 3443  match_block.errorcode = PCRE_ERROR_NOMAT
3443    
3444  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
3445  hold, we get a temporary bit of working store to use during the matching.  hold, we get a temporary bit of working store to use during the matching.
3446  Otherwise, we can use the vector supplied, rounding down the size of it to a  Otherwise, we can use the vector supplied, rounding down its size to a multiple
3447  multiple of 2. */  of 2. */
3448    
3449  ocount &= (-2);  ocount = offsetcount & (-2);
3450  if (re->top_backref > 0 && re->top_backref + 1 >= ocount/2)  if (re->top_backref > 0 && re->top_backref >= ocount/2)
3451    {    {
3452    ocount = re->top_backref * 2 + 2;    ocount = re->top_backref * 2 + 2;
3453    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));
3454    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3455    #ifdef DEBUG    using_temporary_offsets = TRUE;
3456    printf("Got memory to hold back references\n");    DPRINTF(("Got memory to hold back references\n"));
   #endif  
3457    }    }
3458  else match_block.offset_vector = offsets;  else match_block.offset_vector = offsets;
3459    
# Line 3400  if (!anchored) Line 3506  if (!anchored)
3506    
3507  do  do
3508    {    {
3509      int rc;
3510    register int *iptr = match_block.offset_vector;    register int *iptr = match_block.offset_vector;
3511    register int *iend = iptr + resetcount;    register int *iend = iptr + resetcount;
3512    
# Line 3441  do Line 3548  do
3548        }        }
3549      }      }
3550    
3551    #ifdef DEBUG  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
3552    printf(">>>> Match against: ");    printf(">>>> Match against: ");
3553    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, &match_block);
3554    printf("\n");    printf("\n");
3555    #endif  #endif
3556    
3557    /* When a match occurs, substrings will be set for all internal extractions;    /* When a match occurs, substrings will be set for all internal extractions;
3558    we just need to set up the whole thing as substring 0 before returning. If    we just need to set up the whole thing as substring 0 before returning. If
# Line 3455  do Line 3562  do
3562    if certain parts of the pattern were not used.    if certain parts of the pattern were not used.
3563    
3564    Before starting the match, we have to set up a longjmp() target to enable    Before starting the match, we have to set up a longjmp() target to enable
3565    the "cut" operation to fail a match completely without backtracking. */    the "cut" operation to fail a match completely without backtracking. This
3566      is done in a separate function to avoid compiler warnings. We need not do
3567      it unless PCRE_EXTRA is set, since only in that case is the "cut" operation
3568      enabled. */
3569    
3570    if (setjmp(match_block.fail_env) == 0 &&    if ((re->options & PCRE_EXTRA) != 0)
       match(start_match, re->code, 2, &match_block))  
3571      {      {
3572      int rc;      if (!match_with_setjmp(start_match, re->code, 2, &match_block))
3573          continue;
3574      if (ocount != offsetcount)      }
3575        {    else if (!match(start_match, re->code, 2, &match_block)) continue;
       if (offsetcount >= 4)  
         {  
         memcpy(offsets + 2, match_block.offset_vector + 2,  
           (offsetcount - 2) * sizeof(int));  
         #ifdef DEBUG  
         printf("Copied offsets; freeing temporary memory\n");  
         #endif  
         }  
       if (match_block.end_offset_top > offsetcount)  
         match_block.offset_overflow = TRUE;  
3576    
3577        #ifdef DEBUG    /* Copy the offset information from temporary store if necessary */
       printf("Freeing temporary memory\n");  
       #endif  
3578    
3579        (pcre_free)(match_block.offset_vector);    if (using_temporary_offsets)
3580        {
3581        if (offsetcount >= 4)
3582          {
3583          memcpy(offsets + 2, match_block.offset_vector + 2,
3584            (offsetcount - 2) * sizeof(int));
3585          DPRINTF(("Copied offsets from temporary memory\n"));
3586        }        }
3587        if (match_block.end_offset_top > offsetcount)
3588          match_block.offset_overflow = TRUE;
3589    
3590      rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;      DPRINTF(("Freeing temporary memory\n"));
3591        (pcre_free)(match_block.offset_vector);
3592        }
3593    
3594      if (match_block.offset_end < 2) rc = 0; else    rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
       {  
       offsets[0] = start_match - match_block.start_subject;  
       offsets[1] = match_block.end_match_ptr - match_block.start_subject;  
       }  
3595    
3596      #ifdef DEBUG    if (match_block.offset_end < 2) rc = 0; else
3597      printf(">>>> returning %d\n", rc);      {
3598      #endif      offsets[0] = start_match - match_block.start_subject;
3599      return rc;      offsets[1] = match_block.end_match_ptr - match_block.start_subject;
3600      }      }
3601    
3602      DPRINTF((">>>> returning %d\n", rc));
3603      return rc;
3604    }    }
3605  while (!anchored &&  while (!anchored &&
3606         match_block.errorcode == PCRE_ERROR_NOMATCH &&         match_block.errorcode == PCRE_ERROR_NOMATCH &&
3607         start_match++ < end_subject);         start_match++ < end_subject);
3608    
3609  #ifdef DEBUG  if (using_temporary_offsets)
3610  printf(">>>> returning %d\n", match_block.errorcode);    {
3611  #endif    DPRINTF(("Freeing temporary memory\n"));
3612      (pcre_free)(match_block.offset_vector);
3613      }
3614    
3615    DPRINTF((">>>> returning %d\n", match_block.errorcode));
3616    
3617  return match_block.errorcode;  return match_block.errorcode;
3618  }  }

Legend:
Removed from v.3  
changed lines
  Added in v.13

  ViewVC Help
Powered by ViewVC 1.1.5