/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

Left column: revision 107 by ph10, Wed Mar 7 11:02:28 2007 UTC | Right column: revision 167 by ph10, Wed May 9 15:53:54 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 208  static const char *error_texts[] = { Line 208  static const char *error_texts[] = {
208    "malformed number or name after (?(",    "malformed number or name after (?(",
209    "conditional group contains more than two branches",    "conditional group contains more than two branches",
210    "assertion expected after (?(",    "assertion expected after (?(",
211    "(?R or (?digits must be followed by )",    "(?R or (?[+-]digits must be followed by )",
212    /* 30 */    /* 30 */
213    "unknown POSIX class name",    "unknown POSIX class name",
214    "POSIX collating elements are not supported",    "POSIX collating elements are not supported",
# Line 242  static const char *error_texts[] = { Line 242  static const char *error_texts[] = {
242    /* 55 */    /* 55 */
243    "repeating a DEFINE group is not allowed",    "repeating a DEFINE group is not allowed",
244    "inconsistent NEWLINE options",    "inconsistent NEWLINE options",
245    "\\g is not followed by an (optionally braced) non-zero number"    "\\g is not followed by an (optionally braced) non-zero number",
246      "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number"
247  };  };
248    
249    
# Line 1267  for (;;) Line 1268  for (;;)
1268        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1269        break;        break;
1270        }        }
1271  #endif  #endif
1272      }      }
1273    }    }
1274  }  }
# Line 1332  for (;;) Line 1333  for (;;)
1333        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1334        break;        break;
1335        }        }
1336  #endif  #endif
1337      }      }
1338    }    }
1339  }  }
# Line 2095  for (;; ptr++) Line 2096  for (;; ptr++)
2096    int class_lastchar;    int class_lastchar;
2097    int newoptions;    int newoptions;
2098    int recno;    int recno;
2099      int refsign;
2100    int skipbytes;    int skipbytes;
2101    int subreqbyte;    int subreqbyte;
2102    int subfirstbyte;    int subfirstbyte;
# Line 3621  for (;; ptr++) Line 3623  for (;; ptr++)
3623    
3624          code[1+LINK_SIZE] = OP_CREF;          code[1+LINK_SIZE] = OP_CREF;
3625          skipbytes = 3;          skipbytes = 3;
3626            refsign = -1;
3627    
3628          /* Check for a test for recursion in a named group. */          /* Check for a test for recursion in a named group. */
3629    
# Line 3644  for (;; ptr++) Line 3647  for (;; ptr++)
3647            terminator = '\'';            terminator = '\'';
3648            ptr++;            ptr++;
3649            }            }
3650          else terminator = 0;          else
3651              {
3652              terminator = 0;
3653              if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr);
3654              }
3655    
3656          /* We now expect to read a name; any thing else is an error */          /* We now expect to read a name; any thing else is an error */
3657    
# Line 3680  for (;; ptr++) Line 3687  for (;; ptr++)
3687          if (lengthptr != NULL) break;          if (lengthptr != NULL) break;
3688    
3689          /* In the real compile we do the work of looking for the actual          /* In the real compile we do the work of looking for the actual
3690          reference. */          reference. If the string started with "+" or "-" we require the rest to
3691            be digits, in which case recno will be set. */
3692    
3693            if (refsign > 0)
3694              {
3695              if (recno <= 0)
3696                {
3697                *errorcodeptr = ERR58;
3698                goto FAILED;
3699                }
3700              if (refsign == '-')
3701                {
3702                recno = cd->bracount - recno + 1;
3703                if (recno <= 0)
3704                  {
3705                  *errorcodeptr = ERR15;
3706                  goto FAILED;
3707                  }
3708                }
3709              else recno += cd->bracount;
3710              PUT2(code, 2+LINK_SIZE, recno);
3711              break;
3712              }
3713    
3714            /* Otherwise (did not start with "+" or "-"), start by looking for the
3715            name. */
3716    
3717          slot = cd->name_table;          slot = cd->name_table;
3718          for (i = 0; i < cd->names_found; i++)          for (i = 0; i < cd->names_found; i++)
3719            {            {
# Line 3999  for (;; ptr++) Line 4031  for (;; ptr++)
4031    
4032    
4033          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4034            case '-': case '+':
4035          case '0': case '1': case '2': case '3': case '4':   /* Recursion or */          case '0': case '1': case '2': case '3': case '4':   /* Recursion or */
4036          case '5': case '6': case '7': case '8': case '9':   /* subroutine */          case '5': case '6': case '7': case '8': case '9':   /* subroutine */
4037            {            {
4038            const uschar *called;            const uschar *called;
4039    
4040              if ((refsign = *ptr) == '+') ptr++;
4041              else if (refsign == '-')
4042                {
4043                if ((digitab[ptr[1]] & ctype_digit) == 0)
4044                  goto OTHER_CHAR_AFTER_QUERY;
4045                ptr++;
4046                }
4047    
4048            recno = 0;            recno = 0;
4049            while((digitab[*ptr] & ctype_digit) != 0)            while((digitab[*ptr] & ctype_digit) != 0)
4050              recno = recno * 10 + *ptr++ - '0';              recno = recno * 10 + *ptr++ - '0';
4051    
4052            if (*ptr != ')')            if (*ptr != ')')
4053              {              {
4054              *errorcodeptr = ERR29;              *errorcodeptr = ERR29;
4055              goto FAILED;              goto FAILED;
4056              }              }
4057    
4058              if (refsign == '-')
4059                {
4060                if (recno == 0)
4061                  {
4062                  *errorcodeptr = ERR58;
4063                  goto FAILED;
4064                  }
4065                recno = cd->bracount - recno + 1;
4066                if (recno <= 0)
4067                  {
4068                  *errorcodeptr = ERR15;
4069                  goto FAILED;
4070                  }
4071                }
4072              else if (refsign == '+')
4073                {
4074                if (recno == 0)
4075                  {
4076                  *errorcodeptr = ERR58;
4077                  goto FAILED;
4078                  }
4079                recno += cd->bracount;
4080                }
4081    
4082            /* Come here from code above that handles a named recursion */            /* Come here from code above that handles a named recursion */
4083    
# Line 4084  for (;; ptr++) Line 4151  for (;; ptr++)
4151    
4152          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
4153          default:              /* Other characters: check option setting */          default:              /* Other characters: check option setting */
4154            OTHER_CHAR_AFTER_QUERY:
4155          set = unset = 0;          set = unset = 0;
4156          optset = &set;          optset = &set;
4157    
# Line 5043  Returns:        pointer to compiled data Line 5111  Returns:        pointer to compiled data
5111                  with errorptr and erroroffset set                  with errorptr and erroroffset set
5112  */  */
5113    
5114  PCRE_DATA_SCOPE pcre *  PCRE_EXP_DEFN pcre *
5115  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
5116    int *erroroffset, const unsigned char *tables)    int *erroroffset, const unsigned char *tables)
5117  {  {
# Line 5051  return pcre_compile2(pattern, options, N Line 5119  return pcre_compile2(pattern, options, N
5119  }  }
5120    
5121    
5122  PCRE_DATA_SCOPE pcre *  PCRE_EXP_DEFN pcre *
5123  pcre_compile2(const char *pattern, int options, int *errorcodeptr,  pcre_compile2(const char *pattern, int options, int *errorcodeptr,
5124    const char **errorptr, int *erroroffset, const unsigned char *tables)    const char **errorptr, int *erroroffset, const unsigned char *tables)
5125  {  {
# Line 5100  if (errorcodeptr != NULL) *errorcodeptr Line 5168  if (errorcodeptr != NULL) *errorcodeptr
5168  if (erroroffset == NULL)  if (erroroffset == NULL)
5169    {    {
5170    errorcode = ERR16;    errorcode = ERR16;
5171    goto PCRE_EARLY_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN2;
5172    }    }
5173    
5174  *erroroffset = 0;  *erroroffset = 0;
# Line 5113  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 5181  if (utf8 && (options & PCRE_NO_UTF8_CHEC
5181       (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)       (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
5182    {    {
5183    errorcode = ERR44;    errorcode = ERR44;
5184    goto PCRE_UTF8_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN2;
5185    }    }
5186  #else  #else
5187  if ((options & PCRE_UTF8) != 0)  if ((options & PCRE_UTF8) != 0)
# Line 5138  cd->cbits = tables + cbits_offset; Line 5206  cd->cbits = tables + cbits_offset;
5206  cd->ctypes = tables + ctypes_offset;  cd->ctypes = tables + ctypes_offset;
5207    
5208  /* Handle different types of newline. The three bits give seven cases. The  /* Handle different types of newline. The three bits give seven cases. The
5209  current code allows for fixed one- or two-byte sequences, plus "any". */  current code allows for fixed one- or two-byte sequences, plus "any" and
5210    "anycrlf". */
5211    
5212  switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))  switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))
5213    {    {
# Line 5148  switch (options & (PCRE_NEWLINE_CRLF | P Line 5217  switch (options & (PCRE_NEWLINE_CRLF | P
5217    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
5218         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
5219    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
5220      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
5221    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
5222    }    }
5223    
5224  if (newline < 0)  if (newline == -2)
5225      {
5226      cd->nltype = NLTYPE_ANYCRLF;
5227      }
5228    else if (newline < 0)
5229    {    {
5230    cd->nltype = NLTYPE_ANY;    cd->nltype = NLTYPE_ANY;
5231    }    }
# Line 5325  if (errorcode != 0) Line 5399  if (errorcode != 0)
5399    (pcre_free)(re);    (pcre_free)(re);
5400    PCRE_EARLY_ERROR_RETURN:    PCRE_EARLY_ERROR_RETURN:
5401    *erroroffset = ptr - (const uschar *)pattern;    *erroroffset = ptr - (const uschar *)pattern;
5402  #ifdef SUPPORT_UTF8    PCRE_EARLY_ERROR_RETURN2:
   PCRE_UTF8_ERROR_RETURN:  
 #endif  
5403    *errorptr = error_texts[errorcode];    *errorptr = error_texts[errorcode];
5404    if (errorcodeptr != NULL) *errorcodeptr = errorcode;    if (errorcodeptr != NULL) *errorcodeptr = errorcode;
5405    return NULL;    return NULL;
# Line 5417  if ((re->options & PCRE_REQCHSET) != 0) Line 5489  if ((re->options & PCRE_REQCHSET) != 0)
5489      else printf("Req char = \\x%02x%s\n", ch, caseless);      else printf("Req char = \\x%02x%s\n", ch, caseless);
5490    }    }
5491    
5492  pcre_printint(re, stdout);  pcre_printint(re, stdout, TRUE);
5493    
5494  /* This check is done here in the debugging case so that the code that  /* This check is done here in the debugging case so that the code that
5495  was compiled can be seen. */  was compiled can be seen. */

Legend:
Removed from v.107  
changed lines
  Added in v.167

  ViewVC Help
Powered by ViewVC 1.1.5