/[pcre]/code/trunk/pcreposix.c
ViewVC logotype

Diff of /code/trunk/pcreposix.c

Parent Directory | Revision Log | View Patch

revision 359 by ph10, Wed Jul 9 16:20:19 2008 UTC revision 510 by ph10, Sat Mar 27 17:45:29 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 70  static const int eint[] = { Line 70  static const int eint[] = {
70    REG_EESCAPE, /* \c at end of pattern */    REG_EESCAPE, /* \c at end of pattern */
71    REG_EESCAPE, /* unrecognized character follows \ */    REG_EESCAPE, /* unrecognized character follows \ */
72    REG_BADBR,   /* numbers out of order in {} quantifier */    REG_BADBR,   /* numbers out of order in {} quantifier */
73      /* 5 */
74    REG_BADBR,   /* number too big in {} quantifier */    REG_BADBR,   /* number too big in {} quantifier */
75    REG_EBRACK,  /* missing terminating ] for character class */    REG_EBRACK,  /* missing terminating ] for character class */
76    REG_ECTYPE,  /* invalid escape sequence in character class */    REG_ECTYPE,  /* invalid escape sequence in character class */
77    REG_ERANGE,  /* range out of order in character class */    REG_ERANGE,  /* range out of order in character class */
78    REG_BADRPT,  /* nothing to repeat */    REG_BADRPT,  /* nothing to repeat */
79      /* 10 */
80    REG_BADRPT,  /* operand of unlimited repeat could match the empty string */    REG_BADRPT,  /* operand of unlimited repeat could match the empty string */
81    REG_ASSERT,  /* internal error: unexpected repeat */    REG_ASSERT,  /* internal error: unexpected repeat */
82    REG_BADPAT,  /* unrecognized character after (? */    REG_BADPAT,  /* unrecognized character after (? */
83    REG_BADPAT,  /* POSIX named classes are supported only within a class */    REG_BADPAT,  /* POSIX named classes are supported only within a class */
84    REG_EPAREN,  /* missing ) */    REG_EPAREN,  /* missing ) */
85      /* 15 */
86    REG_ESUBREG, /* reference to non-existent subpattern */    REG_ESUBREG, /* reference to non-existent subpattern */
87    REG_INVARG,  /* erroffset passed as NULL */    REG_INVARG,  /* erroffset passed as NULL */
88    REG_INVARG,  /* unknown option bit(s) set */    REG_INVARG,  /* unknown option bit(s) set */
89    REG_EPAREN,  /* missing ) after comment */    REG_EPAREN,  /* missing ) after comment */
90    REG_ESIZE,   /* parentheses nested too deeply */    REG_ESIZE,   /* parentheses nested too deeply */
91      /* 20 */
92    REG_ESIZE,   /* regular expression too large */    REG_ESIZE,   /* regular expression too large */
93    REG_ESPACE,  /* failed to get memory */    REG_ESPACE,  /* failed to get memory */
94    REG_EPAREN,  /* unmatched brackets */    REG_EPAREN,  /* unmatched parentheses */
95    REG_ASSERT,  /* internal error: code overflow */    REG_ASSERT,  /* internal error: code overflow */
96    REG_BADPAT,  /* unrecognized character after (?< */    REG_BADPAT,  /* unrecognized character after (?< */
97      /* 25 */
98    REG_BADPAT,  /* lookbehind assertion is not fixed length */    REG_BADPAT,  /* lookbehind assertion is not fixed length */
99    REG_BADPAT,  /* malformed number or name after (?( */    REG_BADPAT,  /* malformed number or name after (?( */
100    REG_BADPAT,  /* conditional group contains more than two branches */    REG_BADPAT,  /* conditional group contains more than two branches */
101    REG_BADPAT,  /* assertion expected after (?( */    REG_BADPAT,  /* assertion expected after (?( */
102    REG_BADPAT,  /* (?R or (?[+-]digits must be followed by ) */    REG_BADPAT,  /* (?R or (?[+-]digits must be followed by ) */
103      /* 30 */
104    REG_ECTYPE,  /* unknown POSIX class name */    REG_ECTYPE,  /* unknown POSIX class name */
105    REG_BADPAT,  /* POSIX collating elements are not supported */    REG_BADPAT,  /* POSIX collating elements are not supported */
106    REG_INVARG,  /* this version of PCRE is not compiled with PCRE_UTF8 support */    REG_INVARG,  /* this version of PCRE is not compiled with PCRE_UTF8 support */
107    REG_BADPAT,  /* spare error */    REG_BADPAT,  /* spare error */
108    REG_BADPAT,  /* character value in \x{...} sequence is too large */    REG_BADPAT,  /* character value in \x{...} sequence is too large */
109      /* 35 */
110    REG_BADPAT,  /* invalid condition (?(0) */    REG_BADPAT,  /* invalid condition (?(0) */
111    REG_BADPAT,  /* \C not allowed in lookbehind assertion */    REG_BADPAT,  /* \C not allowed in lookbehind assertion */
112    REG_EESCAPE, /* PCRE does not support \L, \l, \N, \U, or \u */    REG_EESCAPE, /* PCRE does not support \L, \l, \N, \U, or \u */
113    REG_BADPAT,  /* number after (?C is > 255 */    REG_BADPAT,  /* number after (?C is > 255 */
114    REG_BADPAT,  /* closing ) for (?C expected */    REG_BADPAT,  /* closing ) for (?C expected */
115      /* 40 */
116    REG_BADPAT,  /* recursive call could loop indefinitely */    REG_BADPAT,  /* recursive call could loop indefinitely */
117    REG_BADPAT,  /* unrecognized character after (?P */    REG_BADPAT,  /* unrecognized character after (?P */
118    REG_BADPAT,  /* syntax error in subpattern name (missing terminator) */    REG_BADPAT,  /* syntax error in subpattern name (missing terminator) */
119    REG_BADPAT,  /* two named subpatterns have the same name */    REG_BADPAT,  /* two named subpatterns have the same name */
120    REG_BADPAT,  /* invalid UTF-8 string */    REG_BADPAT,  /* invalid UTF-8 string */
121      /* 45 */
122    REG_BADPAT,  /* support for \P, \p, and \X has not been compiled */    REG_BADPAT,  /* support for \P, \p, and \X has not been compiled */
123    REG_BADPAT,  /* malformed \P or \p sequence */    REG_BADPAT,  /* malformed \P or \p sequence */
124    REG_BADPAT,  /* unknown property name after \P or \p */    REG_BADPAT,  /* unknown property name after \P or \p */
125    REG_BADPAT,  /* subpattern name is too long (maximum 32 characters) */    REG_BADPAT,  /* subpattern name is too long (maximum 32 characters) */
126    REG_BADPAT,  /* too many named subpatterns (maximum 10,000) */    REG_BADPAT,  /* too many named subpatterns (maximum 10,000) */
127      /* 50 */
128    REG_BADPAT,  /* repeated subpattern is too long */    REG_BADPAT,  /* repeated subpattern is too long */
129    REG_BADPAT,  /* octal value is greater than \377 (not in UTF-8 mode) */    REG_BADPAT,  /* octal value is greater than \377 (not in UTF-8 mode) */
130    REG_BADPAT,  /* internal error: overran compiling workspace */    REG_BADPAT,  /* internal error: overran compiling workspace */
131    REG_BADPAT,  /* internal error: previously-checked referenced subpattern not found */    REG_BADPAT,  /* internal error: previously-checked referenced subpattern not found */
132    REG_BADPAT,  /* DEFINE group contains more than one branch */    REG_BADPAT,  /* DEFINE group contains more than one branch */
133      /* 55 */
134    REG_BADPAT,  /* repeating a DEFINE group is not allowed */    REG_BADPAT,  /* repeating a DEFINE group is not allowed */
135    REG_INVARG,  /* inconsistent NEWLINE options */    REG_INVARG,  /* inconsistent NEWLINE options */
136    REG_BADPAT,  /* \g is not followed by an (optionally braced) non-zero number */    REG_BADPAT,  /* \g is not followed by an (optionally braced) non-zero number */
137    REG_BADPAT,  /* (?+ or (?- must be followed by a non-zero number */    REG_BADPAT,  /* a numbered reference must not be zero */
138      REG_BADPAT,  /* an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT) */
139      /* 60 */
140      REG_BADPAT,  /* (*VERB) not recognized */
141    REG_BADPAT,  /* number is too big */    REG_BADPAT,  /* number is too big */
142    REG_BADPAT,  /* subpattern name expected */    REG_BADPAT,  /* subpattern name expected */
143    REG_BADPAT,  /* digit expected after (?+ */    REG_BADPAT,  /* digit expected after (?+ */
144    REG_BADPAT   /* ] is an invalid data character in JavaScript compatibility mode */    REG_BADPAT,  /* ] is an invalid data character in JavaScript compatibility mode */
145      /* 65 */
146      REG_BADPAT,  /* different names for subpatterns of the same number are not allowed */
147      REG_BADPAT,  /* (*MARK) must have an argument */
148  };  };
149    
150  /* Table of texts corresponding to POSIX error codes */  /* Table of texts corresponding to POSIX error codes */
# Line 226  int erroffset; Line 243  int erroffset;
243  int errorcode;  int errorcode;
244  int options = 0;  int options = 0;
245    
246  if ((cflags & REG_ICASE) != 0)   options |= PCRE_CASELESS;  if ((cflags & REG_ICASE) != 0)    options |= PCRE_CASELESS;
247  if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;  if ((cflags & REG_NEWLINE) != 0)  options |= PCRE_MULTILINE;
248  if ((cflags & REG_DOTALL) != 0)  options |= PCRE_DOTALL;  if ((cflags & REG_DOTALL) != 0)   options |= PCRE_DOTALL;
249  if ((cflags & REG_NOSUB) != 0)   options |= PCRE_NO_AUTO_CAPTURE;  if ((cflags & REG_NOSUB) != 0)    options |= PCRE_NO_AUTO_CAPTURE;
250  if ((cflags & REG_UTF8) != 0)    options |= PCRE_UTF8;  if ((cflags & REG_UTF8) != 0)     options |= PCRE_UTF8;
251    if ((cflags & REG_UNGREEDY) != 0) options |= PCRE_UNGREEDY;
252    
253  preg->re_pcre = pcre_compile2(pattern, options, &errorcode, &errorptr,  preg->re_pcre = pcre_compile2(pattern, options, &errorcode, &errorptr,
254    &erroffset, NULL);    &erroffset, NULL);
255  preg->re_erroffset = erroffset;  preg->re_erroffset = erroffset;
256    
257  if (preg->re_pcre == NULL) return eint[errorcode];  /* Safety: if the error code is too big for the translation vector (which
258    should not happen, but we all make mistakes), return REG_BADPAT. */
259    
260    if (preg->re_pcre == NULL)
261      {
262      return (errorcode < sizeof(eint)/sizeof(const int))?
263        eint[errorcode] : REG_BADPAT;
264      }
265    
266  preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL);  preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL);
267  return 0;  return 0;
# Line 274  BOOL nosub = Line 299  BOOL nosub =
299    
300  if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;  if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
301  if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;  if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
302    if ((eflags & REG_NOTEMPTY) != 0) options |= PCRE_NOTEMPTY;
303    
304  ((regex_t *)preg)->re_erroffset = (size_t)(-1);  /* Only has meaning after compile */  ((regex_t *)preg)->re_erroffset = (size_t)(-1);  /* Only has meaning after compile */
305    
306  /* When no string data is being returned, ensure that nmatch is zero.  /* When no string data is being returned, or no vector has been passed in which
307  Otherwise, ensure the vector for holding the return data is large enough. */  to put it, ensure that nmatch is zero. Otherwise, ensure the vector for holding
308    the return data is large enough. */
309    
310  if (nosub) nmatch = 0;  if (nosub || pmatch == NULL) nmatch = 0;
311    
312  else if (nmatch > 0)  else if (nmatch > 0)
313    {    {
# Line 318  rc = pcre_exec((const pcre *)preg->re_pc Line 345  rc = pcre_exec((const pcre *)preg->re_pc
345    
346  if (rc == 0) rc = nmatch;    /* All captured slots were filled in */  if (rc == 0) rc = nmatch;    /* All captured slots were filled in */
347    
348    /* Successful match */
349    
350  if (rc >= 0)  if (rc >= 0)
351    {    {
352    size_t i;    size_t i;
# Line 334  if (rc >= 0) Line 363  if (rc >= 0)
363    return 0;    return 0;
364    }    }
365    
366  else  /* Unsuccessful match */
367    
368    if (allocated_ovector) free(ovector);
369    switch(rc)
370    {    {
371    if (allocated_ovector) free(ovector);  /* ========================================================================== */
372    switch(rc)    /* These cases are never obeyed. This is a fudge that causes a compile-time
373      {    error if the vector eint, which is indexed by compile-time error number, is
374      case PCRE_ERROR_NOMATCH: return REG_NOMATCH;    not the correct length. It seems to be the only way to do such a check at
375      case PCRE_ERROR_NULL: return REG_INVARG;    compile time, as the sizeof() operator does not work in the C preprocessor.
376      case PCRE_ERROR_BADOPTION: return REG_INVARG;    As all the PCRE_ERROR_xxx values are negative, we can use 0 and 1. */
377      case PCRE_ERROR_BADMAGIC: return REG_INVARG;  
378      case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;    case 0:
379      case PCRE_ERROR_NOMEMORY: return REG_ESPACE;    case (sizeof(eint)/sizeof(int) == ERRCOUNT):
380      case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE;    return REG_ASSERT;
381      case PCRE_ERROR_BADUTF8: return REG_INVARG;  /* ========================================================================== */
382      case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;  
383      default: return REG_ASSERT;    case PCRE_ERROR_NOMATCH: return REG_NOMATCH;
384      }    case PCRE_ERROR_NULL: return REG_INVARG;
385      case PCRE_ERROR_BADOPTION: return REG_INVARG;
386      case PCRE_ERROR_BADMAGIC: return REG_INVARG;
387      case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;
388      case PCRE_ERROR_NOMEMORY: return REG_ESPACE;
389      case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE;
390      case PCRE_ERROR_BADUTF8: return REG_INVARG;
391      case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;
392      default: return REG_ASSERT;
393    }    }
394  }  }
395    

Legend:
Removed from v.359  
changed lines
  Added in v.510

  ViewVC Help
Powered by ViewVC 1.1.5