/[pcre]/code/trunk/pcreposix.c
ViewVC logotype

Diff of /code/trunk/pcreposix.c

Parent Directory | Revision Log | View Patch

revision 418 by ph10, Fri Apr 24 09:17:18 2009 UTC revision 654 by ph10, Tue Aug 2 11:00:40 2011 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2009 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 57  previously been set. */ Line 57  previously been set. */
57  #  define PCREPOSIX_EXP_DEFN __declspec(dllexport)  #  define PCREPOSIX_EXP_DEFN __declspec(dllexport)
58  #endif  #endif
59    
60    /* We include pcre.h before pcre_internal.h so that the PCRE library functions
61    are declared as "import" for Windows by defining PCRE_EXP_DECL as "import".
62    This is needed even though pcre_internal.h itself includes pcre.h, because it
63    does so after it has set PCRE_EXP_DECL to "export" if it is not already set. */
64    
65  #include "pcre.h"  #include "pcre.h"
66  #include "pcre_internal.h"  #include "pcre_internal.h"
67  #include "pcreposix.h"  #include "pcreposix.h"
# Line 70  static const int eint[] = { Line 75  static const int eint[] = {
75    REG_EESCAPE, /* \c at end of pattern */    REG_EESCAPE, /* \c at end of pattern */
76    REG_EESCAPE, /* unrecognized character follows \ */    REG_EESCAPE, /* unrecognized character follows \ */
77    REG_BADBR,   /* numbers out of order in {} quantifier */    REG_BADBR,   /* numbers out of order in {} quantifier */
78    /* 5 */    /* 5 */
79    REG_BADBR,   /* number too big in {} quantifier */    REG_BADBR,   /* number too big in {} quantifier */
80    REG_EBRACK,  /* missing terminating ] for character class */    REG_EBRACK,  /* missing terminating ] for character class */
81    REG_ECTYPE,  /* invalid escape sequence in character class */    REG_ECTYPE,  /* invalid escape sequence in character class */
82    REG_ERANGE,  /* range out of order in character class */    REG_ERANGE,  /* range out of order in character class */
83    REG_BADRPT,  /* nothing to repeat */    REG_BADRPT,  /* nothing to repeat */
84    /* 10 */    /* 10 */
85    REG_BADRPT,  /* operand of unlimited repeat could match the empty string */    REG_BADRPT,  /* operand of unlimited repeat could match the empty string */
86    REG_ASSERT,  /* internal error: unexpected repeat */    REG_ASSERT,  /* internal error: unexpected repeat */
87    REG_BADPAT,  /* unrecognized character after (? */    REG_BADPAT,  /* unrecognized character after (? */
88    REG_BADPAT,  /* POSIX named classes are supported only within a class */    REG_BADPAT,  /* POSIX named classes are supported only within a class */
89    REG_EPAREN,  /* missing ) */    REG_EPAREN,  /* missing ) */
90    /* 15 */    /* 15 */
91    REG_ESUBREG, /* reference to non-existent subpattern */    REG_ESUBREG, /* reference to non-existent subpattern */
92    REG_INVARG,  /* erroffset passed as NULL */    REG_INVARG,  /* erroffset passed as NULL */
93    REG_INVARG,  /* unknown option bit(s) set */    REG_INVARG,  /* unknown option bit(s) set */
94    REG_EPAREN,  /* missing ) after comment */    REG_EPAREN,  /* missing ) after comment */
95    REG_ESIZE,   /* parentheses nested too deeply */    REG_ESIZE,   /* parentheses nested too deeply */
96    /* 20 */    /* 20 */
97    REG_ESIZE,   /* regular expression too large */    REG_ESIZE,   /* regular expression too large */
98    REG_ESPACE,  /* failed to get memory */    REG_ESPACE,  /* failed to get memory */
99    REG_EPAREN,  /* unmatched parentheses */    REG_EPAREN,  /* unmatched parentheses */
100    REG_ASSERT,  /* internal error: code overflow */    REG_ASSERT,  /* internal error: code overflow */
101    REG_BADPAT,  /* unrecognized character after (?< */    REG_BADPAT,  /* unrecognized character after (?< */
102    /* 25 */    /* 25 */
103    REG_BADPAT,  /* lookbehind assertion is not fixed length */    REG_BADPAT,  /* lookbehind assertion is not fixed length */
104    REG_BADPAT,  /* malformed number or name after (?( */    REG_BADPAT,  /* malformed number or name after (?( */
105    REG_BADPAT,  /* conditional group contains more than two branches */    REG_BADPAT,  /* conditional group contains more than two branches */
106    REG_BADPAT,  /* assertion expected after (?( */    REG_BADPAT,  /* assertion expected after (?( */
107    REG_BADPAT,  /* (?R or (?[+-]digits must be followed by ) */    REG_BADPAT,  /* (?R or (?[+-]digits must be followed by ) */
108    /* 30 */    /* 30 */
109    REG_ECTYPE,  /* unknown POSIX class name */    REG_ECTYPE,  /* unknown POSIX class name */
110    REG_BADPAT,  /* POSIX collating elements are not supported */    REG_BADPAT,  /* POSIX collating elements are not supported */
111    REG_INVARG,  /* this version of PCRE is not compiled with PCRE_UTF8 support */    REG_INVARG,  /* this version of PCRE is not compiled with PCRE_UTF8 support */
112    REG_BADPAT,  /* spare error */    REG_BADPAT,  /* spare error */
113    REG_BADPAT,  /* character value in \x{...} sequence is too large */    REG_BADPAT,  /* character value in \x{...} sequence is too large */
114    /* 35 */    /* 35 */
115    REG_BADPAT,  /* invalid condition (?(0) */    REG_BADPAT,  /* invalid condition (?(0) */
116    REG_BADPAT,  /* \C not allowed in lookbehind assertion */    REG_BADPAT,  /* \C not allowed in lookbehind assertion */
117    REG_EESCAPE, /* PCRE does not support \L, \l, \N, \U, or \u */    REG_EESCAPE, /* PCRE does not support \L, \l, \N, \U, or \u */
118    REG_BADPAT,  /* number after (?C is > 255 */    REG_BADPAT,  /* number after (?C is > 255 */
119    REG_BADPAT,  /* closing ) for (?C expected */    REG_BADPAT,  /* closing ) for (?C expected */
120    /* 40 */    /* 40 */
121    REG_BADPAT,  /* recursive call could loop indefinitely */    REG_BADPAT,  /* recursive call could loop indefinitely */
122    REG_BADPAT,  /* unrecognized character after (?P */    REG_BADPAT,  /* unrecognized character after (?P */
123    REG_BADPAT,  /* syntax error in subpattern name (missing terminator) */    REG_BADPAT,  /* syntax error in subpattern name (missing terminator) */
124    REG_BADPAT,  /* two named subpatterns have the same name */    REG_BADPAT,  /* two named subpatterns have the same name */
125    REG_BADPAT,  /* invalid UTF-8 string */    REG_BADPAT,  /* invalid UTF-8 string */
126    /* 45 */    /* 45 */
127    REG_BADPAT,  /* support for \P, \p, and \X has not been compiled */    REG_BADPAT,  /* support for \P, \p, and \X has not been compiled */
128    REG_BADPAT,  /* malformed \P or \p sequence */    REG_BADPAT,  /* malformed \P or \p sequence */
129    REG_BADPAT,  /* unknown property name after \P or \p */    REG_BADPAT,  /* unknown property name after \P or \p */
130    REG_BADPAT,  /* subpattern name is too long (maximum 32 characters) */    REG_BADPAT,  /* subpattern name is too long (maximum 32 characters) */
131    REG_BADPAT,  /* too many named subpatterns (maximum 10,000) */    REG_BADPAT,  /* too many named subpatterns (maximum 10,000) */
132    /* 50 */    /* 50 */
133    REG_BADPAT,  /* repeated subpattern is too long */    REG_BADPAT,  /* repeated subpattern is too long */
134    REG_BADPAT,  /* octal value is greater than \377 (not in UTF-8 mode) */    REG_BADPAT,  /* octal value is greater than \377 (not in UTF-8 mode) */
135    REG_BADPAT,  /* internal error: overran compiling workspace */    REG_BADPAT,  /* internal error: overran compiling workspace */
136    REG_BADPAT,  /* internal error: previously-checked referenced subpattern not found */    REG_BADPAT,  /* internal error: previously-checked referenced subpattern not found */
137    REG_BADPAT,  /* DEFINE group contains more than one branch */    REG_BADPAT,  /* DEFINE group contains more than one branch */
138    /* 55 */    /* 55 */
139    REG_BADPAT,  /* repeating a DEFINE group is not allowed */    REG_BADPAT,  /* repeating a DEFINE group is not allowed */
140    REG_INVARG,  /* inconsistent NEWLINE options */    REG_INVARG,  /* inconsistent NEWLINE options */
141    REG_BADPAT,  /* \g is not followed by an (optionally braced) non-zero number */    REG_BADPAT,  /* \g is not followed by an (optionally braced) non-zero number */
142    REG_BADPAT,  /* a numbered reference must not be zero */    REG_BADPAT,  /* a numbered reference must not be zero */
143    REG_BADPAT,  /* (*VERB) with an argument is not supported */    REG_BADPAT,  /* an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT) */
144    /* 60 */    /* 60 */
145    REG_BADPAT,  /* (*VERB) not recognized */    REG_BADPAT,  /* (*VERB) not recognized */
146    REG_BADPAT,  /* number is too big */    REG_BADPAT,  /* number is too big */
147    REG_BADPAT,  /* subpattern name expected */    REG_BADPAT,  /* subpattern name expected */
148    REG_BADPAT,  /* digit expected after (?+ */    REG_BADPAT,  /* digit expected after (?+ */
149    REG_BADPAT   /* ] is an invalid data character in JavaScript compatibility mode */    REG_BADPAT,  /* ] is an invalid data character in JavaScript compatibility mode */
150      /* 65 */
151      REG_BADPAT,  /* different names for subpatterns of the same number are not allowed */
152      REG_BADPAT,  /* (*MARK) must have an argument */
153      REG_INVARG,  /* this version of PCRE is not compiled with PCRE_UCP support */
154      REG_BADPAT,  /* \c must be followed by an ASCII character */
155      REG_BADPAT,  /* \k is not followed by a braced, angle-bracketed, or quoted name */
156  };  };
157    
158  /* Table of texts corresponding to POSIX error codes */  /* Table of texts corresponding to POSIX error codes */
# Line 240  int erroffset; Line 251  int erroffset;
251  int errorcode;  int errorcode;
252  int options = 0;  int options = 0;
253    
254  if ((cflags & REG_ICASE) != 0)   options |= PCRE_CASELESS;  if ((cflags & REG_ICASE) != 0)    options |= PCRE_CASELESS;
255  if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;  if ((cflags & REG_NEWLINE) != 0)  options |= PCRE_MULTILINE;
256  if ((cflags & REG_DOTALL) != 0)  options |= PCRE_DOTALL;  if ((cflags & REG_DOTALL) != 0)   options |= PCRE_DOTALL;
257  if ((cflags & REG_NOSUB) != 0)   options |= PCRE_NO_AUTO_CAPTURE;  if ((cflags & REG_NOSUB) != 0)    options |= PCRE_NO_AUTO_CAPTURE;
258  if ((cflags & REG_UTF8) != 0)    options |= PCRE_UTF8;  if ((cflags & REG_UTF8) != 0)     options |= PCRE_UTF8;
259    if ((cflags & REG_UCP) != 0)      options |= PCRE_UCP;
260    if ((cflags & REG_UNGREEDY) != 0) options |= PCRE_UNGREEDY;
261    
262  preg->re_pcre = pcre_compile2(pattern, options, &errorcode, &errorptr,  preg->re_pcre = pcre_compile2(pattern, options, &errorcode, &errorptr,
263    &erroffset, NULL);    &erroffset, NULL);
264  preg->re_erroffset = erroffset;  preg->re_erroffset = erroffset;
265    
266  /* Safety: if the error code is too big for the translation vector (which  /* Safety: if the error code is too big for the translation vector (which
267  should not happen, but we all make mistakes), return REG_BADPAT. */  should not happen, but we all make mistakes), return REG_BADPAT. */
268    
269  if (preg->re_pcre == NULL)  if (preg->re_pcre == NULL)
270    {    {
271    return (errorcode < sizeof(eint)/sizeof(const int))?    return (errorcode < sizeof(eint)/sizeof(const int))?
272      eint[errorcode] : REG_BADPAT;      eint[errorcode] : REG_BADPAT;
273    }    }
274    
275  preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL);  preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL);
276  return 0;  return 0;
# Line 299  if ((eflags & REG_NOTEMPTY) != 0) option Line 312  if ((eflags & REG_NOTEMPTY) != 0) option
312    
313  ((regex_t *)preg)->re_erroffset = (size_t)(-1);  /* Only has meaning after compile */  ((regex_t *)preg)->re_erroffset = (size_t)(-1);  /* Only has meaning after compile */
314    
315  /* When no string data is being returned, ensure that nmatch is zero.  /* When no string data is being returned, or no vector has been passed in which
316  Otherwise, ensure the vector for holding the return data is large enough. */  to put it, ensure that nmatch is zero. Otherwise, ensure the vector for holding
317    the return data is large enough. */
318    
319  if (nosub) nmatch = 0;  if (nosub || pmatch == NULL) nmatch = 0;
320    
321  else if (nmatch > 0)  else if (nmatch > 0)
322    {    {
# Line 332  if ((eflags & REG_STARTEND) != 0) Line 346  if ((eflags & REG_STARTEND) != 0)
346  else  else
347    {    {
348    so = 0;    so = 0;
349    eo = strlen(string);    eo = (int)strlen(string);
350    }    }
351    
352  rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string + so, (eo - so),  rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string + so, (eo - so),
353    0, options, ovector, nmatch * 3);    0, options, ovector, (int)(nmatch * 3));
354    
355  if (rc == 0) rc = nmatch;    /* All captured slots were filled in */  if (rc == 0) rc = (int)nmatch;    /* All captured slots were filled in */
356    
357    /* Successful match */
358    
359  if (rc >= 0)  if (rc >= 0)
360    {    {
# Line 356  if (rc >= 0) Line 372  if (rc >= 0)
372    return 0;    return 0;
373    }    }
374    
375  else  /* Unsuccessful match */
376    
377    if (allocated_ovector) free(ovector);
378    switch(rc)
379    {    {
380    if (allocated_ovector) free(ovector);  /* ========================================================================== */
381    switch(rc)    /* These cases are never obeyed. This is a fudge that causes a compile-time
382      {    error if the vector eint, which is indexed by compile-time error number, is
383      case PCRE_ERROR_NOMATCH: return REG_NOMATCH;    not the correct length. It seems to be the only way to do such a check at
384      case PCRE_ERROR_NULL: return REG_INVARG;    compile time, as the sizeof() operator does not work in the C preprocessor.
385      case PCRE_ERROR_BADOPTION: return REG_INVARG;    As all the PCRE_ERROR_xxx values are negative, we can use 0 and 1. */
386      case PCRE_ERROR_BADMAGIC: return REG_INVARG;  
387      case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;    case 0:
388      case PCRE_ERROR_NOMEMORY: return REG_ESPACE;    case (sizeof(eint)/sizeof(int) == ERRCOUNT):
389      case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE;    return REG_ASSERT;
390      case PCRE_ERROR_BADUTF8: return REG_INVARG;  /* ========================================================================== */
391      case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;  
392      default: return REG_ASSERT;    case PCRE_ERROR_NOMATCH: return REG_NOMATCH;
393      }    case PCRE_ERROR_NULL: return REG_INVARG;
394      case PCRE_ERROR_BADOPTION: return REG_INVARG;
395      case PCRE_ERROR_BADMAGIC: return REG_INVARG;
396      case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;
397      case PCRE_ERROR_NOMEMORY: return REG_ESPACE;
398      case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE;
399      case PCRE_ERROR_BADUTF8: return REG_INVARG;
400      case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG;
401      default: return REG_ASSERT;
402    }    }
403  }  }
404    

Legend:
Removed from v.418  
changed lines
  Added in v.654

  ViewVC Help
Powered by ViewVC 1.1.5