6 |
and semantics are as close as possible to those of the Perl 5 language. |
and semantics are as close as possible to those of the Perl 5 language. |
7 |
|
|
8 |
Written by Philip Hazel |
Written by Philip Hazel |
9 |
Copyright (c) 1997-2008 University of Cambridge |
Copyright (c) 1997-2009 University of Cambridge |
10 |
|
|
11 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
12 |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
70 |
REG_EESCAPE, /* \c at end of pattern */ |
REG_EESCAPE, /* \c at end of pattern */ |
71 |
REG_EESCAPE, /* unrecognized character follows \ */ |
REG_EESCAPE, /* unrecognized character follows \ */ |
72 |
REG_BADBR, /* numbers out of order in {} quantifier */ |
REG_BADBR, /* numbers out of order in {} quantifier */ |
73 |
|
/* 5 */ |
74 |
REG_BADBR, /* number too big in {} quantifier */ |
REG_BADBR, /* number too big in {} quantifier */ |
75 |
REG_EBRACK, /* missing terminating ] for character class */ |
REG_EBRACK, /* missing terminating ] for character class */ |
76 |
REG_ECTYPE, /* invalid escape sequence in character class */ |
REG_ECTYPE, /* invalid escape sequence in character class */ |
77 |
REG_ERANGE, /* range out of order in character class */ |
REG_ERANGE, /* range out of order in character class */ |
78 |
REG_BADRPT, /* nothing to repeat */ |
REG_BADRPT, /* nothing to repeat */ |
79 |
|
/* 10 */ |
80 |
REG_BADRPT, /* operand of unlimited repeat could match the empty string */ |
REG_BADRPT, /* operand of unlimited repeat could match the empty string */ |
81 |
REG_ASSERT, /* internal error: unexpected repeat */ |
REG_ASSERT, /* internal error: unexpected repeat */ |
82 |
REG_BADPAT, /* unrecognized character after (? */ |
REG_BADPAT, /* unrecognized character after (? */ |
83 |
REG_BADPAT, /* POSIX named classes are supported only within a class */ |
REG_BADPAT, /* POSIX named classes are supported only within a class */ |
84 |
REG_EPAREN, /* missing ) */ |
REG_EPAREN, /* missing ) */ |
85 |
|
/* 15 */ |
86 |
REG_ESUBREG, /* reference to non-existent subpattern */ |
REG_ESUBREG, /* reference to non-existent subpattern */ |
87 |
REG_INVARG, /* erroffset passed as NULL */ |
REG_INVARG, /* erroffset passed as NULL */ |
88 |
REG_INVARG, /* unknown option bit(s) set */ |
REG_INVARG, /* unknown option bit(s) set */ |
89 |
REG_EPAREN, /* missing ) after comment */ |
REG_EPAREN, /* missing ) after comment */ |
90 |
REG_ESIZE, /* parentheses nested too deeply */ |
REG_ESIZE, /* parentheses nested too deeply */ |
91 |
|
/* 20 */ |
92 |
REG_ESIZE, /* regular expression too large */ |
REG_ESIZE, /* regular expression too large */ |
93 |
REG_ESPACE, /* failed to get memory */ |
REG_ESPACE, /* failed to get memory */ |
94 |
REG_EPAREN, /* unmatched brackets */ |
REG_EPAREN, /* unmatched parentheses */ |
95 |
REG_ASSERT, /* internal error: code overflow */ |
REG_ASSERT, /* internal error: code overflow */ |
96 |
REG_BADPAT, /* unrecognized character after (?< */ |
REG_BADPAT, /* unrecognized character after (?< */ |
97 |
|
/* 25 */ |
98 |
REG_BADPAT, /* lookbehind assertion is not fixed length */ |
REG_BADPAT, /* lookbehind assertion is not fixed length */ |
99 |
REG_BADPAT, /* malformed number or name after (?( */ |
REG_BADPAT, /* malformed number or name after (?( */ |
100 |
REG_BADPAT, /* conditional group contains more than two branches */ |
REG_BADPAT, /* conditional group contains more than two branches */ |
101 |
REG_BADPAT, /* assertion expected after (?( */ |
REG_BADPAT, /* assertion expected after (?( */ |
102 |
REG_BADPAT, /* (?R or (?[+-]digits must be followed by ) */ |
REG_BADPAT, /* (?R or (?[+-]digits must be followed by ) */ |
103 |
|
/* 30 */ |
104 |
REG_ECTYPE, /* unknown POSIX class name */ |
REG_ECTYPE, /* unknown POSIX class name */ |
105 |
REG_BADPAT, /* POSIX collating elements are not supported */ |
REG_BADPAT, /* POSIX collating elements are not supported */ |
106 |
REG_INVARG, /* this version of PCRE is not compiled with PCRE_UTF8 support */ |
REG_INVARG, /* this version of PCRE is not compiled with PCRE_UTF8 support */ |
107 |
REG_BADPAT, /* spare error */ |
REG_BADPAT, /* spare error */ |
108 |
REG_BADPAT, /* character value in \x{...} sequence is too large */ |
REG_BADPAT, /* character value in \x{...} sequence is too large */ |
109 |
|
/* 35 */ |
110 |
REG_BADPAT, /* invalid condition (?(0) */ |
REG_BADPAT, /* invalid condition (?(0) */ |
111 |
REG_BADPAT, /* \C not allowed in lookbehind assertion */ |
REG_BADPAT, /* \C not allowed in lookbehind assertion */ |
112 |
REG_EESCAPE, /* PCRE does not support \L, \l, \N, \U, or \u */ |
REG_EESCAPE, /* PCRE does not support \L, \l, \N, \U, or \u */ |
113 |
REG_BADPAT, /* number after (?C is > 255 */ |
REG_BADPAT, /* number after (?C is > 255 */ |
114 |
REG_BADPAT, /* closing ) for (?C expected */ |
REG_BADPAT, /* closing ) for (?C expected */ |
115 |
|
/* 40 */ |
116 |
REG_BADPAT, /* recursive call could loop indefinitely */ |
REG_BADPAT, /* recursive call could loop indefinitely */ |
117 |
REG_BADPAT, /* unrecognized character after (?P */ |
REG_BADPAT, /* unrecognized character after (?P */ |
118 |
REG_BADPAT, /* syntax error in subpattern name (missing terminator) */ |
REG_BADPAT, /* syntax error in subpattern name (missing terminator) */ |
119 |
REG_BADPAT, /* two named subpatterns have the same name */ |
REG_BADPAT, /* two named subpatterns have the same name */ |
120 |
REG_BADPAT, /* invalid UTF-8 string */ |
REG_BADPAT, /* invalid UTF-8 string */ |
121 |
|
/* 45 */ |
122 |
REG_BADPAT, /* support for \P, \p, and \X has not been compiled */ |
REG_BADPAT, /* support for \P, \p, and \X has not been compiled */ |
123 |
REG_BADPAT, /* malformed \P or \p sequence */ |
REG_BADPAT, /* malformed \P or \p sequence */ |
124 |
REG_BADPAT, /* unknown property name after \P or \p */ |
REG_BADPAT, /* unknown property name after \P or \p */ |
125 |
REG_BADPAT, /* subpattern name is too long (maximum 32 characters) */ |
REG_BADPAT, /* subpattern name is too long (maximum 32 characters) */ |
126 |
REG_BADPAT, /* too many named subpatterns (maximum 10,000) */ |
REG_BADPAT, /* too many named subpatterns (maximum 10,000) */ |
127 |
|
/* 50 */ |
128 |
REG_BADPAT, /* repeated subpattern is too long */ |
REG_BADPAT, /* repeated subpattern is too long */ |
129 |
REG_BADPAT, /* octal value is greater than \377 (not in UTF-8 mode) */ |
REG_BADPAT, /* octal value is greater than \377 (not in UTF-8 mode) */ |
130 |
REG_BADPAT, /* internal error: overran compiling workspace */ |
REG_BADPAT, /* internal error: overran compiling workspace */ |
131 |
REG_BADPAT, /* internal error: previously-checked referenced subpattern not found */ |
REG_BADPAT, /* internal error: previously-checked referenced subpattern not found */ |
132 |
REG_BADPAT, /* DEFINE group contains more than one branch */ |
REG_BADPAT, /* DEFINE group contains more than one branch */ |
133 |
|
/* 55 */ |
134 |
REG_BADPAT, /* repeating a DEFINE group is not allowed */ |
REG_BADPAT, /* repeating a DEFINE group is not allowed */ |
135 |
REG_INVARG, /* inconsistent NEWLINE options */ |
REG_INVARG, /* inconsistent NEWLINE options */ |
136 |
REG_BADPAT, /* \g is not followed by an (optionally braced) non-zero number */ |
REG_BADPAT, /* \g is not followed by an (optionally braced) non-zero number */ |
137 |
REG_BADPAT, /* (?+ or (?- must be followed by a non-zero number */ |
REG_BADPAT, /* a numbered reference must not be zero */ |
138 |
|
REG_BADPAT, /* (*VERB) with an argument is not supported */ |
139 |
|
/* 60 */ |
140 |
|
REG_BADPAT, /* (*VERB) not recognized */ |
141 |
REG_BADPAT, /* number is too big */ |
REG_BADPAT, /* number is too big */ |
142 |
REG_BADPAT, /* subpattern name expected */ |
REG_BADPAT, /* subpattern name expected */ |
143 |
REG_BADPAT, /* digit expected after (?+ */ |
REG_BADPAT, /* digit expected after (?+ */ |
144 |
REG_BADPAT /* ] is an invalid data character in JavaScript compatibility mode */ |
REG_BADPAT, /* ] is an invalid data character in JavaScript compatibility mode */ |
145 |
|
/* 65 */ |
146 |
|
REG_BADPAT /* different names for subpatterns of the same number are not allowed */ |
147 |
}; |
}; |
148 |
|
|
149 |
/* Table of texts corresponding to POSIX error codes */ |
/* Table of texts corresponding to POSIX error codes */ |
242 |
int errorcode; |
int errorcode; |
243 |
int options = 0; |
int options = 0; |
244 |
|
|
245 |
if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS; |
if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS; |
246 |
if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE; |
if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE; |
247 |
if ((cflags & REG_DOTALL) != 0) options |= PCRE_DOTALL; |
if ((cflags & REG_DOTALL) != 0) options |= PCRE_DOTALL; |
248 |
if ((cflags & REG_NOSUB) != 0) options |= PCRE_NO_AUTO_CAPTURE; |
if ((cflags & REG_NOSUB) != 0) options |= PCRE_NO_AUTO_CAPTURE; |
249 |
if ((cflags & REG_UTF8) != 0) options |= PCRE_UTF8; |
if ((cflags & REG_UTF8) != 0) options |= PCRE_UTF8; |
250 |
|
if ((cflags & REG_UNGREEDY) != 0) options |= PCRE_UNGREEDY; |
251 |
|
|
252 |
preg->re_pcre = pcre_compile2(pattern, options, &errorcode, &errorptr, |
preg->re_pcre = pcre_compile2(pattern, options, &errorcode, &errorptr, |
253 |
&erroffset, NULL); |
&erroffset, NULL); |
254 |
preg->re_erroffset = erroffset; |
preg->re_erroffset = erroffset; |
255 |
|
|
256 |
if (preg->re_pcre == NULL) return eint[errorcode]; |
/* Safety: if the error code is too big for the translation vector (which |
257 |
|
should not happen, but we all make mistakes), return REG_BADPAT. */ |
258 |
|
|
259 |
|
if (preg->re_pcre == NULL) |
260 |
|
{ |
261 |
|
return (errorcode < sizeof(eint)/sizeof(const int))? |
262 |
|
eint[errorcode] : REG_BADPAT; |
263 |
|
} |
264 |
|
|
265 |
preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL); |
preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL); |
266 |
return 0; |
return 0; |
298 |
|
|
299 |
if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL; |
if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL; |
300 |
if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL; |
if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL; |
301 |
|
if ((eflags & REG_NOTEMPTY) != 0) options |= PCRE_NOTEMPTY; |
302 |
|
|
303 |
((regex_t *)preg)->re_erroffset = (size_t)(-1); /* Only has meaning after compile */ |
((regex_t *)preg)->re_erroffset = (size_t)(-1); /* Only has meaning after compile */ |
304 |
|
|
305 |
/* When no string data is being returned, ensure that nmatch is zero. |
/* When no string data is being returned, or no vector has been passed in which |
306 |
Otherwise, ensure the vector for holding the return data is large enough. */ |
to put it, ensure that nmatch is zero. Otherwise, ensure the vector for holding |
307 |
|
the return data is large enough. */ |
308 |
|
|
309 |
if (nosub) nmatch = 0; |
if (nosub || pmatch == NULL) nmatch = 0; |
310 |
|
|
311 |
else if (nmatch > 0) |
else if (nmatch > 0) |
312 |
{ |
{ |