6 |
and semantics are as close as possible to those of the Perl 5 language. |
and semantics are as close as possible to those of the Perl 5 language. |
7 |
|
|
8 |
Written by Philip Hazel |
Written by Philip Hazel |
9 |
Copyright (c) 1997-2007 University of Cambridge |
Copyright (c) 1997-2009 University of Cambridge |
10 |
|
|
11 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
12 |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
42 |
functions. */ |
functions. */ |
43 |
|
|
44 |
|
|
45 |
#ifdef HAVE_CONFIG_H |
#ifdef HAVE_CONFIG_H |
46 |
#include <config.h> |
#include "config.h" |
47 |
#endif |
#endif |
48 |
|
|
49 |
|
|
50 |
/* Ensure that the PCREPOSIX_EXP_xxx macros are set appropriately for |
/* Ensure that the PCREPOSIX_EXP_xxx macros are set appropriately for |
51 |
compiling these functions. This must come before including pcreposix.h, where |
compiling these functions. This must come before including pcreposix.h, where |
52 |
they are set for an application (using these functions) if they have not |
they are set for an application (using these functions) if they have not |
53 |
previously been set. */ |
previously been set. */ |
57 |
# define PCREPOSIX_EXP_DEFN __declspec(dllexport) |
# define PCREPOSIX_EXP_DEFN __declspec(dllexport) |
58 |
#endif |
#endif |
59 |
|
|
60 |
|
#include "pcre.h" |
61 |
#include "pcre_internal.h" |
#include "pcre_internal.h" |
62 |
#include "pcreposix.h" |
#include "pcreposix.h" |
63 |
|
|
64 |
|
|
|
|
|
65 |
/* Table to translate PCRE compile time error codes into POSIX error codes. */ |
/* Table to translate PCRE compile time error codes into POSIX error codes. */ |
66 |
|
|
67 |
static const int eint[] = { |
static const int eint[] = { |
70 |
REG_EESCAPE, /* \c at end of pattern */ |
REG_EESCAPE, /* \c at end of pattern */ |
71 |
REG_EESCAPE, /* unrecognized character follows \ */ |
REG_EESCAPE, /* unrecognized character follows \ */ |
72 |
REG_BADBR, /* numbers out of order in {} quantifier */ |
REG_BADBR, /* numbers out of order in {} quantifier */ |
73 |
|
/* 5 */ |
74 |
REG_BADBR, /* number too big in {} quantifier */ |
REG_BADBR, /* number too big in {} quantifier */ |
75 |
REG_EBRACK, /* missing terminating ] for character class */ |
REG_EBRACK, /* missing terminating ] for character class */ |
76 |
REG_ECTYPE, /* invalid escape sequence in character class */ |
REG_ECTYPE, /* invalid escape sequence in character class */ |
77 |
REG_ERANGE, /* range out of order in character class */ |
REG_ERANGE, /* range out of order in character class */ |
78 |
REG_BADRPT, /* nothing to repeat */ |
REG_BADRPT, /* nothing to repeat */ |
79 |
|
/* 10 */ |
80 |
REG_BADRPT, /* operand of unlimited repeat could match the empty string */ |
REG_BADRPT, /* operand of unlimited repeat could match the empty string */ |
81 |
REG_ASSERT, /* internal error: unexpected repeat */ |
REG_ASSERT, /* internal error: unexpected repeat */ |
82 |
REG_BADPAT, /* unrecognized character after (? */ |
REG_BADPAT, /* unrecognized character after (? */ |
83 |
REG_BADPAT, /* POSIX named classes are supported only within a class */ |
REG_BADPAT, /* POSIX named classes are supported only within a class */ |
84 |
REG_EPAREN, /* missing ) */ |
REG_EPAREN, /* missing ) */ |
85 |
|
/* 15 */ |
86 |
REG_ESUBREG, /* reference to non-existent subpattern */ |
REG_ESUBREG, /* reference to non-existent subpattern */ |
87 |
REG_INVARG, /* erroffset passed as NULL */ |
REG_INVARG, /* erroffset passed as NULL */ |
88 |
REG_INVARG, /* unknown option bit(s) set */ |
REG_INVARG, /* unknown option bit(s) set */ |
89 |
REG_EPAREN, /* missing ) after comment */ |
REG_EPAREN, /* missing ) after comment */ |
90 |
REG_ESIZE, /* parentheses nested too deeply */ |
REG_ESIZE, /* parentheses nested too deeply */ |
91 |
|
/* 20 */ |
92 |
REG_ESIZE, /* regular expression too large */ |
REG_ESIZE, /* regular expression too large */ |
93 |
REG_ESPACE, /* failed to get memory */ |
REG_ESPACE, /* failed to get memory */ |
94 |
REG_EPAREN, /* unmatched brackets */ |
REG_EPAREN, /* unmatched parentheses */ |
95 |
REG_ASSERT, /* internal error: code overflow */ |
REG_ASSERT, /* internal error: code overflow */ |
96 |
REG_BADPAT, /* unrecognized character after (?< */ |
REG_BADPAT, /* unrecognized character after (?< */ |
97 |
|
/* 25 */ |
98 |
REG_BADPAT, /* lookbehind assertion is not fixed length */ |
REG_BADPAT, /* lookbehind assertion is not fixed length */ |
99 |
REG_BADPAT, /* malformed number or name after (?( */ |
REG_BADPAT, /* malformed number or name after (?( */ |
100 |
REG_BADPAT, /* conditional group contains more than two branches */ |
REG_BADPAT, /* conditional group contains more than two branches */ |
101 |
REG_BADPAT, /* assertion expected after (?( */ |
REG_BADPAT, /* assertion expected after (?( */ |
102 |
REG_BADPAT, /* (?R or (?[+-]digits must be followed by ) */ |
REG_BADPAT, /* (?R or (?[+-]digits must be followed by ) */ |
103 |
|
/* 30 */ |
104 |
REG_ECTYPE, /* unknown POSIX class name */ |
REG_ECTYPE, /* unknown POSIX class name */ |
105 |
REG_BADPAT, /* POSIX collating elements are not supported */ |
REG_BADPAT, /* POSIX collating elements are not supported */ |
106 |
REG_INVARG, /* this version of PCRE is not compiled with PCRE_UTF8 support */ |
REG_INVARG, /* this version of PCRE is not compiled with PCRE_UTF8 support */ |
107 |
REG_BADPAT, /* spare error */ |
REG_BADPAT, /* spare error */ |
108 |
REG_BADPAT, /* character value in \x{...} sequence is too large */ |
REG_BADPAT, /* character value in \x{...} sequence is too large */ |
109 |
|
/* 35 */ |
110 |
REG_BADPAT, /* invalid condition (?(0) */ |
REG_BADPAT, /* invalid condition (?(0) */ |
111 |
REG_BADPAT, /* \C not allowed in lookbehind assertion */ |
REG_BADPAT, /* \C not allowed in lookbehind assertion */ |
112 |
REG_EESCAPE, /* PCRE does not support \L, \l, \N, \U, or \u */ |
REG_EESCAPE, /* PCRE does not support \L, \l, \N, \U, or \u */ |
113 |
REG_BADPAT, /* number after (?C is > 255 */ |
REG_BADPAT, /* number after (?C is > 255 */ |
114 |
REG_BADPAT, /* closing ) for (?C expected */ |
REG_BADPAT, /* closing ) for (?C expected */ |
115 |
|
/* 40 */ |
116 |
REG_BADPAT, /* recursive call could loop indefinitely */ |
REG_BADPAT, /* recursive call could loop indefinitely */ |
117 |
REG_BADPAT, /* unrecognized character after (?P */ |
REG_BADPAT, /* unrecognized character after (?P */ |
118 |
REG_BADPAT, /* syntax error in subpattern name (missing terminator) */ |
REG_BADPAT, /* syntax error in subpattern name (missing terminator) */ |
119 |
REG_BADPAT, /* two named subpatterns have the same name */ |
REG_BADPAT, /* two named subpatterns have the same name */ |
120 |
REG_BADPAT, /* invalid UTF-8 string */ |
REG_BADPAT, /* invalid UTF-8 string */ |
121 |
|
/* 45 */ |
122 |
REG_BADPAT, /* support for \P, \p, and \X has not been compiled */ |
REG_BADPAT, /* support for \P, \p, and \X has not been compiled */ |
123 |
REG_BADPAT, /* malformed \P or \p sequence */ |
REG_BADPAT, /* malformed \P or \p sequence */ |
124 |
REG_BADPAT, /* unknown property name after \P or \p */ |
REG_BADPAT, /* unknown property name after \P or \p */ |
125 |
REG_BADPAT, /* subpattern name is too long (maximum 32 characters) */ |
REG_BADPAT, /* subpattern name is too long (maximum 32 characters) */ |
126 |
REG_BADPAT, /* too many named subpatterns (maximum 10,000) */ |
REG_BADPAT, /* too many named subpatterns (maximum 10,000) */ |
127 |
|
/* 50 */ |
128 |
REG_BADPAT, /* repeated subpattern is too long */ |
REG_BADPAT, /* repeated subpattern is too long */ |
129 |
REG_BADPAT, /* octal value is greater than \377 (not in UTF-8 mode) */ |
REG_BADPAT, /* octal value is greater than \377 (not in UTF-8 mode) */ |
130 |
REG_BADPAT, /* internal error: overran compiling workspace */ |
REG_BADPAT, /* internal error: overran compiling workspace */ |
131 |
REG_BADPAT, /* internal error: previously-checked referenced subpattern not found */ |
REG_BADPAT, /* internal error: previously-checked referenced subpattern not found */ |
132 |
REG_BADPAT, /* DEFINE group contains more than one branch */ |
REG_BADPAT, /* DEFINE group contains more than one branch */ |
133 |
|
/* 55 */ |
134 |
REG_BADPAT, /* repeating a DEFINE group is not allowed */ |
REG_BADPAT, /* repeating a DEFINE group is not allowed */ |
135 |
REG_INVARG, /* inconsistent NEWLINE options */ |
REG_INVARG, /* inconsistent NEWLINE options */ |
136 |
REG_BADPAT, /* \g is not followed by an (optionally braced) non-zero number */ |
REG_BADPAT, /* \g is not followed by an (optionally braced) non-zero number */ |
137 |
REG_BADPAT /* (?+ or (?- must be followed by a non-zero number */ |
REG_BADPAT, /* a numbered reference must not be zero */ |
138 |
|
REG_BADPAT, /* (*VERB) with an argument is not supported */ |
139 |
|
/* 60 */ |
140 |
|
REG_BADPAT, /* (*VERB) not recognized */ |
141 |
|
REG_BADPAT, /* number is too big */ |
142 |
|
REG_BADPAT, /* subpattern name expected */ |
143 |
|
REG_BADPAT, /* digit expected after (?+ */ |
144 |
|
REG_BADPAT /* ] is an invalid data character in JavaScript compatibility mode */ |
145 |
}; |
}; |
146 |
|
|
147 |
/* Table of texts corresponding to POSIX error codes */ |
/* Table of texts corresponding to POSIX error codes */ |
174 |
* Translate error code to string * |
* Translate error code to string * |
175 |
*************************************************/ |
*************************************************/ |
176 |
|
|
177 |
PCREPOSIX_EXP_DEFN size_t |
PCREPOSIX_EXP_DEFN size_t PCRE_CALL_CONVENTION |
178 |
regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) |
regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) |
179 |
{ |
{ |
180 |
const char *message, *addmessage; |
const char *message, *addmessage; |
209 |
* Free store held by a regex * |
* Free store held by a regex * |
210 |
*************************************************/ |
*************************************************/ |
211 |
|
|
212 |
PCREPOSIX_EXP_DEFN void |
PCREPOSIX_EXP_DEFN void PCRE_CALL_CONVENTION |
213 |
regfree(regex_t *preg) |
regfree(regex_t *preg) |
214 |
{ |
{ |
215 |
(pcre_free)(preg->re_pcre); |
(pcre_free)(preg->re_pcre); |
232 |
various non-zero codes on failure |
various non-zero codes on failure |
233 |
*/ |
*/ |
234 |
|
|
235 |
PCREPOSIX_EXP_DEFN int |
PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION |
236 |
regcomp(regex_t *preg, const char *pattern, int cflags) |
regcomp(regex_t *preg, const char *pattern, int cflags) |
237 |
{ |
{ |
238 |
const char *errorptr; |
const char *errorptr; |
250 |
&erroffset, NULL); |
&erroffset, NULL); |
251 |
preg->re_erroffset = erroffset; |
preg->re_erroffset = erroffset; |
252 |
|
|
253 |
if (preg->re_pcre == NULL) return eint[errorcode]; |
/* Safety: if the error code is too big for the translation vector (which |
254 |
|
should not happen, but we all make mistakes), return REG_BADPAT. */ |
255 |
|
|
256 |
|
if (preg->re_pcre == NULL) |
257 |
|
{ |
258 |
|
return (errorcode < sizeof(eint)/sizeof(const int))? |
259 |
|
eint[errorcode] : REG_BADPAT; |
260 |
|
} |
261 |
|
|
262 |
preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL); |
preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL); |
263 |
return 0; |
return 0; |
281 |
be set. When this is the case, the nmatch and pmatch arguments are ignored, and |
be set. When this is the case, the nmatch and pmatch arguments are ignored, and |
282 |
the only result is yes/no/error. */ |
the only result is yes/no/error. */ |
283 |
|
|
284 |
PCREPOSIX_EXP_DEFN int |
PCREPOSIX_EXP_DEFN int PCRE_CALL_CONVENTION |
285 |
regexec(const regex_t *preg, const char *string, size_t nmatch, |
regexec(const regex_t *preg, const char *string, size_t nmatch, |
286 |
regmatch_t pmatch[], int eflags) |
regmatch_t pmatch[], int eflags) |
287 |
{ |
{ |
288 |
int rc; |
int rc, so, eo; |
289 |
int options = 0; |
int options = 0; |
290 |
int *ovector = NULL; |
int *ovector = NULL; |
291 |
int small_ovector[POSIX_MALLOC_THRESHOLD * 3]; |
int small_ovector[POSIX_MALLOC_THRESHOLD * 3]; |
295 |
|
|
296 |
if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL; |
if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL; |
297 |
if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL; |
if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL; |
298 |
|
if ((eflags & REG_NOTEMPTY) != 0) options |= PCRE_NOTEMPTY; |
299 |
|
|
300 |
((regex_t *)preg)->re_erroffset = (size_t)(-1); /* Only has meaning after compile */ |
((regex_t *)preg)->re_erroffset = (size_t)(-1); /* Only has meaning after compile */ |
301 |
|
|
319 |
} |
} |
320 |
} |
} |
321 |
|
|
322 |
rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string, (int)strlen(string), |
/* REG_STARTEND is a BSD extension, to allow for non-NUL-terminated strings. |
323 |
|
The man page from OS X says "REG_STARTEND affects only the location of the |
324 |
|
string, not how it is matched". That is why the "so" value is used to bump the |
325 |
|
start location rather than being passed as a PCRE "starting offset". */ |
326 |
|
|
327 |
|
if ((eflags & REG_STARTEND) != 0) |
328 |
|
{ |
329 |
|
so = pmatch[0].rm_so; |
330 |
|
eo = pmatch[0].rm_eo; |
331 |
|
} |
332 |
|
else |
333 |
|
{ |
334 |
|
so = 0; |
335 |
|
eo = strlen(string); |
336 |
|
} |
337 |
|
|
338 |
|
rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string + so, (eo - so), |
339 |
0, options, ovector, nmatch * 3); |
0, options, ovector, nmatch * 3); |
340 |
|
|
341 |
if (rc == 0) rc = nmatch; /* All captured slots were filled in */ |
if (rc == 0) rc = nmatch; /* All captured slots were filled in */ |