/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
           Copyright (c) 1997-2008 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
39 |
|
40 |
|
/* This module contains some convenience functions for extracting substrings
from the subject string after a regex match has succeeded. The original idea
for these functions came from Scott Wimer. */
44 |
|
45 |
|
46 |
#ifdef HAVE_CONFIG_H
|
47 |
#include "config.h"
|
48 |
#endif
|
49 |
|
50 |
#include "pcre_internal.h"
|
51 |
|
52 |
|
53 |
/*************************************************
|
54 |
* Find number for named string *
|
55 |
*************************************************/
|
56 |
|
57 |
/* This function is used by the get_first_set() function below, as well
|
58 |
as being generally available. It assumes that names are unique.
|
59 |
|
60 |
Arguments:
|
61 |
code the compiled regex
|
62 |
stringname the name whose number is required
|
63 |
|
64 |
Returns: the number of the named parentheses, or a negative number
|
65 |
(PCRE_ERROR_NOSUBSTRING) if not found
|
66 |
*/
|
67 |
|
68 |
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
69 |
pcre_get_stringnumber(const pcre *code, const char *stringname)
|
70 |
{
|
71 |
int rc;
|
72 |
int entrysize;
|
73 |
int top, bot;
|
74 |
uschar *nametable;
|
75 |
|
76 |
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
77 |
return rc;
|
78 |
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
79 |
|
80 |
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
81 |
return rc;
|
82 |
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
83 |
return rc;
|
84 |
|
85 |
bot = 0;
|
86 |
while (top > bot)
|
87 |
{
|
88 |
int mid = (top + bot) / 2;
|
89 |
uschar *entry = nametable + entrysize*mid;
|
90 |
int c = strcmp(stringname, (char *)(entry + 2));
|
91 |
if (c == 0) return (entry[0] << 8) + entry[1];
|
92 |
if (c > 0) bot = mid + 1; else top = mid;
|
93 |
}
|
94 |
|
95 |
return PCRE_ERROR_NOSUBSTRING;
|
96 |
}
|
97 |
|
98 |
|
99 |
|
100 |
/*************************************************
|
101 |
* Find (multiple) entries for named string *
|
102 |
*************************************************/
|
103 |
|
104 |
/* This is used by the get_first_set() function below, as well as being
|
105 |
generally available. It is used when duplicated names are permitted.
|
106 |
|
107 |
Arguments:
|
108 |
code the compiled regex
|
109 |
stringname the name whose entries required
|
110 |
firstptr where to put the pointer to the first entry
|
111 |
lastptr where to put the pointer to the last entry
|
112 |
|
113 |
Returns: the length of each entry, or a negative number
|
114 |
(PCRE_ERROR_NOSUBSTRING) if not found
|
115 |
*/
|
116 |
|
117 |
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
118 |
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
|
119 |
char **firstptr, char **lastptr)
|
120 |
{
|
121 |
int rc;
|
122 |
int entrysize;
|
123 |
int top, bot;
|
124 |
uschar *nametable, *lastentry;
|
125 |
|
126 |
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
|
127 |
return rc;
|
128 |
if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
|
129 |
|
130 |
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
|
131 |
return rc;
|
132 |
if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
|
133 |
return rc;
|
134 |
|
135 |
lastentry = nametable + entrysize * (top - 1);
|
136 |
bot = 0;
|
137 |
while (top > bot)
|
138 |
{
|
139 |
int mid = (top + bot) / 2;
|
140 |
uschar *entry = nametable + entrysize*mid;
|
141 |
int c = strcmp(stringname, (char *)(entry + 2));
|
142 |
if (c == 0)
|
143 |
{
|
144 |
uschar *first = entry;
|
145 |
uschar *last = entry;
|
146 |
while (first > nametable)
|
147 |
{
|
148 |
if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
|
149 |
first -= entrysize;
|
150 |
}
|
151 |
while (last < lastentry)
|
152 |
{
|
153 |
if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
|
154 |
last += entrysize;
|
155 |
}
|
156 |
*firstptr = (char *)first;
|
157 |
*lastptr = (char *)last;
|
158 |
return entrysize;
|
159 |
}
|
160 |
if (c > 0) bot = mid + 1; else top = mid;
|
161 |
}
|
162 |
|
163 |
return PCRE_ERROR_NOSUBSTRING;
|
164 |
}
|
165 |
|
166 |
|
167 |
|
168 |
/*************************************************
|
169 |
* Find first set of multiple named strings *
|
170 |
*************************************************/
|
171 |
|
172 |
/* This function allows for duplicate names in the table of named substrings.
|
173 |
It returns the number of the first one that was set in a pattern match.
|
174 |
|
175 |
Arguments:
|
176 |
code the compiled regex
|
177 |
stringname the name of the capturing substring
|
178 |
ovector the vector of matched substrings
|
179 |
|
180 |
Returns: the number of the first that is set,
|
181 |
or the number of the last one if none are set,
|
182 |
or a negative number on error
|
183 |
*/
|
184 |
|
185 |
static int
|
186 |
get_first_set(const pcre *code, const char *stringname, int *ovector)
|
187 |
{
|
188 |
const real_pcre *re = (const real_pcre *)code;
|
189 |
int entrysize;
|
190 |
char *first, *last;
|
191 |
uschar *entry;
|
192 |
if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
|
193 |
return pcre_get_stringnumber(code, stringname);
|
194 |
entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
|
195 |
if (entrysize <= 0) return entrysize;
|
196 |
for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
|
197 |
{
|
198 |
int n = (entry[0] << 8) + entry[1];
|
199 |
if (ovector[n*2] >= 0) return n;
|
200 |
}
|
201 |
return (first[0] << 8) + first[1];
|
202 |
}
|
203 |
|
204 |
|
205 |
|
206 |
|
207 |
/*************************************************
|
208 |
* Copy captured string to given buffer *
|
209 |
*************************************************/
|
210 |
|
211 |
/* This function copies a single captured substring into a given buffer.
|
212 |
Note that we use memcpy() rather than strncpy() in case there are binary zeros
|
213 |
in the string.
|
214 |
|
215 |
Arguments:
|
216 |
subject the subject string that was matched
|
217 |
ovector pointer to the offsets table
|
218 |
stringcount the number of substrings that were captured
|
219 |
(i.e. the yield of the pcre_exec call, unless
|
220 |
that was zero, in which case it should be 1/3
|
221 |
of the offset table size)
|
222 |
stringnumber the number of the required substring
|
223 |
buffer where to put the substring
|
224 |
size the size of the buffer
|
225 |
|
226 |
Returns: if successful:
|
227 |
the length of the copied string, not including the zero
|
228 |
that is put on the end; can be zero
|
229 |
if not successful:
|
230 |
PCRE_ERROR_NOMEMORY (-6) buffer too small
|
231 |
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
232 |
*/
|
233 |
|
234 |
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
235 |
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
|
236 |
int stringnumber, char *buffer, int size)
|
237 |
{
|
238 |
int yield;
|
239 |
if (stringnumber < 0 || stringnumber >= stringcount)
|
240 |
return PCRE_ERROR_NOSUBSTRING;
|
241 |
stringnumber *= 2;
|
242 |
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
243 |
if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
|
244 |
memcpy(buffer, subject + ovector[stringnumber], yield);
|
245 |
buffer[yield] = 0;
|
246 |
return yield;
|
247 |
}
|
248 |
|
249 |
|
250 |
|
251 |
/*************************************************
|
252 |
* Copy named captured string to given buffer *
|
253 |
*************************************************/
|
254 |
|
255 |
/* This function copies a single captured substring into a given buffer,
|
256 |
identifying it by name. If the regex permits duplicate names, the first
|
257 |
substring that is set is chosen.
|
258 |
|
259 |
Arguments:
|
260 |
code the compiled regex
|
261 |
subject the subject string that was matched
|
262 |
ovector pointer to the offsets table
|
263 |
stringcount the number of substrings that were captured
|
264 |
(i.e. the yield of the pcre_exec call, unless
|
265 |
that was zero, in which case it should be 1/3
|
266 |
of the offset table size)
|
267 |
stringname the name of the required substring
|
268 |
buffer where to put the substring
|
269 |
size the size of the buffer
|
270 |
|
271 |
Returns: if successful:
|
272 |
the length of the copied string, not including the zero
|
273 |
that is put on the end; can be zero
|
274 |
if not successful:
|
275 |
PCRE_ERROR_NOMEMORY (-6) buffer too small
|
276 |
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
277 |
*/
|
278 |
|
279 |
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
280 |
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
|
281 |
int stringcount, const char *stringname, char *buffer, int size)
|
282 |
{
|
283 |
int n = get_first_set(code, stringname, ovector);
|
284 |
if (n <= 0) return n;
|
285 |
return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
|
286 |
}
|
287 |
|
288 |
|
289 |
|
290 |
/*************************************************
|
291 |
* Copy all captured strings to new store *
|
292 |
*************************************************/
|
293 |
|
294 |
/* This function gets one chunk of store and builds a list of pointers and all
|
295 |
of the captured substrings in it. A NULL pointer is put on the end of the list.
|
296 |
|
297 |
Arguments:
|
298 |
subject the subject string that was matched
|
299 |
ovector pointer to the offsets table
|
300 |
stringcount the number of substrings that were captured
|
301 |
(i.e. the yield of the pcre_exec call, unless
|
302 |
that was zero, in which case it should be 1/3
|
303 |
of the offset table size)
|
304 |
listptr set to point to the list of pointers
|
305 |
|
306 |
Returns: if successful: 0
|
307 |
if not successful:
|
308 |
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
309 |
*/
|
310 |
|
311 |
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
312 |
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
|
313 |
const char ***listptr)
|
314 |
{
|
315 |
int i;
|
316 |
int size = sizeof(char *);
|
317 |
int double_count = stringcount * 2;
|
318 |
char **stringlist;
|
319 |
char *p;
|
320 |
|
321 |
for (i = 0; i < double_count; i += 2)
|
322 |
size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
|
323 |
|
324 |
stringlist = (char **)(pcre_malloc)(size);
|
325 |
if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
|
326 |
|
327 |
*listptr = (const char **)stringlist;
|
328 |
p = (char *)(stringlist + stringcount + 1);
|
329 |
|
330 |
for (i = 0; i < double_count; i += 2)
|
331 |
{
|
332 |
int len = ovector[i+1] - ovector[i];
|
333 |
memcpy(p, subject + ovector[i], len);
|
334 |
*stringlist++ = p;
|
335 |
p += len;
|
336 |
*p++ = 0;
|
337 |
}
|
338 |
|
339 |
*stringlist = NULL;
|
340 |
return 0;
|
341 |
}
|
342 |
|
343 |
|
344 |
|
345 |
/*************************************************
|
346 |
* Free store obtained by get_substring_list *
|
347 |
*************************************************/
|
348 |
|
349 |
/* This function exists for the benefit of people calling PCRE from non-C
|
350 |
programs that can call its functions, but not free() or (pcre_free)() directly.
|
351 |
|
352 |
Argument: the result of a previous pcre_get_substring_list()
|
353 |
Returns: nothing
|
354 |
*/
|
355 |
|
356 |
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
357 |
pcre_free_substring_list(const char **pointer)
|
358 |
{
|
359 |
(pcre_free)((void *)pointer);
|
360 |
}
|
361 |
|
362 |
|
363 |
|
364 |
/*************************************************
|
365 |
* Copy captured string to new store *
|
366 |
*************************************************/
|
367 |
|
368 |
/* This function copies a single captured substring into a piece of new
|
369 |
store
|
370 |
|
371 |
Arguments:
|
372 |
subject the subject string that was matched
|
373 |
ovector pointer to the offsets table
|
374 |
stringcount the number of substrings that were captured
|
375 |
(i.e. the yield of the pcre_exec call, unless
|
376 |
that was zero, in which case it should be 1/3
|
377 |
of the offset table size)
|
378 |
stringnumber the number of the required substring
|
379 |
stringptr where to put a pointer to the substring
|
380 |
|
381 |
Returns: if successful:
|
382 |
the length of the string, not including the zero that
|
383 |
is put on the end; can be zero
|
384 |
if not successful:
|
385 |
PCRE_ERROR_NOMEMORY (-6) failed to get store
|
386 |
PCRE_ERROR_NOSUBSTRING (-7) substring not present
|
387 |
*/
|
388 |
|
389 |
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
390 |
pcre_get_substring(const char *subject, int *ovector, int stringcount,
|
391 |
int stringnumber, const char **stringptr)
|
392 |
{
|
393 |
int yield;
|
394 |
char *substring;
|
395 |
if (stringnumber < 0 || stringnumber >= stringcount)
|
396 |
return PCRE_ERROR_NOSUBSTRING;
|
397 |
stringnumber *= 2;
|
398 |
yield = ovector[stringnumber+1] - ovector[stringnumber];
|
399 |
substring = (char *)(pcre_malloc)(yield + 1);
|
400 |
if (substring == NULL) return PCRE_ERROR_NOMEMORY;
|
401 |
memcpy(substring, subject + ovector[stringnumber], yield);
|
402 |
substring[yield] = 0;
|
403 |
*stringptr = substring;
|
404 |
return yield;
|
405 |
}
|
406 |
|
407 |
|
408 |
|
409 |
/*************************************************
|
410 |
* Copy named captured string to new store *
|
411 |
*************************************************/
|
412 |
|
413 |
/* This function copies a single captured substring, identified by name, into
|
414 |
new store. If the regex permits duplicate names, the first substring that is
|
415 |
set is chosen.
|
416 |
|
417 |
Arguments:
|
418 |
code the compiled regex
|
419 |
subject the subject string that was matched
|
420 |
ovector pointer to the offsets table
|
421 |
stringcount the number of substrings that were captured
|
422 |
(i.e. the yield of the pcre_exec call, unless
|
423 |
that was zero, in which case it should be 1/3
|
424 |
of the offset table size)
|
425 |
stringname the name of the required substring
|
426 |
stringptr where to put the pointer
|
427 |
|
428 |
Returns: if successful:
|
429 |
the length of the copied string, not including the zero
|
430 |
that is put on the end; can be zero
|
431 |
if not successful:
|
432 |
PCRE_ERROR_NOMEMORY (-6) couldn't get memory
|
433 |
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
|
434 |
*/
|
435 |
|
436 |
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
|
437 |
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
|
438 |
int stringcount, const char *stringname, const char **stringptr)
|
439 |
{
|
440 |
int n = get_first_set(code, stringname, ovector);
|
441 |
if (n <= 0) return n;
|
442 |
return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
|
443 |
}
|
444 |
|
445 |
|
446 |
|
447 |
|
448 |
/*************************************************
|
449 |
* Free store obtained by get_substring *
|
450 |
*************************************************/
|
451 |
|
452 |
/* This function exists for the benefit of people calling PCRE from non-C
|
453 |
programs that can call its functions, but not free() or (pcre_free)() directly.
|
454 |
|
455 |
Argument: the result of a previous pcre_get_substring()
|
456 |
Returns: nothing
|
457 |
*/
|
458 |
|
459 |
PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
|
460 |
pcre_free_substring(const char *pointer)
|
461 |
{
|
462 |
(pcre_free)((void *)pointer);
|
463 |
}
|
464 |
|
/* End of pcre_get.c */