/[pcre]/code/branches/pcre16/pcre_get.c
ViewVC logotype

Contents of /code/branches/pcre16/pcre_get.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 804 - (show annotations)
Wed Dec 14 11:18:01 2011 UTC (9 years, 4 months ago) by zherczeg
File MIME type: text/plain
File size: 18358 byte(s)
PUBL macro added, single char optimization is fixed, MAX_255 checks are added, pcre_jit_test now copies the default tables to help valgrind
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2008 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains some convenience functions for extracting substrings
42 from the subject string after a regex match has succeeded. The original idea
43 for these functions came from Scott Wimer. */
44
45
46 #ifdef HAVE_CONFIG_H
47 #include "config.h"
48 #endif
49
50 #include "pcre_internal.h"
51
52
53 /*************************************************
54 * Find number for named string *
55 *************************************************/
56
57 /* This function is used by the get_first_set() function below, as well
58 as being generally available. It assumes that names are unique.
59
60 Arguments:
61 code the compiled regex
62 stringname the name whose number is required
63
64 Returns: the number of the named parentheses, or a negative number
65 (PCRE_ERROR_NOSUBSTRING) if not found
66 */
67
68 #ifdef COMPILE_PCRE8
69 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
70 pcre_get_stringnumber(const pcre *code, const char *stringname)
71 #else
72 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
73 pcre16_get_stringnumber(const pcre *code, PCRE_SPTR16 stringname)
74 #endif
75 {
76 int rc;
77 int entrysize;
78 int top, bot;
79 pcre_uchar *nametable;
80
81 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
82 return rc;
83 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
84
85 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
86 return rc;
87 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
88 return rc;
89
90 bot = 0;
91 while (top > bot)
92 {
93 int mid = (top + bot) / 2;
94 pcre_uchar *entry = nametable + entrysize*mid;
95 int c = STRCMP_UC_UC((pcre_uchar *)stringname,
96 (pcre_uchar *)(entry + IMM2_SIZE));
97 if (c == 0) return (entry[0] << 8) + entry[1];
98 if (c > 0) bot = mid + 1; else top = mid;
99 }
100
101 return PCRE_ERROR_NOSUBSTRING;
102 }
103
104
105
106 /*************************************************
107 * Find (multiple) entries for named string *
108 *************************************************/
109
110 /* This is used by the get_first_set() function below, as well as being
111 generally available. It is used when duplicated names are permitted.
112
113 Arguments:
114 code the compiled regex
115 stringname the name whose entries required
116 firstptr where to put the pointer to the first entry
117 lastptr where to put the pointer to the last entry
118
119 Returns: the length of each entry, or a negative number
120 (PCRE_ERROR_NOSUBSTRING) if not found
121 */
122
123 #ifdef COMPILE_PCRE8
124 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
125 pcre_get_stringtable_entries(const pcre *code, const char *stringname,
126 char **firstptr, char **lastptr)
127 #else
128 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
129 pcre16_get_stringtable_entries(const pcre *code, PCRE_SPTR16 stringname,
130 PCRE_SCHAR16 **firstptr, PCRE_SCHAR16 **lastptr)
131 #endif
132 {
133 int rc;
134 int entrysize;
135 int top, bot;
136 pcre_uchar *nametable, *lastentry;
137
138 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
139 return rc;
140 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
141
142 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
143 return rc;
144 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
145 return rc;
146
147 lastentry = nametable + entrysize * (top - 1);
148 bot = 0;
149 while (top > bot)
150 {
151 int mid = (top + bot) / 2;
152 pcre_uchar *entry = nametable + entrysize*mid;
153 int c = STRCMP_UC_UC((pcre_uchar *)stringname,
154 (pcre_uchar *)(entry + IMM2_SIZE));
155 if (c == 0)
156 {
157 pcre_uchar *first = entry;
158 pcre_uchar *last = entry;
159 while (first > nametable)
160 {
161 if (STRCMP_UC_UC((pcre_uchar *)stringname,
162 (pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break;
163 first -= entrysize;
164 }
165 while (last < lastentry)
166 {
167 if (STRCMP_UC_UC((pcre_uchar *)stringname,
168 (pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break;
169 last += entrysize;
170 }
171 #ifdef COMPILE_PCRE8
172 *firstptr = (char *)first;
173 *lastptr = (char *)last;
174 #else
175 *firstptr = (PCRE_SCHAR16 *)first;
176 *lastptr = (PCRE_SCHAR16 *)last;
177 #endif
178 return entrysize;
179 }
180 if (c > 0) bot = mid + 1; else top = mid;
181 }
182
183 return PCRE_ERROR_NOSUBSTRING;
184 }
185
186
187
188 /*************************************************
189 * Find first set of multiple named strings *
190 *************************************************/
191
192 /* This function allows for duplicate names in the table of named substrings.
193 It returns the number of the first one that was set in a pattern match.
194
195 Arguments:
196 code the compiled regex
197 stringname the name of the capturing substring
198 ovector the vector of matched substrings
199
200 Returns: the number of the first that is set,
201 or the number of the last one if none are set,
202 or a negative number on error
203 */
204
205 #ifdef COMPILE_PCRE8
206 static int
207 get_first_set(const pcre *code, const char *stringname, int *ovector)
208 #else
209 static int
210 get_first_set(const pcre *code, PCRE_SPTR16 stringname, int *ovector)
211 #endif
212 {
213 const real_pcre *re = (const real_pcre *)code;
214 int entrysize;
215 pcre_uchar *first, *last;
216 pcre_uchar *entry;
217 #ifdef COMPILE_PCRE8
218 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
219 return pcre_get_stringnumber(code, stringname);
220 entrysize = pcre_get_stringtable_entries(code, stringname,
221 (char **)&first, (char **)&last);
222 #else
223 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
224 return pcre16_get_stringnumber(code, stringname);
225 entrysize = pcre16_get_stringtable_entries(code, stringname,
226 (PCRE_SCHAR16 **)&first, (PCRE_SCHAR16 **)&last);
227 #endif
228 if (entrysize <= 0) return entrysize;
229 for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
230 {
231 int n = (entry[0] << 8) + entry[1];
232 if (ovector[n*2] >= 0) return n;
233 }
234 return (first[0] << 8) + first[1];
235 }
236
237
238
239
240 /*************************************************
241 * Copy captured string to given buffer *
242 *************************************************/
243
244 /* This function copies a single captured substring into a given buffer.
245 Note that we use memcpy() rather than strncpy() in case there are binary zeros
246 in the string.
247
248 Arguments:
249 subject the subject string that was matched
250 ovector pointer to the offsets table
251 stringcount the number of substrings that were captured
252 (i.e. the yield of the pcre_exec call, unless
253 that was zero, in which case it should be 1/3
254 of the offset table size)
255 stringnumber the number of the required substring
256 buffer where to put the substring
257 size the size of the buffer
258
259 Returns: if successful:
260 the length of the copied string, not including the zero
261 that is put on the end; can be zero
262 if not successful:
263 PCRE_ERROR_NOMEMORY (-6) buffer too small
264 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
265 */
266
267 #ifdef COMPILE_PCRE8
268 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
269 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
270 int stringnumber, char *buffer, int size)
271 #else
272 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
273 pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
274 int stringnumber, PCRE_SCHAR16 *buffer, int size)
275 #endif
276 {
277 int yield;
278 if (stringnumber < 0 || stringnumber >= stringcount)
279 return PCRE_ERROR_NOSUBSTRING;
280 stringnumber *= 2;
281 yield = ovector[stringnumber+1] - ovector[stringnumber];
282 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
283 memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield));
284 buffer[yield] = 0;
285 return yield;
286 }
287
288
289
290 /*************************************************
291 * Copy named captured string to given buffer *
292 *************************************************/
293
294 /* This function copies a single captured substring into a given buffer,
295 identifying it by name. If the regex permits duplicate names, the first
296 substring that is set is chosen.
297
298 Arguments:
299 code the compiled regex
300 subject the subject string that was matched
301 ovector pointer to the offsets table
302 stringcount the number of substrings that were captured
303 (i.e. the yield of the pcre_exec call, unless
304 that was zero, in which case it should be 1/3
305 of the offset table size)
306 stringname the name of the required substring
307 buffer where to put the substring
308 size the size of the buffer
309
310 Returns: if successful:
311 the length of the copied string, not including the zero
312 that is put on the end; can be zero
313 if not successful:
314 PCRE_ERROR_NOMEMORY (-6) buffer too small
315 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
316 */
317
318 #ifdef COMPILE_PCRE8
319 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
320 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
321 int stringcount, const char *stringname, char *buffer, int size)
322 #else
323 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
324 pcre16_copy_named_substring(const pcre *code, PCRE_SPTR16 subject, int *ovector,
325 int stringcount, PCRE_SPTR16 stringname, PCRE_SCHAR16 *buffer, int size)
326 #endif
327 {
328 int n = get_first_set(code, stringname, ovector);
329 if (n <= 0) return n;
330 #ifdef COMPILE_PCRE8
331 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
332 #else
333 return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size);
334 #endif
335 }
336
337
338
339 /*************************************************
340 * Copy all captured strings to new store *
341 *************************************************/
342
343 /* This function gets one chunk of store and builds a list of pointers and all
344 of the captured substrings in it. A NULL pointer is put on the end of the list.
345
346 Arguments:
347 subject the subject string that was matched
348 ovector pointer to the offsets table
349 stringcount the number of substrings that were captured
350 (i.e. the yield of the pcre_exec call, unless
351 that was zero, in which case it should be 1/3
352 of the offset table size)
353 listptr set to point to the list of pointers
354
355 Returns: if successful: 0
356 if not successful:
357 PCRE_ERROR_NOMEMORY (-6) failed to get store
358 */
359
360 #ifdef COMPILE_PCRE8
361 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
362 pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
363 const char ***listptr)
364 #else
365 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
366 pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount,
367 PCRE_SPTR16 **listptr)
368 #endif
369 {
370 int i;
371 int size = sizeof(pcre_uchar *);
372 int double_count = stringcount * 2;
373 pcre_uchar **stringlist;
374 pcre_uchar *p;
375
376 for (i = 0; i < double_count; i += 2)
377 size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1);
378
379 stringlist = (pcre_uchar **)(PUBL(malloc))(size);
380 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
381
382 #ifdef COMPILE_PCRE8
383 *listptr = (const char **)stringlist;
384 #else
385 *listptr = (PCRE_SPTR16 *)stringlist;
386 #endif
387 p = (pcre_uchar *)(stringlist + stringcount + 1);
388
389 for (i = 0; i < double_count; i += 2)
390 {
391 int len = ovector[i+1] - ovector[i];
392 memcpy(p, subject + ovector[i], IN_UCHARS(len));
393 *stringlist++ = p;
394 p += len;
395 *p++ = 0;
396 }
397
398 *stringlist = NULL;
399 return 0;
400 }
401
402
403
404 /*************************************************
405 * Free store obtained by get_substring_list *
406 *************************************************/
407
408 /* This function exists for the benefit of people calling PCRE from non-C
409 programs that can call its functions, but not free() or (PUBL(free))()
410 directly.
411
412 Argument: the result of a previous pcre_get_substring_list()
413 Returns: nothing
414 */
415
416 #ifdef COMPILE_PCRE8
417 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
418 pcre_free_substring_list(const char **pointer)
419 #else
420 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
421 pcre16_free_substring_list(PCRE_SPTR16 *pointer)
422 #endif
423 {
424 (PUBL(free))((void *)pointer);
425 }
426
427
428
429 /*************************************************
430 * Copy captured string to new store *
431 *************************************************/
432
433 /* This function copies a single captured substring into a piece of new
434 store
435
436 Arguments:
437 subject the subject string that was matched
438 ovector pointer to the offsets table
439 stringcount the number of substrings that were captured
440 (i.e. the yield of the pcre_exec call, unless
441 that was zero, in which case it should be 1/3
442 of the offset table size)
443 stringnumber the number of the required substring
444 stringptr where to put a pointer to the substring
445
446 Returns: if successful:
447 the length of the string, not including the zero that
448 is put on the end; can be zero
449 if not successful:
450 PCRE_ERROR_NOMEMORY (-6) failed to get store
451 PCRE_ERROR_NOSUBSTRING (-7) substring not present
452 */
453
454 #ifdef COMPILE_PCRE8
455 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
456 pcre_get_substring(const char *subject, int *ovector, int stringcount,
457 int stringnumber, const char **stringptr)
458 #else
459 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
460 pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
461 int stringnumber, PCRE_SPTR16 *stringptr)
462 #endif
463 {
464 int yield;
465 pcre_uchar *substring;
466 if (stringnumber < 0 || stringnumber >= stringcount)
467 return PCRE_ERROR_NOSUBSTRING;
468 stringnumber *= 2;
469 yield = ovector[stringnumber+1] - ovector[stringnumber];
470 substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1));
471 if (substring == NULL) return PCRE_ERROR_NOMEMORY;
472 memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield));
473 substring[yield] = 0;
474 #ifdef COMPILE_PCRE8
475 *stringptr = (const char *)substring;
476 #else
477 *stringptr = (PCRE_SPTR16)substring;
478 #endif
479 return yield;
480 }
481
482
483
484 /*************************************************
485 * Copy named captured string to new store *
486 *************************************************/
487
488 /* This function copies a single captured substring, identified by name, into
489 new store. If the regex permits duplicate names, the first substring that is
490 set is chosen.
491
492 Arguments:
493 code the compiled regex
494 subject the subject string that was matched
495 ovector pointer to the offsets table
496 stringcount the number of substrings that were captured
497 (i.e. the yield of the pcre_exec call, unless
498 that was zero, in which case it should be 1/3
499 of the offset table size)
500 stringname the name of the required substring
501 stringptr where to put the pointer
502
503 Returns: if successful:
504 the length of the copied string, not including the zero
505 that is put on the end; can be zero
506 if not successful:
507 PCRE_ERROR_NOMEMORY (-6) couldn't get memory
508 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
509 */
510
511 #ifdef COMPILE_PCRE8
512 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
513 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
514 int stringcount, const char *stringname, const char **stringptr)
515 #else
516 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
517 pcre16_get_named_substring(const pcre *code, PCRE_SPTR16 subject, int *ovector,
518 int stringcount, PCRE_SPTR16 stringname, PCRE_SPTR16 *stringptr)
519 #endif
520 {
521 int n = get_first_set(code, stringname, ovector);
522 if (n <= 0) return n;
523 #ifdef COMPILE_PCRE8
524 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
525 #else
526 return pcre16_get_substring(subject, ovector, stringcount, n, stringptr);
527 #endif
528 }
529
530
531
532
533 /*************************************************
534 * Free store obtained by get_substring *
535 *************************************************/
536
537 /* This function exists for the benefit of people calling PCRE from non-C
538 programs that can call its functions, but not free() or (PUBL(free))()
539 directly.
540
541 Argument: the result of a previous pcre_get_substring()
542 Returns: nothing
543 */
544
545 #ifdef COMPILE_PCRE8
546 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
547 pcre_free_substring(const char *pointer)
548 #else
549 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
550 pcre16_free_substring(PCRE_SPTR16 pointer)
551 #endif
552 {
553 (PUBL(free))((void *)pointer);
554 }
555
556 /* End of pcre_get.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5