/[pcre]/code/trunk/pcre_get.c
ViewVC logotype

Contents of /code/trunk/pcre_get.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 852 - (show annotations)
Thu Jan 5 19:18:12 2012 UTC (3 years, 4 months ago) by zherczeg
File MIME type: text/plain
File size: 19088 byte(s)
Error occurred while calculating annotation data.
Add pcre16 prefix to 16 bit structs
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains some convenience functions for extracting substrings
42 from the subject string after a regex match has succeeded. The original idea
43 for these functions came from Scott Wimer. */
44
45
46 #ifdef HAVE_CONFIG_H
47 #include "config.h"
48 #endif
49
50 #include "pcre_internal.h"
51
52
53 /*************************************************
54 * Find number for named string *
55 *************************************************/
56
57 /* This function is used by the get_first_set() function below, as well
58 as being generally available. It assumes that names are unique.
59
60 Arguments:
61 code the compiled regex
62 stringname the name whose number is required
63
64 Returns: the number of the named parentheses, or a negative number
65 (PCRE_ERROR_NOSUBSTRING) if not found
66 */
67
68 #ifdef COMPILE_PCRE8
69 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
70 pcre_get_stringnumber(const pcre *code, const char *stringname)
71 #else
72 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
73 pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname)
74 #endif
75 {
76 int rc;
77 int entrysize;
78 int top, bot;
79 pcre_uchar *nametable;
80
81 #ifdef COMPILE_PCRE8
82 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
83 return rc;
84 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
85
86 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
87 return rc;
88 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
89 return rc;
90 #endif
91 #ifdef COMPILE_PCRE16
92 if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
93 return rc;
94 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
95
96 if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
97 return rc;
98 if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
99 return rc;
100 #endif
101
102 bot = 0;
103 while (top > bot)
104 {
105 int mid = (top + bot) / 2;
106 pcre_uchar *entry = nametable + entrysize*mid;
107 int c = STRCMP_UC_UC((pcre_uchar *)stringname,
108 (pcre_uchar *)(entry + IMM2_SIZE));
109 if (c == 0) return GET2(entry, 0);
110 if (c > 0) bot = mid + 1; else top = mid;
111 }
112
113 return PCRE_ERROR_NOSUBSTRING;
114 }
115
116
117
118 /*************************************************
119 * Find (multiple) entries for named string *
120 *************************************************/
121
122 /* This is used by the get_first_set() function below, as well as being
123 generally available. It is used when duplicated names are permitted.
124
125 Arguments:
126 code the compiled regex
127 stringname the name whose entries required
128 firstptr where to put the pointer to the first entry
129 lastptr where to put the pointer to the last entry
130
131 Returns: the length of each entry, or a negative number
132 (PCRE_ERROR_NOSUBSTRING) if not found
133 */
134
135 #ifdef COMPILE_PCRE8
136 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
137 pcre_get_stringtable_entries(const pcre *code, const char *stringname,
138 char **firstptr, char **lastptr)
139 #else
140 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
141 pcre16_get_stringtable_entries(const pcre16 *code, PCRE_SPTR16 stringname,
142 PCRE_SCHAR16 **firstptr, PCRE_SCHAR16 **lastptr)
143 #endif
144 {
145 int rc;
146 int entrysize;
147 int top, bot;
148 pcre_uchar *nametable, *lastentry;
149
150 #ifdef COMPILE_PCRE8
151 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
152 return rc;
153 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
154
155 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
156 return rc;
157 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
158 return rc;
159 #endif
160 #ifdef COMPILE_PCRE16
161 if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
162 return rc;
163 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
164
165 if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
166 return rc;
167 if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
168 return rc;
169 #endif
170
171 lastentry = nametable + entrysize * (top - 1);
172 bot = 0;
173 while (top > bot)
174 {
175 int mid = (top + bot) / 2;
176 pcre_uchar *entry = nametable + entrysize*mid;
177 int c = STRCMP_UC_UC((pcre_uchar *)stringname,
178 (pcre_uchar *)(entry + IMM2_SIZE));
179 if (c == 0)
180 {
181 pcre_uchar *first = entry;
182 pcre_uchar *last = entry;
183 while (first > nametable)
184 {
185 if (STRCMP_UC_UC((pcre_uchar *)stringname,
186 (pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break;
187 first -= entrysize;
188 }
189 while (last < lastentry)
190 {
191 if (STRCMP_UC_UC((pcre_uchar *)stringname,
192 (pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break;
193 last += entrysize;
194 }
195 #ifdef COMPILE_PCRE8
196 *firstptr = (char *)first;
197 *lastptr = (char *)last;
198 #else
199 *firstptr = (PCRE_SCHAR16 *)first;
200 *lastptr = (PCRE_SCHAR16 *)last;
201 #endif
202 return entrysize;
203 }
204 if (c > 0) bot = mid + 1; else top = mid;
205 }
206
207 return PCRE_ERROR_NOSUBSTRING;
208 }
209
210
211
212 /*************************************************
213 * Find first set of multiple named strings *
214 *************************************************/
215
216 /* This function allows for duplicate names in the table of named substrings.
217 It returns the number of the first one that was set in a pattern match.
218
219 Arguments:
220 code the compiled regex
221 stringname the name of the capturing substring
222 ovector the vector of matched substrings
223
224 Returns: the number of the first that is set,
225 or the number of the last one if none are set,
226 or a negative number on error
227 */
228
229 #ifdef COMPILE_PCRE8
230 static int
231 get_first_set(const pcre *code, const char *stringname, int *ovector)
232 #else
233 static int
234 get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector)
235 #endif
236 {
237 const REAL_PCRE *re = (const REAL_PCRE *)code;
238 int entrysize;
239 pcre_uchar *first, *last;
240 pcre_uchar *entry;
241 #ifdef COMPILE_PCRE8
242 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
243 return pcre_get_stringnumber(code, stringname);
244 entrysize = pcre_get_stringtable_entries(code, stringname,
245 (char **)&first, (char **)&last);
246 #else
247 if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
248 return pcre16_get_stringnumber(code, stringname);
249 entrysize = pcre16_get_stringtable_entries(code, stringname,
250 (PCRE_SCHAR16 **)&first, (PCRE_SCHAR16 **)&last);
251 #endif
252 if (entrysize <= 0) return entrysize;
253 for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
254 {
255 int n = GET2(entry, 0);
256 if (ovector[n*2] >= 0) return n;
257 }
258 return GET2(entry, 0);
259 }
260
261
262
263
264 /*************************************************
265 * Copy captured string to given buffer *
266 *************************************************/
267
268 /* This function copies a single captured substring into a given buffer.
269 Note that we use memcpy() rather than strncpy() in case there are binary zeros
270 in the string.
271
272 Arguments:
273 subject the subject string that was matched
274 ovector pointer to the offsets table
275 stringcount the number of substrings that were captured
276 (i.e. the yield of the pcre_exec call, unless
277 that was zero, in which case it should be 1/3
278 of the offset table size)
279 stringnumber the number of the required substring
280 buffer where to put the substring
281 size the size of the buffer
282
283 Returns: if successful:
284 the length of the copied string, not including the zero
285 that is put on the end; can be zero
286 if not successful:
287 PCRE_ERROR_NOMEMORY (-6) buffer too small
288 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
289 */
290
291 #ifdef COMPILE_PCRE8
292 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
293 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
294 int stringnumber, char *buffer, int size)
295 #else
296 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
297 pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
298 int stringnumber, PCRE_SCHAR16 *buffer, int size)
299 #endif
300 {
301 int yield;
302 if (stringnumber < 0 || stringnumber >= stringcount)
303 return PCRE_ERROR_NOSUBSTRING;
304 stringnumber *= 2;
305 yield = ovector[stringnumber+1] - ovector[stringnumber];
306 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
307 memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield));
308 buffer[yield] = 0;
309 return yield;
310 }
311
312
313
314 /*************************************************
315 * Copy named captured string to given buffer *
316 *************************************************/
317
318 /* This function copies a single captured substring into a given buffer,
319 identifying it by name. If the regex permits duplicate names, the first
320 substring that is set is chosen.
321
322 Arguments:
323 code the compiled regex
324 subject the subject string that was matched
325 ovector pointer to the offsets table
326 stringcount the number of substrings that were captured
327 (i.e. the yield of the pcre_exec call, unless
328 that was zero, in which case it should be 1/3
329 of the offset table size)
330 stringname the name of the required substring
331 buffer where to put the substring
332 size the size of the buffer
333
334 Returns: if successful:
335 the length of the copied string, not including the zero
336 that is put on the end; can be zero
337 if not successful:
338 PCRE_ERROR_NOMEMORY (-6) buffer too small
339 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
340 */
341
342 #ifdef COMPILE_PCRE8
343 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
344 pcre_copy_named_substring(const pcre *code, const char *subject,
345 int *ovector, int stringcount, const char *stringname,
346 char *buffer, int size)
347 #else
348 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
349 pcre16_copy_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
350 int *ovector, int stringcount, PCRE_SPTR16 stringname,
351 PCRE_SCHAR16 *buffer, int size)
352 #endif
353 {
354 int n = get_first_set(code, stringname, ovector);
355 if (n <= 0) return n;
356 #ifdef COMPILE_PCRE8
357 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
358 #else
359 return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size);
360 #endif
361 }
362
363
364
365 /*************************************************
366 * Copy all captured strings to new store *
367 *************************************************/
368
369 /* This function gets one chunk of store and builds a list of pointers and all
370 of the captured substrings in it. A NULL pointer is put on the end of the list.
371
372 Arguments:
373 subject the subject string that was matched
374 ovector pointer to the offsets table
375 stringcount the number of substrings that were captured
376 (i.e. the yield of the pcre_exec call, unless
377 that was zero, in which case it should be 1/3
378 of the offset table size)
379 listptr set to point to the list of pointers
380
381 Returns: if successful: 0
382 if not successful:
383 PCRE_ERROR_NOMEMORY (-6) failed to get store
384 */
385
386 #ifdef COMPILE_PCRE8
387 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
388 pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
389 const char ***listptr)
390 #else
391 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
392 pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount,
393 PCRE_SPTR16 **listptr)
394 #endif
395 {
396 int i;
397 int size = sizeof(pcre_uchar *);
398 int double_count = stringcount * 2;
399 pcre_uchar **stringlist;
400 pcre_uchar *p;
401
402 for (i = 0; i < double_count; i += 2)
403 size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1);
404
405 stringlist = (pcre_uchar **)(PUBL(malloc))(size);
406 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
407
408 #ifdef COMPILE_PCRE8
409 *listptr = (const char **)stringlist;
410 #else
411 *listptr = (PCRE_SPTR16 *)stringlist;
412 #endif
413 p = (pcre_uchar *)(stringlist + stringcount + 1);
414
415 for (i = 0; i < double_count; i += 2)
416 {
417 int len = ovector[i+1] - ovector[i];
418 memcpy(p, subject + ovector[i], IN_UCHARS(len));
419 *stringlist++ = p;
420 p += len;
421 *p++ = 0;
422 }
423
424 *stringlist = NULL;
425 return 0;
426 }
427
428
429
430 /*************************************************
431 * Free store obtained by get_substring_list *
432 *************************************************/
433
434 /* This function exists for the benefit of people calling PCRE from non-C
435 programs that can call its functions, but not free() or (PUBL(free))()
436 directly.
437
438 Argument: the result of a previous pcre_get_substring_list()
439 Returns: nothing
440 */
441
442 #ifdef COMPILE_PCRE8
443 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
444 pcre_free_substring_list(const char **pointer)
445 #else
446 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
447 pcre16_free_substring_list(PCRE_SPTR16 *pointer)
448 #endif
449 {
450 (PUBL(free))((void *)pointer);
451 }
452
453
454
455 /*************************************************
456 * Copy captured string to new store *
457 *************************************************/
458
459 /* This function copies a single captured substring into a piece of new
460 store
461
462 Arguments:
463 subject the subject string that was matched
464 ovector pointer to the offsets table
465 stringcount the number of substrings that were captured
466 (i.e. the yield of the pcre_exec call, unless
467 that was zero, in which case it should be 1/3
468 of the offset table size)
469 stringnumber the number of the required substring
470 stringptr where to put a pointer to the substring
471
472 Returns: if successful:
473 the length of the string, not including the zero that
474 is put on the end; can be zero
475 if not successful:
476 PCRE_ERROR_NOMEMORY (-6) failed to get store
477 PCRE_ERROR_NOSUBSTRING (-7) substring not present
478 */
479
480 #ifdef COMPILE_PCRE8
481 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
482 pcre_get_substring(const char *subject, int *ovector, int stringcount,
483 int stringnumber, const char **stringptr)
484 #else
485 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
486 pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
487 int stringnumber, PCRE_SPTR16 *stringptr)
488 #endif
489 {
490 int yield;
491 pcre_uchar *substring;
492 if (stringnumber < 0 || stringnumber >= stringcount)
493 return PCRE_ERROR_NOSUBSTRING;
494 stringnumber *= 2;
495 yield = ovector[stringnumber+1] - ovector[stringnumber];
496 substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1));
497 if (substring == NULL) return PCRE_ERROR_NOMEMORY;
498 memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield));
499 substring[yield] = 0;
500 #ifdef COMPILE_PCRE8
501 *stringptr = (const char *)substring;
502 #else
503 *stringptr = (PCRE_SPTR16)substring;
504 #endif
505 return yield;
506 }
507
508
509
510 /*************************************************
511 * Copy named captured string to new store *
512 *************************************************/
513
514 /* This function copies a single captured substring, identified by name, into
515 new store. If the regex permits duplicate names, the first substring that is
516 set is chosen.
517
518 Arguments:
519 code the compiled regex
520 subject the subject string that was matched
521 ovector pointer to the offsets table
522 stringcount the number of substrings that were captured
523 (i.e. the yield of the pcre_exec call, unless
524 that was zero, in which case it should be 1/3
525 of the offset table size)
526 stringname the name of the required substring
527 stringptr where to put the pointer
528
529 Returns: if successful:
530 the length of the copied string, not including the zero
531 that is put on the end; can be zero
532 if not successful:
533 PCRE_ERROR_NOMEMORY (-6) couldn't get memory
534 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
535 */
536
537 #ifdef COMPILE_PCRE8
538 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
539 pcre_get_named_substring(const pcre *code, const char *subject,
540 int *ovector, int stringcount, const char *stringname,
541 const char **stringptr)
542 #else
543 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
544 pcre16_get_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
545 int *ovector, int stringcount, PCRE_SPTR16 stringname,
546 PCRE_SPTR16 *stringptr)
547 #endif
548 {
549 int n = get_first_set(code, stringname, ovector);
550 if (n <= 0) return n;
551 #ifdef COMPILE_PCRE8
552 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
553 #else
554 return pcre16_get_substring(subject, ovector, stringcount, n, stringptr);
555 #endif
556 }
557
558
559
560
561 /*************************************************
562 * Free store obtained by get_substring *
563 *************************************************/
564
565 /* This function exists for the benefit of people calling PCRE from non-C
566 programs that can call its functions, but not free() or (PUBL(free))()
567 directly.
568
569 Argument: the result of a previous pcre_get_substring()
570 Returns: nothing
571 */
572
573 #ifdef COMPILE_PCRE8
574 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
575 pcre_free_substring(const char *pointer)
576 #else
577 PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
578 pcre16_free_substring(PCRE_SPTR16 pointer)
579 #endif
580 {
581 (PUBL(free))((void *)pointer);
582 }
583
584 /* End of pcre_get.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5