/[pcre]/code/trunk/pcre_get.c
ViewVC logotype

Diff of /code/trunk/pcre_get.c

Parent Directory | Revision Log | View Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC | revision 852 by zherczeg, Thu Jan 5 19:18:12 2012 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 43  from the subject string after a regex ma Line 43  from the subject string after a regex ma
43  for these functions came from Scott Wimer. */  for these functions came from Scott Wimer. */
44    
45    
46    #ifdef HAVE_CONFIG_H
47    #include "config.h"
48    #endif
49    
50  #include "pcre_internal.h"  #include "pcre_internal.h"
51    
52    
# Line 50  for these functions came from Scott Wime Line 54  for these functions came from Scott Wime
54  *           Find number for named string         *  *           Find number for named string         *
55  *************************************************/  *************************************************/
56    
57  /* This function is used by the two extraction functions below, as well  /* This function is used by the get_first_set() function below, as well
58  as being generally available.  as being generally available. It assumes that names are unique.
59    
60  Arguments:  Arguments:
61    code        the compiled regex    code        the compiled regex
# Line 61  Returns:      the number of the named pa Line 65  Returns:      the number of the named pa
65                  (PCRE_ERROR_NOSUBSTRING) if not found                  (PCRE_ERROR_NOSUBSTRING) if not found
66  */  */
67    
68  int  #ifdef COMPILE_PCRE8
69    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
70  pcre_get_stringnumber(const pcre *code, const char *stringname)  pcre_get_stringnumber(const pcre *code, const char *stringname)
71    #else
72    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
73    pcre16_get_stringnumber(const pcre16 *code, PCRE_SPTR16 stringname)
74    #endif
75  {  {
76  int rc;  int rc;
77  int entrysize;  int entrysize;
78  int top, bot;  int top, bot;
79  uschar *nametable;  pcre_uchar *nametable;
80    
81    #ifdef COMPILE_PCRE8
82  if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)  if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
83    return rc;    return rc;
84  if (top <= 0) return PCRE_ERROR_NOSUBSTRING;  if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
# Line 77  if ((rc = pcre_fullinfo(code, NULL, PCRE Line 87  if ((rc = pcre_fullinfo(code, NULL, PCRE
87    return rc;    return rc;
88  if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)  if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
89    return rc;    return rc;
90    #endif
91    #ifdef COMPILE_PCRE16
92    if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
93      return rc;
94    if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
95    
96    if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
97      return rc;
98    if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
99      return rc;
100    #endif
101    
102  bot = 0;  bot = 0;
103  while (top > bot)  while (top > bot)
104    {    {
105    int mid = (top + bot) / 2;    int mid = (top + bot) / 2;
106    uschar *entry = nametable + entrysize*mid;    pcre_uchar *entry = nametable + entrysize*mid;
107    int c = strcmp(stringname, (char *)(entry + 2));    int c = STRCMP_UC_UC((pcre_uchar *)stringname,
108    if (c == 0) return (entry[0] << 8) + entry[1];      (pcre_uchar *)(entry + IMM2_SIZE));
109      if (c == 0) return GET2(entry, 0);
110    if (c > 0) bot = mid + 1; else top = mid;    if (c > 0) bot = mid + 1; else top = mid;
111    }    }
112    
# Line 94  return PCRE_ERROR_NOSUBSTRING; Line 116  return PCRE_ERROR_NOSUBSTRING;
116    
117    
118  /*************************************************  /*************************************************
119    *     Find (multiple) entries for named string   *
120    *************************************************/
121    
122    /* This is used by the get_first_set() function below, as well as being
123    generally available. It is used when duplicated names are permitted.
124    
125    Arguments:
126      code        the compiled regex
127      stringname  the name whose entries are required
128      firstptr    where to put the pointer to the first entry
129      lastptr     where to put the pointer to the last entry
130    
131    Returns:      the length of each entry, or a negative number
132                    (PCRE_ERROR_NOSUBSTRING) if not found
133    */
134    
135    #ifdef COMPILE_PCRE8
136    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
137    pcre_get_stringtable_entries(const pcre *code, const char *stringname,
138      char **firstptr, char **lastptr)
139    #else
140    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
141    pcre16_get_stringtable_entries(const pcre16 *code, PCRE_SPTR16 stringname,
142      PCRE_SCHAR16 **firstptr, PCRE_SCHAR16 **lastptr)
143    #endif
144    {
145    int rc;
146    int entrysize;
147    int top, bot;
148    pcre_uchar *nametable, *lastentry;
149    
150    #ifdef COMPILE_PCRE8
151    if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
152      return rc;
153    if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
154    
155    if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
156      return rc;
157    if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
158      return rc;
159    #endif
160    #ifdef COMPILE_PCRE16
161    if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
162      return rc;
163    if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
164    
165    if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
166      return rc;
167    if ((rc = pcre16_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
168      return rc;
169    #endif
170    
171    lastentry = nametable + entrysize * (top - 1);
172    bot = 0;
173    while (top > bot)
174      {
175      int mid = (top + bot) / 2;
176      pcre_uchar *entry = nametable + entrysize*mid;
177      int c = STRCMP_UC_UC((pcre_uchar *)stringname,
178        (pcre_uchar *)(entry + IMM2_SIZE));
179      if (c == 0)
180        {
181        pcre_uchar *first = entry;
182        pcre_uchar *last = entry;
183        while (first > nametable)
184          {
185          if (STRCMP_UC_UC((pcre_uchar *)stringname,
186            (pcre_uchar *)(first - entrysize + IMM2_SIZE)) != 0) break;
187          first -= entrysize;
188          }
189        while (last < lastentry)
190          {
191          if (STRCMP_UC_UC((pcre_uchar *)stringname,
192            (pcre_uchar *)(last + entrysize + IMM2_SIZE)) != 0) break;
193          last += entrysize;
194          }
195    #ifdef COMPILE_PCRE8
196        *firstptr = (char *)first;
197        *lastptr = (char *)last;
198    #else
199        *firstptr = (PCRE_SCHAR16 *)first;
200        *lastptr = (PCRE_SCHAR16 *)last;
201    #endif
202        return entrysize;
203        }
204      if (c > 0) bot = mid + 1; else top = mid;
205      }
206    
207    return PCRE_ERROR_NOSUBSTRING;
208    }
209    
210    
211    
212    /*************************************************
213    *    Find first set of multiple named strings    *
214    *************************************************/
215    
216    /* This function allows for duplicate names in the table of named substrings.
217    It returns the number of the first one that was set in a pattern match.
218    
219    Arguments:
220      code         the compiled regex
221      stringname   the name of the capturing substring
222      ovector      the vector of matched substrings
223    
224    Returns:       the number of the first that is set,
225                   or the number of the last one if none are set,
226                   or a negative number on error
227    */
228    
229    #ifdef COMPILE_PCRE8
230    static int
231    get_first_set(const pcre *code, const char *stringname, int *ovector)
232    #else
233    static int
234    get_first_set(const pcre16 *code, PCRE_SPTR16 stringname, int *ovector)
235    #endif
236    {
237    const REAL_PCRE *re = (const REAL_PCRE *)code;
238    int entrysize;
239    pcre_uchar *first, *last;
240    pcre_uchar *entry;
241    #ifdef COMPILE_PCRE8
242    if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
243      return pcre_get_stringnumber(code, stringname);
244    entrysize = pcre_get_stringtable_entries(code, stringname,
245      (char **)&first, (char **)&last);
246    #else
247    if ((re->options & PCRE_DUPNAMES) == 0 && (re->flags & PCRE_JCHANGED) == 0)
248      return pcre16_get_stringnumber(code, stringname);
249    entrysize = pcre16_get_stringtable_entries(code, stringname,
250      (PCRE_SCHAR16 **)&first, (PCRE_SCHAR16 **)&last);
251    #endif
252    if (entrysize <= 0) return entrysize;
253    for (entry = (pcre_uchar *)first; entry <= (pcre_uchar *)last; entry += entrysize)
254      {
255      int n = GET2(entry, 0);
256      if (ovector[n*2] >= 0) return n;
257      }
258    return GET2(entry, 0);
259    }
260    
261    
262    
263    
264    /*************************************************
265  *      Copy captured string to given buffer      *  *      Copy captured string to given buffer      *
266  *************************************************/  *************************************************/
267    
# Line 120  Returns:         if successful: Line 288  Returns:         if successful:
288                     PCRE_ERROR_NOSUBSTRING (-7) no such captured substring                     PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
289  */  */
290    
291  int  #ifdef COMPILE_PCRE8
292    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
293  pcre_copy_substring(const char *subject, int *ovector, int stringcount,  pcre_copy_substring(const char *subject, int *ovector, int stringcount,
294    int stringnumber, char *buffer, int size)    int stringnumber, char *buffer, int size)
295    #else
296    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
297    pcre16_copy_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
298      int stringnumber, PCRE_SCHAR16 *buffer, int size)
299    #endif
300  {  {
301  int yield;  int yield;
302  if (stringnumber < 0 || stringnumber >= stringcount)  if (stringnumber < 0 || stringnumber >= stringcount)
# Line 130  if (stringnumber < 0 || stringnumber >= Line 304  if (stringnumber < 0 || stringnumber >=
304  stringnumber *= 2;  stringnumber *= 2;
305  yield = ovector[stringnumber+1] - ovector[stringnumber];  yield = ovector[stringnumber+1] - ovector[stringnumber];
306  if (size < yield + 1) return PCRE_ERROR_NOMEMORY;  if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
307  memcpy(buffer, subject + ovector[stringnumber], yield);  memcpy(buffer, subject + ovector[stringnumber], IN_UCHARS(yield));
308  buffer[yield] = 0;  buffer[yield] = 0;
309  return yield;  return yield;
310  }  }
# Line 142  return yield; Line 316  return yield;
316  *************************************************/  *************************************************/
317    
318  /* This function copies a single captured substring into a given buffer,  /* This function copies a single captured substring into a given buffer,
319  identifying it by name.  identifying it by name. If the regex permits duplicate names, the first
320    substring that is set is chosen.
321    
322  Arguments:  Arguments:
323    code           the compiled regex    code           the compiled regex
# Line 164  Returns:         if successful: Line 339  Returns:         if successful:
339                     PCRE_ERROR_NOSUBSTRING (-7) no such captured substring                     PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
340  */  */
341    
342  int  #ifdef COMPILE_PCRE8
343  pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
344    int stringcount, const char *stringname, char *buffer, int size)  pcre_copy_named_substring(const pcre *code, const char *subject,
345      int *ovector, int stringcount, const char *stringname,
346      char *buffer, int size)
347    #else
348    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
349    pcre16_copy_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
350      int *ovector, int stringcount, PCRE_SPTR16 stringname,
351      PCRE_SCHAR16 *buffer, int size)
352    #endif
353  {  {
354  int n = pcre_get_stringnumber(code, stringname);  int n = get_first_set(code, stringname, ovector);
355  if (n <= 0) return n;  if (n <= 0) return n;
356    #ifdef COMPILE_PCRE8
357  return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);  return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
358    #else
359    return pcre16_copy_substring(subject, ovector, stringcount, n, buffer, size);
360    #endif
361  }  }
362    
363    
# Line 196  Returns:         if successful: 0 Line 383  Returns:         if successful: 0
383                     PCRE_ERROR_NOMEMORY (-6) failed to get store                     PCRE_ERROR_NOMEMORY (-6) failed to get store
384  */  */
385    
386  int  #ifdef COMPILE_PCRE8
387    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
388  pcre_get_substring_list(const char *subject, int *ovector, int stringcount,  pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
389    const char ***listptr)    const char ***listptr)
390    #else
391    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
392    pcre16_get_substring_list(PCRE_SPTR16 subject, int *ovector, int stringcount,
393      PCRE_SPTR16 **listptr)
394    #endif
395  {  {
396  int i;  int i;
397  int size = sizeof(char *);  int size = sizeof(pcre_uchar *);
398  int double_count = stringcount * 2;  int double_count = stringcount * 2;
399  char **stringlist;  pcre_uchar **stringlist;
400  char *p;  pcre_uchar *p;
401    
402  for (i = 0; i < double_count; i += 2)  for (i = 0; i < double_count; i += 2)
403    size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;    size += sizeof(pcre_uchar *) + IN_UCHARS(ovector[i+1] - ovector[i] + 1);
404    
405  stringlist = (char **)(pcre_malloc)(size);  stringlist = (pcre_uchar **)(PUBL(malloc))(size);
406  if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;  if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
407    
408    #ifdef COMPILE_PCRE8
409  *listptr = (const char **)stringlist;  *listptr = (const char **)stringlist;
410  p = (char *)(stringlist + stringcount + 1);  #else
411    *listptr = (PCRE_SPTR16 *)stringlist;
412    #endif
413    p = (pcre_uchar *)(stringlist + stringcount + 1);
414    
415  for (i = 0; i < double_count; i += 2)  for (i = 0; i < double_count; i += 2)
416    {    {
417    int len = ovector[i+1] - ovector[i];    int len = ovector[i+1] - ovector[i];
418    memcpy(p, subject + ovector[i], len);    memcpy(p, subject + ovector[i], IN_UCHARS(len));
419    *stringlist++ = p;    *stringlist++ = p;
420    p += len;    p += len;
421    *p++ = 0;    *p++ = 0;
# Line 235  return 0; Line 432  return 0;
432  *************************************************/  *************************************************/
433    
434  /* This function exists for the benefit of people calling PCRE from non-C  /* This function exists for the benefit of people calling PCRE from non-C
435  programs that can call its functions, but not free() or (pcre_free)() directly.  programs that can call its functions, but not free() or (PUBL(free))()
436    directly.
437    
438  Argument:   the result of a previous pcre_get_substring_list()  Argument:   the result of a previous pcre_get_substring_list()
439  Returns:    nothing  Returns:    nothing
440  */  */
441    
442  void  #ifdef COMPILE_PCRE8
443    PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
444  pcre_free_substring_list(const char **pointer)  pcre_free_substring_list(const char **pointer)
445    #else
446    PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
447    pcre16_free_substring_list(PCRE_SPTR16 *pointer)
448    #endif
449  {  {
450  (pcre_free)((void *)pointer);  (PUBL(free))((void *)pointer);
451  }  }
452    
453    
# Line 274  Returns:         if successful: Line 477  Returns:         if successful:
477                     PCRE_ERROR_NOSUBSTRING (-7) substring not present                     PCRE_ERROR_NOSUBSTRING (-7) substring not present
478  */  */
479    
480  int  #ifdef COMPILE_PCRE8
481    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
482  pcre_get_substring(const char *subject, int *ovector, int stringcount,  pcre_get_substring(const char *subject, int *ovector, int stringcount,
483    int stringnumber, const char **stringptr)    int stringnumber, const char **stringptr)
484    #else
485    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
486    pcre16_get_substring(PCRE_SPTR16 subject, int *ovector, int stringcount,
487      int stringnumber, PCRE_SPTR16 *stringptr)
488    #endif
489  {  {
490  int yield;  int yield;
491  char *substring;  pcre_uchar *substring;
492  if (stringnumber < 0 || stringnumber >= stringcount)  if (stringnumber < 0 || stringnumber >= stringcount)
493    return PCRE_ERROR_NOSUBSTRING;    return PCRE_ERROR_NOSUBSTRING;
494  stringnumber *= 2;  stringnumber *= 2;
495  yield = ovector[stringnumber+1] - ovector[stringnumber];  yield = ovector[stringnumber+1] - ovector[stringnumber];
496  substring = (char *)(pcre_malloc)(yield + 1);  substring = (pcre_uchar *)(PUBL(malloc))(IN_UCHARS(yield + 1));
497  if (substring == NULL) return PCRE_ERROR_NOMEMORY;  if (substring == NULL) return PCRE_ERROR_NOMEMORY;
498  memcpy(substring, subject + ovector[stringnumber], yield);  memcpy(substring, subject + ovector[stringnumber], IN_UCHARS(yield));
499  substring[yield] = 0;  substring[yield] = 0;
500  *stringptr = substring;  #ifdef COMPILE_PCRE8
501    *stringptr = (const char *)substring;
502    #else
503    *stringptr = (PCRE_SPTR16)substring;
504    #endif
505  return yield;  return yield;
506  }  }
507    
# Line 299  return yield; Line 512  return yield;
512  *************************************************/  *************************************************/
513    
514  /* This function copies a single captured substring, identified by name, into  /* This function copies a single captured substring, identified by name, into
515  new store.  new store. If the regex permits duplicate names, the first substring that is
516    set is chosen.
517    
518  Arguments:  Arguments:
519    code           the compiled regex    code           the compiled regex
# Line 320  Returns:         if successful: Line 534  Returns:         if successful:
534                     PCRE_ERROR_NOSUBSTRING (-7) no such captured substring                     PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
535  */  */
536    
537  int  #ifdef COMPILE_PCRE8
538  pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
539    int stringcount, const char *stringname, const char **stringptr)  pcre_get_named_substring(const pcre *code, const char *subject,
540      int *ovector, int stringcount, const char *stringname,
541      const char **stringptr)
542    #else
543    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
544    pcre16_get_named_substring(const pcre16 *code, PCRE_SPTR16 subject,
545      int *ovector, int stringcount, PCRE_SPTR16 stringname,
546      PCRE_SPTR16 *stringptr)
547    #endif
548  {  {
549  int n = pcre_get_stringnumber(code, stringname);  int n = get_first_set(code, stringname, ovector);
550  if (n <= 0) return n;  if (n <= 0) return n;
551    #ifdef COMPILE_PCRE8
552  return pcre_get_substring(subject, ovector, stringcount, n, stringptr);  return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
553    #else
554    return pcre16_get_substring(subject, ovector, stringcount, n, stringptr);
555    #endif
556  }  }
557    
558    
# Line 337  return pcre_get_substring(subject, ovect Line 563  return pcre_get_substring(subject, ovect
563  *************************************************/  *************************************************/
564    
565  /* This function exists for the benefit of people calling PCRE from non-C  /* This function exists for the benefit of people calling PCRE from non-C
566  programs that can call its functions, but not free() or (pcre_free)() directly.  programs that can call its functions, but not free() or (PUBL(free))()
567    directly.
568    
569  Argument:   the result of a previous pcre_get_substring()  Argument:   the result of a previous pcre_get_substring()
570  Returns:    nothing  Returns:    nothing
571  */  */
572    
573  void  #ifdef COMPILE_PCRE8
574    PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
575  pcre_free_substring(const char *pointer)  pcre_free_substring(const char *pointer)
576    #else
577    PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
578    pcre16_free_substring(PCRE_SPTR16 pointer)
579    #endif
580  {  {
581  (pcre_free)((void *)pointer);  (PUBL(free))((void *)pointer);
582  }  }
583    
584  /* End of pcre_get.c */  /* End of pcre_get.c */

Legend:
Removed from v.87  
changed lines
  Added in v.852

  ViewVC Help
Powered by ViewVC 1.1.5