/[pcre]/code/trunk/pcredemo.c
ViewVC logotype

Diff of /code/trunk/pcredemo.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC revision 442 by ph10, Fri Sep 11 10:21:02 2009 UTC
# Line 1  Line 1 
1    /*************************************************
2    *           PCRE DEMONSTRATION PROGRAM           *
3    *************************************************/
4    
5    /* This is a demonstration program to illustrate the most straightforward ways
6    of calling the PCRE regular expression library from a C program. See the
7    pcresample documentation for a short discussion ("man pcresample" if you have
8    the PCRE man pages installed).
9    
10    In Unix-like environments, compile this program thuswise:
11    
12      gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
13        -R/usr/local/lib -lpcre
14    
15    Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
16    library files for PCRE are installed on your system. You don't need -I and -L
17    if PCRE is installed in the standard system libraries. Only some operating
18    systems (e.g. Solaris) use the -R option.
19    
20    Building under Windows:
21    
22    If you want to statically link this program against a non-dll .a file, you must
23    define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
24    pcre_free() exported functions will be declared __declspec(dllimport), with
25    unwanted results. So in this environment, uncomment the following line. */
26    
27    /* #define PCRE_STATIC */
28    
29  #include <stdio.h>  #include <stdio.h>
30  #include <string.h>  #include <string.h>
31  #include <pcre.h>  #include <pcre.h>
32    
 /* Compile thuswise:  
   gcc -Wall pcredemo.c -I/opt/local/include -L/opt/local/lib \  
     -R/opt/local/lib -lpcre  
 */  
   
33  #define OVECCOUNT 30    /* should be a multiple of 3 */  #define OVECCOUNT 30    /* should be a multiple of 3 */
34    
35    
36  int main(int argc, char **argv)  int main(int argc, char **argv)
37  {  {
38  pcre *re;  pcre *re;
39  const char *error;  const char *error;
40    char *pattern;
41    char *subject;
42    unsigned char *name_table;
43  int erroffset;  int erroffset;
44    int find_all;
45    int namecount;
46    int name_entry_size;
47  int ovector[OVECCOUNT];  int ovector[OVECCOUNT];
48    int subject_length;
49  int rc, i;  int rc, i;
50    
51  if (argc != 3)  
52    /**************************************************************************
53    * First, sort out the command line. There is only one possible option at  *
54    * the moment, "-g" to request repeated matching to find all occurrences,  *
55    * like Perl's /g option. We set the variable find_all to a non-zero value *
56    * if the -g option is present. Apart from that, there must be exactly two *
57    * arguments.                                                              *
58    **************************************************************************/
59    
60    find_all = 0;
61    for (i = 1; i < argc; i++)
62      {
63      if (strcmp(argv[i], "-g") == 0) find_all = 1;
64        else break;
65      }
66    
67    /* After the options, we require exactly two arguments, which are the pattern,
68    and the subject string. */
69    
70    if (argc - i != 2)
71    {    {
72    printf("Two arguments required: a regex and a subject string\n");    printf("Two arguments required: a regex and a subject string\n");
73    return 1;    return 1;
74    }    }
75    
76  /* Compile the regular expression in the first argument */  pattern = argv[i];
77    subject = argv[i+1];
78    subject_length = (int)strlen(subject);
79    
80    
81    /*************************************************************************
82    * Now we are going to compile the regular expression pattern, and handle *
82    * any errors that are detected.                                          *
84    *************************************************************************/
85    
86  re = pcre_compile(  re = pcre_compile(
87    argv[1],              /* the pattern */    pattern,              /* the pattern */
88    0,                    /* default options */    0,                    /* default options */
89    &error,               /* for error message */    &error,               /* for error message */
90    &erroffset,           /* for error offset */    &erroffset,           /* for error offset */
# Line 40  if (re == NULL) Line 98  if (re == NULL)
98    return 1;    return 1;
99    }    }
100    
101  /* Compilation succeeded: match the subject in the second argument */  
102    /*************************************************************************
103    * If the compilation succeeded, we call PCRE again, in order to do a     *
104    * pattern match against the subject string. This does just ONE match. If *
105    * further matching is needed, it will be done below.                     *
106    *************************************************************************/
107    
108  rc = pcre_exec(  rc = pcre_exec(
109    re,                   /* the compiled pattern */    re,                   /* the compiled pattern */
110    NULL,                 /* no extra data - we didn't study the pattern */    NULL,                 /* no extra data - we didn't study the pattern */
111    argv[2],              /* the subject string */    subject,              /* the subject string */
112    (int)strlen(argv[2]), /* the length of the subject */    subject_length,       /* the length of the subject */
113    0,                    /* start at offset 0 in the subject */    0,                    /* start at offset 0 in the subject */
114    0,                    /* default options */    0,                    /* default options */
115    ovector,              /* output vector for substring information */    ovector,              /* output vector for substring information */
# Line 64  if (rc < 0) Line 127  if (rc < 0)
127      */      */
128      default: printf("Matching error %d\n", rc); break;      default: printf("Matching error %d\n", rc); break;
129      }      }
130      pcre_free(re);     /* Release memory used for the compiled pattern */
131    return 1;    return 1;
132    }    }
133    
134  /* Match succeeded */  /* Match succeeded */
135    
136  printf("Match succeeded\n");  printf("\nMatch succeeded at offset %d\n", ovector[0]);
137    
138    
139    /*************************************************************************
140    * We have found the first match within the subject string. If the output *
141    * vector wasn't big enough, say so. Then output any substrings that were *
142    * captured.                                                              *
143    *************************************************************************/
144    
145  /* The output vector wasn't big enough */  /* The output vector wasn't big enough */
146    
# Line 79  if (rc == 0) Line 150  if (rc == 0)
150    printf("ovector only has room for %d captured substrings\n", rc - 1);    printf("ovector only has room for %d captured substrings\n", rc - 1);
151    }    }
152    
153  /* Show substrings stored in the output vector */  /* Show substrings stored in the output vector by number. Obviously, in a real
154    application you might want to do things other than print them. */
155    
156  for (i = 0; i < rc; i++)  for (i = 0; i < rc; i++)
157    {    {
158    char *substring_start = argv[2] + ovector[2*i];    char *substring_start = subject + ovector[2*i];
159    int substring_length = ovector[2*i+1] - ovector[2*i];    int substring_length = ovector[2*i+1] - ovector[2*i];
160    printf("%2d: %.*s\n", i, substring_length, substring_start);    printf("%2d: %.*s\n", i, substring_length, substring_start);
161    }    }
162    
163    
164    /**************************************************************************
165    * That concludes the basic part of this demonstration program. We have    *
166    * compiled a pattern, and performed a single match. The code that follows *
167    * shows first how to access named substrings, and then how to code for    *
168    * repeated matches on the same subject.                                   *
169    **************************************************************************/
170    
171    /* See if there are any named substrings, and if so, show them by name. First
172    we have to extract the count of named parentheses from the pattern. */
173    
174    (void)pcre_fullinfo(
175      re,                   /* the compiled pattern */
176      NULL,                 /* no extra data - we didn't study the pattern */
177      PCRE_INFO_NAMECOUNT,  /* number of named substrings */
178      &namecount);          /* where to put the answer */
179    
180    if (namecount <= 0) printf("No named substrings\n"); else
181      {
182      unsigned char *tabptr;
183      printf("Named substrings\n");
184    
185      /* Before we can access the substrings, we must extract the table for
186      translating names to numbers, and the size of each entry in the table. */
187    
188      (void)pcre_fullinfo(
189        re,                       /* the compiled pattern */
190        NULL,                     /* no extra data - we didn't study the pattern */
191        PCRE_INFO_NAMETABLE,      /* address of the table */
192        &name_table);             /* where to put the answer */
193    
194      (void)pcre_fullinfo(
195        re,                       /* the compiled pattern */
196        NULL,                     /* no extra data - we didn't study the pattern */
197        PCRE_INFO_NAMEENTRYSIZE,  /* size of each entry in the table */
198        &name_entry_size);        /* where to put the answer */
199    
200      /* Now we can scan the table and, for each entry, print the number, the name,
201      and the substring itself. */
202    
203      tabptr = name_table;
204      for (i = 0; i < namecount; i++)
205        {
206        int n = (tabptr[0] << 8) | tabptr[1];
207        printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
208          ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
209        tabptr += name_entry_size;
210        }
211      }
212    
213    
214    /*************************************************************************
215    * If the "-g" option was given on the command line, we want to continue  *
216    * to search for additional matches in the subject string, in a similar   *
217    * way to the /g option in Perl. This turns out to be trickier than you   *
218    * might think because of the possibility of matching an empty string.    *
219    * What happens is as follows:                                            *
220    *                                                                        *
221    * If the previous match was NOT for an empty string, we can just start   *
222    * the next match at the end of the previous one.                         *
223    *                                                                        *
224    * If the previous match WAS for an empty string, we can't do that, as it *
225    * would lead to an infinite loop. Instead, a special call of pcre_exec() *
226    * is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set.    *
227    * The first of these tells PCRE that an empty string at the start of the *
228    * subject is not a valid match; other possibilities must be tried. The   *
229    * second flag restricts PCRE to one match attempt at the initial string  *
230    * position. If this match succeeds, an alternative to the empty string   *
231    * match has been found, and we can proceed round the loop.               *
232    *************************************************************************/
233    
234    if (!find_all)
235      {
236      pcre_free(re);   /* Release the memory used for the compiled pattern */
237      return 0;        /* Finish unless -g was given */
238      }
239    
240    /* Loop for second and subsequent matches */
241    
242    for (;;)
243      {
244      int options = 0;                 /* Normally no options */
245      int start_offset = ovector[1];   /* Start at end of previous match */
246    
247      /* If the previous match was for an empty string, we are finished if we are
248      at the end of the subject. Otherwise, arrange to run another match at the
249      same point to see if a non-empty match can be found. */
250    
251      if (ovector[0] == ovector[1])
252        {
253        if (ovector[0] == subject_length) break;
254        options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
255        }
256    
257      /* Run the next matching operation */
258    
259      rc = pcre_exec(
260        re,                   /* the compiled pattern */
261        NULL,                 /* no extra data - we didn't study the pattern */
262        subject,              /* the subject string */
263        subject_length,       /* the length of the subject */
264        start_offset,         /* starting offset in the subject */
265        options,              /* options */
266        ovector,              /* output vector for substring information */
267        OVECCOUNT);           /* number of elements in the output vector */
268    
269      /* This time, a result of NOMATCH isn't an error. If the value in "options"
270      is zero, it just means we have found all possible matches, so the loop ends.
271      Otherwise, it means we have failed to find a non-empty-string match at a
272      point where there was a previous empty-string match. In this case, we do what
273      Perl does: advance the matching position by one, and continue. We do this by
274      setting the "end of previous match" offset, because that is picked up at the
275      top of the loop as the point at which to start again. */
276    
277      if (rc == PCRE_ERROR_NOMATCH)
278        {
279        if (options == 0) break;
280        ovector[1] = start_offset + 1;
281        continue;    /* Go round the loop again */
282        }
283    
284      /* Other matching errors are not recoverable. */
285    
286      if (rc < 0)
287        {
288        printf("Matching error %d\n", rc);
289        pcre_free(re);    /* Release memory used for the compiled pattern */
290        return 1;
291        }
292    
293      /* Match succeeded */
294    
295      printf("\nMatch succeeded again at offset %d\n", ovector[0]);
296    
297      /* The match succeeded, but the output vector wasn't big enough. */
298    
299      if (rc == 0)
300        {
301        rc = OVECCOUNT/3;
302        printf("ovector only has room for %d captured substrings\n", rc - 1);
303        }
304    
305      /* As before, show substrings stored in the output vector by number, and then
306      also any named substrings. */
307    
308      for (i = 0; i < rc; i++)
309        {
310        char *substring_start = subject + ovector[2*i];
311        int substring_length = ovector[2*i+1] - ovector[2*i];
312        printf("%2d: %.*s\n", i, substring_length, substring_start);
313        }
314    
315      if (namecount <= 0) printf("No named substrings\n"); else
316        {
317        unsigned char *tabptr = name_table;
318        printf("Named substrings\n");
319        for (i = 0; i < namecount; i++)
320          {
321          int n = (tabptr[0] << 8) | tabptr[1];
322          printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
323            ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
324          tabptr += name_entry_size;
325          }
326        }
327      }      /* End of loop to find second and subsequent matches */
328    
329    printf("\n");
330    pcre_free(re);       /* Release memory used for the compiled pattern */
331  return 0;  return 0;
332  }  }
333    
334    /* End of pcredemo.c */

Legend:
Removed from v.53  
changed lines
  Added in v.442

  ViewVC Help
Powered by ViewVC 1.1.5