/[pcre]/code/trunk/pcredemo.c
ViewVC logotype

Diff of /code/trunk/pcredemo.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC revision 200 by ph10, Wed Aug 1 09:10:40 2007 UTC
# Line 1  Line 1 
1    /*************************************************
2    *           PCRE DEMONSTRATION PROGRAM           *
3    *************************************************/
4    
5    /* This is a demonstration program to illustrate the most straightforward ways
6    of calling the PCRE regular expression library from a C program. See the
7    pcresample documentation for a short discussion.
8    
9    Compile thuswise:
10      gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
11        -R/usr/local/lib -lpcre
12    
13    Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
14    library files for PCRE are installed on your system. You don't need -I and -L
15    if PCRE is installed in the standard system libraries. Only some operating
16    systems (e.g. Solaris) use the -R option.
17    */
18    
19    
20  #include <stdio.h>  #include <stdio.h>
21  #include <string.h>  #include <string.h>
22  #include <pcre.h>  #include <pcre.h>
23    
 /* Compile thuswise:  
   gcc -Wall pcredemo.c -I/opt/local/include -L/opt/local/lib \  
     -R/opt/local/lib -lpcre  
 */  
   
24  #define OVECCOUNT 30    /* should be a multiple of 3 */  #define OVECCOUNT 30    /* should be a multiple of 3 */
25    
26    
27  int main(int argc, char **argv)  int main(int argc, char **argv)
28  {  {
29  pcre *re;  pcre *re;
30  const char *error;  const char *error;
31    char *pattern;
32    char *subject;
33    unsigned char *name_table;
34  int erroffset;  int erroffset;
35    int find_all;
36    int namecount;
37    int name_entry_size;
38  int ovector[OVECCOUNT];  int ovector[OVECCOUNT];
39    int subject_length;
40  int rc, i;  int rc, i;
41    
42  if (argc != 3)  
43    /**************************************************************************
44    * First, sort out the command line. There is only one possible option at  *
45    * the moment, "-g" to request repeated matching to find all occurrences,  *
46    * like Perl's /g option. We set the variable find_all to a non-zero value *
47    * if the -g option is present. Apart from that, there must be exactly two *
48    * arguments.                                                              *
49    **************************************************************************/
50    
51    find_all = 0;
52    for (i = 1; i < argc; i++)
53      {
54      if (strcmp(argv[i], "-g") == 0) find_all = 1;
55        else break;
56      }
57    
58    /* After the options, we require exactly two arguments, which are the pattern,
59    and the subject string. */
60    
61    if (argc - i != 2)
62    {    {
63    printf("Two arguments required: a regex and a subject string\n");    printf("Two arguments required: a regex and a subject string\n");
64    return 1;    return 1;
65    }    }
66    
67  /* Compile the regular expression in the first argument */  pattern = argv[i];
68    subject = argv[i+1];
69    subject_length = (int)strlen(subject);
70    
71    
72    /*************************************************************************
73    * Now we are going to compile the regular expression pattern, and handle *
74    * any errors that are detected.                                          *
75    *************************************************************************/
76    
77  re = pcre_compile(  re = pcre_compile(
78    argv[1],              /* the pattern */    pattern,              /* the pattern */
79    0,                    /* default options */    0,                    /* default options */
80    &error,               /* for error message */    &error,               /* for error message */
81    &erroffset,           /* for error offset */    &erroffset,           /* for error offset */
# Line 40  if (re == NULL) Line 89  if (re == NULL)
89    return 1;    return 1;
90    }    }
91    
92  /* Compilation succeeded: match the subject in the second argument */  
93    /*************************************************************************
94    * If the compilation succeeded, we call PCRE again, in order to do a     *
95    * pattern match against the subject string. This does just ONE match. If *
96    * further matching is needed, it will be done below.                     *
97    *************************************************************************/
98    
99  rc = pcre_exec(  rc = pcre_exec(
100    re,                   /* the compiled pattern */    re,                   /* the compiled pattern */
101    NULL,                 /* no extra data - we didn't study the pattern */    NULL,                 /* no extra data - we didn't study the pattern */
102    argv[2],              /* the subject string */    subject,              /* the subject string */
103    (int)strlen(argv[2]), /* the length of the subject */    subject_length,       /* the length of the subject */
104    0,                    /* start at offset 0 in the subject */    0,                    /* start at offset 0 in the subject */
105    0,                    /* default options */    0,                    /* default options */
106    ovector,              /* output vector for substring information */    ovector,              /* output vector for substring information */
# Line 64  if (rc < 0) Line 118  if (rc < 0)
118      */      */
119      default: printf("Matching error %d\n", rc); break;      default: printf("Matching error %d\n", rc); break;
120      }      }
121      pcre_free(re);     /* Release memory used for the compiled pattern */
122    return 1;    return 1;
123    }    }
124    
125  /* Match succeeded */  /* Match succeeded */
126    
127  printf("Match succeeded\n");  printf("\nMatch succeeded at offset %d\n", ovector[0]);
128    
129    
130    /*************************************************************************
131    * We have found the first match within the subject string. If the output *
132    * vector wasn't big enough, set its size to the maximum. Then output any *
133    * substrings that were captured.                                         *
134    *************************************************************************/
135    
136  /* The output vector wasn't big enough */  /* The output vector wasn't big enough */
137    
# Line 79  if (rc == 0) Line 141  if (rc == 0)
141    printf("ovector only has room for %d captured substrings\n", rc - 1);    printf("ovector only has room for %d captured substrings\n", rc - 1);
142    }    }
143    
144  /* Show substrings stored in the output vector */  /* Show substrings stored in the output vector by number. Obviously, in a real
145    application you might want to do things other than print them. */
146    
147  for (i = 0; i < rc; i++)  for (i = 0; i < rc; i++)
148    {    {
149    char *substring_start = argv[2] + ovector[2*i];    char *substring_start = subject + ovector[2*i];
150    int substring_length = ovector[2*i+1] - ovector[2*i];    int substring_length = ovector[2*i+1] - ovector[2*i];
151    printf("%2d: %.*s\n", i, substring_length, substring_start);    printf("%2d: %.*s\n", i, substring_length, substring_start);
152    }    }
153    
154    
155    /**************************************************************************
156    * That concludes the basic part of this demonstration program. We have    *
157    * compiled a pattern, and performed a single match. The code that follows *
158    * first shows how to access named substrings, and then how to code for    *
159    * repeated matches on the same subject.                                   *
160    **************************************************************************/
161    
162    /* See if there are any named substrings, and if so, show them by name. First
163    we have to extract the count of named parentheses from the pattern. */
164    
165    (void)pcre_fullinfo(
166      re,                   /* the compiled pattern */
167      NULL,                 /* no extra data - we didn't study the pattern */
168      PCRE_INFO_NAMECOUNT,  /* number of named substrings */
169      &namecount);          /* where to put the answer */
170    
171    if (namecount <= 0) printf("No named substrings\n"); else
172      {
173      unsigned char *tabptr;
174      printf("Named substrings\n");
175    
176      /* Before we can access the substrings, we must extract the table for
177      translating names to numbers, and the size of each entry in the table. */
178    
179      (void)pcre_fullinfo(
180        re,                       /* the compiled pattern */
181        NULL,                     /* no extra data - we didn't study the pattern */
182        PCRE_INFO_NAMETABLE,      /* address of the table */
183        &name_table);             /* where to put the answer */
184    
185      (void)pcre_fullinfo(
186        re,                       /* the compiled pattern */
187        NULL,                     /* no extra data - we didn't study the pattern */
188        PCRE_INFO_NAMEENTRYSIZE,  /* size of each entry in the table */
189        &name_entry_size);        /* where to put the answer */
190    
191      /* Now we can scan the table and, for each entry, print the number, the name,
192      and the substring itself. */
193    
194      tabptr = name_table;
195      for (i = 0; i < namecount; i++)
196        {
197        int n = (tabptr[0] << 8) | tabptr[1];
198        printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
199          ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
200        tabptr += name_entry_size;
201        }
202      }
203    
204    
205    /*************************************************************************
206    * If the "-g" option was given on the command line, we want to continue  *
207    * to search for additional matches in the subject string, in a similar   *
208    * way to the /g option in Perl. This turns out to be trickier than you   *
209    * might think because of the possibility of matching an empty string.    *
210    * What happens is as follows:                                            *
211    *                                                                        *
212    * If the previous match was NOT for an empty string, we can just start   *
213    * the next match at the end of the previous one.                         *
214    *                                                                        *
215    * If the previous match WAS for an empty string, we can't do that, as it *
216    * would lead to an infinite loop. Instead, a special call of pcre_exec() *
217    * is made with the PCRE_NOTEMPTY and PCRE_ANCHORED flags set. The first  *
218    * of these tells PCRE that an empty string is not a valid match; other   *
219    * possibilities must be tried. The second flag restricts PCRE to one     *
220    * match attempt at the initial string position. If this match succeeds,  *
221    * an alternative to the empty string match has been found, and we can    *
222    * proceed round the loop.                                                *
223    *************************************************************************/
224    
225    if (!find_all)
226      {
227      pcre_free(re);   /* Release the memory used for the compiled pattern */
228      return 0;        /* Finish unless -g was given */
229      }
230    
231    /* Loop for second and subsequent matches */
232    
233    for (;;)
234      {
235      int options = 0;                 /* Normally no options */
236      int start_offset = ovector[1];   /* Start at end of previous match */
237    
238      /* If the previous match was for an empty string, we are finished if we are
239      at the end of the subject. Otherwise, arrange to run another match at the
240      same point to see if a non-empty match can be found. */
241    
242      if (ovector[0] == ovector[1])
243        {
244        if (ovector[0] == subject_length) break;
245        options = PCRE_NOTEMPTY | PCRE_ANCHORED;
246        }
247    
248      /* Run the next matching operation */
249    
250      rc = pcre_exec(
251        re,                   /* the compiled pattern */
252        NULL,                 /* no extra data - we didn't study the pattern */
253        subject,              /* the subject string */
254        subject_length,       /* the length of the subject */
255        start_offset,         /* starting offset in the subject */
256        options,              /* options */
257        ovector,              /* output vector for substring information */
258        OVECCOUNT);           /* number of elements in the output vector */
259    
260      /* This time, a result of NOMATCH isn't an error. If the value in "options"
261      is zero, it just means we have found all possible matches, so the loop ends.
262      Otherwise, it means we have failed to find a non-empty-string match at a
263      point where there was a previous empty-string match. In this case, we do what
264      Perl does: advance the matching position by one, and continue. We do this by
265      setting the "end of previous match" offset, because that is picked up at the
266      top of the loop as the point at which to start again. */
267    
268      if (rc == PCRE_ERROR_NOMATCH)
269        {
270        if (options == 0) break;
271        ovector[1] = start_offset + 1;
272        continue;    /* Go round the loop again */
273        }
274    
275      /* Other matching errors are not recoverable. */
276    
277      if (rc < 0)
278        {
279        printf("Matching error %d\n", rc);
280        pcre_free(re);    /* Release memory used for the compiled pattern */
281        return 1;
282        }
283    
284      /* Match succeeded */
285    
286      printf("\nMatch succeeded again at offset %d\n", ovector[0]);
287    
288      /* The match succeeded, but the output vector wasn't big enough. */
289    
290      if (rc == 0)
291        {
292        rc = OVECCOUNT/3;
293        printf("ovector only has room for %d captured substrings\n", rc - 1);
294        }
295    
296      /* As before, show substrings stored in the output vector by number, and then
297      also any named substrings. */
298    
299      for (i = 0; i < rc; i++)
300        {
301        char *substring_start = subject + ovector[2*i];
302        int substring_length = ovector[2*i+1] - ovector[2*i];
303        printf("%2d: %.*s\n", i, substring_length, substring_start);
304        }
305    
306      if (namecount <= 0) printf("No named substrings\n"); else
307        {
308        unsigned char *tabptr = name_table;
309        printf("Named substrings\n");
310        for (i = 0; i < namecount; i++)
311          {
312          int n = (tabptr[0] << 8) | tabptr[1];
313          printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
314            ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
315          tabptr += name_entry_size;
316          }
317        }
318      }      /* End of loop to find second and subsequent matches */
319    
320    printf("\n");
321    pcre_free(re);       /* Release memory used for the compiled pattern */
322  return 0;  return 0;
323  }  }
324    
325    /* End of pcredemo.c */

Legend:
Removed from v.53  
changed lines
  Added in v.200

  ViewVC Help
Powered by ViewVC 1.1.5