/[pcre]/code/tags/pcre-7.7/pcredemo.c
ViewVC logotype

Diff of /code/tags/pcre-7.7/pcredemo.c

Parent Directory | Revision Log | View Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC revision 96 by nigel, Fri Mar 2 13:10:43 2007 UTC
# Line 1  Line 1 
1    /*************************************************
2    *           PCRE DEMONSTRATION PROGRAM           *
3    *************************************************/
4    
5    /* This is a demonstration program to illustrate the most straightforward ways
6    of calling the PCRE regular expression library from a C program. See the
7    pcresample documentation for a short discussion.
8    
9    Compile thuswise:
10      gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
11        -R/usr/local/lib -lpcre
12    
13    Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
14    library files for PCRE are installed on your system. Only some operating
15    systems (e.g. Solaris) use the -R option.
16    */
17    
18    
19  #include <stdio.h>  #include <stdio.h>
20  #include <string.h>  #include <string.h>
21  #include <pcre.h>  #include <pcre.h>
22    
 /* Compile thuswise:  
   gcc -Wall pcredemo.c -I/opt/local/include -L/opt/local/lib \  
     -R/opt/local/lib -lpcre  
 */  
   
23  #define OVECCOUNT 30    /* should be a multiple of 3 */  #define OVECCOUNT 30    /* should be a multiple of 3 */
24    
25    
26  int main(int argc, char **argv)  int main(int argc, char **argv)
27  {  {
28  pcre *re;  pcre *re;
29  const char *error;  const char *error;
30    char *pattern;
31    char *subject;
32    unsigned char *name_table;
33  int erroffset;  int erroffset;
34    int find_all;
35    int namecount;
36    int name_entry_size;
37  int ovector[OVECCOUNT];  int ovector[OVECCOUNT];
38    int subject_length;
39  int rc, i;  int rc, i;
40    
41  if (argc != 3)  
42    /**************************************************************************
43    * First, sort out the command line. There is only one possible option at  *
44    * the moment, "-g" to request repeated matching to find all occurrences,  *
45    * like Perl's /g option. We set the variable find_all to a non-zero value *
46    * if the -g option is present. Apart from that, there must be exactly two *
47    * arguments.                                                              *
48    **************************************************************************/
49    
50    find_all = 0;
51    for (i = 1; i < argc; i++)
52      {
53      if (strcmp(argv[i], "-g") == 0) find_all = 1;
54        else break;
55      }
56    
57    /* After the options, we require exactly two arguments, which are the pattern,
58    and the subject string. */
59    
60    if (argc - i != 2)
61    {    {
62    printf("Two arguments required: a regex and a subject string\n");    printf("Two arguments required: a regex and a subject string\n");
63    return 1;    return 1;
64    }    }
65    
66  /* Compile the regular expression in the first argument */  pattern = argv[i];
67    subject = argv[i+1];
68    subject_length = (int)strlen(subject);
69    
70    
71    /*************************************************************************
72    * Now we are going to compile the regular expression pattern, and handle *
73    * any errors that are detected.                                          *
74    *************************************************************************/
75    
76  re = pcre_compile(  re = pcre_compile(
77    argv[1],              /* the pattern */    pattern,              /* the pattern */
78    0,                    /* default options */    0,                    /* default options */
79    &error,               /* for error message */    &error,               /* for error message */
80    &erroffset,           /* for error offset */    &erroffset,           /* for error offset */
# Line 40  if (re == NULL) Line 88  if (re == NULL)
88    return 1;    return 1;
89    }    }
90    
91  /* Compilation succeeded: match the subject in the second argument */  
92    /*************************************************************************
93    * If the compilation succeeded, we call PCRE again, in order to do a     *
94    * pattern match against the subject string. This does just ONE match. If *
95    * further matching is needed, it will be done below.                     *
96    *************************************************************************/
97    
98  rc = pcre_exec(  rc = pcre_exec(
99    re,                   /* the compiled pattern */    re,                   /* the compiled pattern */
100    NULL,                 /* no extra data - we didn't study the pattern */    NULL,                 /* no extra data - we didn't study the pattern */
101    argv[2],              /* the subject string */    subject,              /* the subject string */
102    (int)strlen(argv[2]), /* the length of the subject */    subject_length,       /* the length of the subject */
103    0,                    /* start at offset 0 in the subject */    0,                    /* start at offset 0 in the subject */
104    0,                    /* default options */    0,                    /* default options */
105    ovector,              /* output vector for substring information */    ovector,              /* output vector for substring information */
# Line 64  if (rc < 0) Line 117  if (rc < 0)
117      */      */
118      default: printf("Matching error %d\n", rc); break;      default: printf("Matching error %d\n", rc); break;
119      }      }
120      pcre_free(re);     /* Release memory used for the compiled pattern */
121    return 1;    return 1;
122    }    }
123    
124  /* Match succeeded */  /* Match succeeded */
125    
126  printf("Match succeeded\n");  printf("\nMatch succeeded at offset %d\n", ovector[0]);
127    
128    
129    /*************************************************************************
130    * We have found the first match within the subject string. If the output *
131    * vector wasn't big enough, set its size to the maximum. Then output any *
132    * substrings that were captured.                                         *
133    *************************************************************************/
134    
135  /* The output vector wasn't big enough */  /* The output vector wasn't big enough */
136    
# Line 79  if (rc == 0) Line 140  if (rc == 0)
140    printf("ovector only has room for %d captured substrings\n", rc - 1);    printf("ovector only has room for %d captured substrings\n", rc - 1);
141    }    }
142    
143  /* Show substrings stored in the output vector */  /* Show substrings stored in the output vector by number. Obviously, in a real
144    application you might want to do things other than print them. */
145    
146  for (i = 0; i < rc; i++)  for (i = 0; i < rc; i++)
147    {    {
148    char *substring_start = argv[2] + ovector[2*i];    char *substring_start = subject + ovector[2*i];
149    int substring_length = ovector[2*i+1] - ovector[2*i];    int substring_length = ovector[2*i+1] - ovector[2*i];
150    printf("%2d: %.*s\n", i, substring_length, substring_start);    printf("%2d: %.*s\n", i, substring_length, substring_start);
151    }    }
152    
153    
154    /**************************************************************************
155    * That concludes the basic part of this demonstration program. We have    *
156    * compiled a pattern, and performed a single match. The code that follows *
157    * first shows how to access named substrings, and then how to code for    *
158    * repeated matches on the same subject.                                   *
159    **************************************************************************/
160    
161    /* See if there are any named substrings, and if so, show them by name. First
162    we have to extract the count of named parentheses from the pattern. */
163    
164    (void)pcre_fullinfo(
165      re,                   /* the compiled pattern */
166      NULL,                 /* no extra data - we didn't study the pattern */
167      PCRE_INFO_NAMECOUNT,  /* number of named substrings */
168      &namecount);          /* where to put the answer */
169    
170    if (namecount <= 0) printf("No named substrings\n"); else
171      {
172      unsigned char *tabptr;
173      printf("Named substrings\n");
174    
175      /* Before we can access the substrings, we must extract the table for
176      translating names to numbers, and the size of each entry in the table. */
177    
178      (void)pcre_fullinfo(
179        re,                       /* the compiled pattern */
180        NULL,                     /* no extra data - we didn't study the pattern */
181        PCRE_INFO_NAMETABLE,      /* address of the table */
182        &name_table);             /* where to put the answer */
183    
184      (void)pcre_fullinfo(
185        re,                       /* the compiled pattern */
186        NULL,                     /* no extra data - we didn't study the pattern */
187        PCRE_INFO_NAMEENTRYSIZE,  /* size of each entry in the table */
188        &name_entry_size);        /* where to put the answer */
189    
190      /* Now we can scan the table and, for each entry, print the number, the name,
191      and the substring itself. */
192    
193      tabptr = name_table;
194      for (i = 0; i < namecount; i++)
195        {
196        int n = (tabptr[0] << 8) | tabptr[1];
197        printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
198          ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
199        tabptr += name_entry_size;
200        }
201      }
202    
203    
204    /*************************************************************************
205    * If the "-g" option was given on the command line, we want to continue  *
206    * to search for additional matches in the subject string, in a similar   *
207    * way to the /g option in Perl. This turns out to be trickier than you   *
208    * might think because of the possibility of matching an empty string.    *
209    * What happens is as follows:                                            *
210    *                                                                        *
211    * If the previous match was NOT for an empty string, we can just start   *
212    * the next match at the end of the previous one.                         *
213    *                                                                        *
214    * If the previous match WAS for an empty string, we can't do that, as it *
215    * would lead to an infinite loop. Instead, a special call of pcre_exec() *
216    * is made with the PCRE_NOTEMPTY and PCRE_ANCHORED flags set. The first  *
217    * of these tells PCRE that an empty string is not a valid match; other   *
218    * possibilities must be tried. The second flag restricts PCRE to one     *
219    * match attempt at the initial string position. If this match succeeds,  *
220    * an alternative to the empty string match has been found, and we can    *
221    * proceed round the loop.                                                *
222    *************************************************************************/
223    
224    if (!find_all)
225      {
226      pcre_free(re);   /* Release the memory used for the compiled pattern */
227      return 0;        /* Finish unless -g was given */
228      }
229    
230    /* Loop for second and subsequent matches */
231    
232    for (;;)
233      {
234      int options = 0;                 /* Normally no options */
235      int start_offset = ovector[1];   /* Start at end of previous match */
236    
237      /* If the previous match was for an empty string, we are finished if we are
238      at the end of the subject. Otherwise, arrange to run another match at the
239      same point to see if a non-empty match can be found. */
240    
241      if (ovector[0] == ovector[1])
242        {
243        if (ovector[0] == subject_length) break;
244        options = PCRE_NOTEMPTY | PCRE_ANCHORED;
245        }
246    
247      /* Run the next matching operation */
248    
249      rc = pcre_exec(
250        re,                   /* the compiled pattern */
251        NULL,                 /* no extra data - we didn't study the pattern */
252        subject,              /* the subject string */
253        subject_length,       /* the length of the subject */
254        start_offset,         /* starting offset in the subject */
255        options,              /* options */
256        ovector,              /* output vector for substring information */
257        OVECCOUNT);           /* number of elements in the output vector */
258    
259      /* This time, a result of NOMATCH isn't an error. If the value in "options"
260      is zero, it just means we have found all possible matches, so the loop ends.
261      Otherwise, it means we have failed to find a non-empty-string match at a
262      point where there was a previous empty-string match. In this case, we do what
263      Perl does: advance the matching position by one, and continue. We do this by
264      setting the "end of previous match" offset, because that is picked up at the
265      top of the loop as the point at which to start again. */
266    
267      if (rc == PCRE_ERROR_NOMATCH)
268        {
269        if (options == 0) break;
270        ovector[1] = start_offset + 1;
271        continue;    /* Go round the loop again */
272        }
273    
274      /* Other matching errors are not recoverable. */
275    
276      if (rc < 0)
277        {
278        printf("Matching error %d\n", rc);
279        pcre_free(re);    /* Release memory used for the compiled pattern */
280        return 1;
281        }
282    
283      /* Match succeeded */
284    
285      printf("\nMatch succeeded again at offset %d\n", ovector[0]);
286    
287      /* The match succeeded, but the output vector wasn't big enough. */
288    
289      if (rc == 0)
290        {
291        rc = OVECCOUNT/3;
292        printf("ovector only has room for %d captured substrings\n", rc - 1);
293        }
294    
295      /* As before, show substrings stored in the output vector by number, and then
296      also any named substrings. */
297    
298      for (i = 0; i < rc; i++)
299        {
300        char *substring_start = subject + ovector[2*i];
301        int substring_length = ovector[2*i+1] - ovector[2*i];
302        printf("%2d: %.*s\n", i, substring_length, substring_start);
303        }
304    
305      if (namecount <= 0) printf("No named substrings\n"); else
306        {
307        unsigned char *tabptr = name_table;
308        printf("Named substrings\n");
309        for (i = 0; i < namecount; i++)
310          {
311          int n = (tabptr[0] << 8) | tabptr[1];
312          printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
313            ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
314          tabptr += name_entry_size;
315          }
316        }
317      }      /* End of loop to find second and subsequent matches */
318    
319    printf("\n");
320    pcre_free(re);       /* Release memory used for the compiled pattern */
321  return 0;  return 0;
322  }  }
323    
324    /* End of pcredemo.c */

Legend:
Removed from v.53  
changed lines
  Added in v.96

  ViewVC Help
Powered by ViewVC 1.1.5