/[pcre]/code/tags/pcre-7.7/pcredemo.c
ViewVC logotype

Diff of /code/tags/pcre-7.7/pcredemo.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC revision 97 by ph10, Mon Mar 5 12:36:47 2007 UTC
# Line 1  Line 1 
1    /*************************************************
2    *           PCRE DEMONSTRATION PROGRAM           *
3    *************************************************/
4    
5    /* This is a demonstration program to illustrate the most straightforward ways
6    of calling the PCRE regular expression library from a C program. See the
7    pcresample documentation for a short discussion.
8    
9    Compile thuswise:
10      gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
11        -R/usr/local/lib -lpcre
12    
13    Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
14    library files for PCRE are installed on your system. Only some operating
15    systems (e.g. Solaris) use the -R option.
16    */
17    
18    
19    #ifdef HAVE_CONFIG_H
20    #  include <config.h>
21    #endif
22    
23  #include <stdio.h>  #include <stdio.h>
24  #include <string.h>  #include <string.h>
25  #include <pcre.h>  #include <pcre.h>
26    
 /* Compile thuswise:  
   gcc -Wall pcredemo.c -I/opt/local/include -L/opt/local/lib \  
     -R/opt/local/lib -lpcre  
 */  
   
27  #define OVECCOUNT 30    /* should be a multiple of 3 */  #define OVECCOUNT 30    /* should be a multiple of 3 */
28    
29    
30  int main(int argc, char **argv)  int main(int argc, char **argv)
31  {  {
32  pcre *re;  pcre *re;
33  const char *error;  const char *error;
34    char *pattern;
35    char *subject;
36    unsigned char *name_table;
37  int erroffset;  int erroffset;
38    int find_all;
39    int namecount;
40    int name_entry_size;
41  int ovector[OVECCOUNT];  int ovector[OVECCOUNT];
42    int subject_length;
43  int rc, i;  int rc, i;
44    
45  if (argc != 3)  
46    /**************************************************************************
47    * First, sort out the command line. There is only one possible option at  *
48    * the moment, "-g" to request repeated matching to find all occurrences,  *
49    * like Perl's /g option. We set the variable find_all to a non-zero value *
50    * if the -g option is present. Apart from that, there must be exactly two *
51    * arguments.                                                              *
52    **************************************************************************/
53    
54    find_all = 0;
55    for (i = 1; i < argc; i++)
56      {
57      if (strcmp(argv[i], "-g") == 0) find_all = 1;
58        else break;
59      }
60    
61    /* After the options, we require exactly two arguments, which are the pattern,
62    and the subject string. */
63    
64    if (argc - i != 2)
65    {    {
66    printf("Two arguments required: a regex and a subject string\n");    printf("Two arguments required: a regex and a subject string\n");
67    return 1;    return 1;
68    }    }
69    
70  /* Compile the regular expression in the first argument */  pattern = argv[i];
71    subject = argv[i+1];
72    subject_length = (int)strlen(subject);
73    
74    
75    /*************************************************************************
76    * Now we are going to compile the regular expression pattern, and handle *
77    * any errors that are detected.                                          *
78    *************************************************************************/
79    
80  re = pcre_compile(  re = pcre_compile(
81    argv[1],              /* the pattern */    pattern,              /* the pattern */
82    0,                    /* default options */    0,                    /* default options */
83    &error,               /* for error message */    &error,               /* for error message */
84    &erroffset,           /* for error offset */    &erroffset,           /* for error offset */
# Line 40  if (re == NULL) Line 92  if (re == NULL)
92    return 1;    return 1;
93    }    }
94    
95  /* Compilation succeeded: match the subject in the second argument */  
96    /*************************************************************************
97    * If the compilation succeeded, we call PCRE again, in order to do a     *
98    * pattern match against the subject string. This does just ONE match. If *
99    * further matching is needed, it will be done below.                     *
100    *************************************************************************/
101    
102  rc = pcre_exec(  rc = pcre_exec(
103    re,                   /* the compiled pattern */    re,                   /* the compiled pattern */
104    NULL,                 /* no extra data - we didn't study the pattern */    NULL,                 /* no extra data - we didn't study the pattern */
105    argv[2],              /* the subject string */    subject,              /* the subject string */
106    (int)strlen(argv[2]), /* the length of the subject */    subject_length,       /* the length of the subject */
107    0,                    /* start at offset 0 in the subject */    0,                    /* start at offset 0 in the subject */
108    0,                    /* default options */    0,                    /* default options */
109    ovector,              /* output vector for substring information */    ovector,              /* output vector for substring information */
# Line 64  if (rc < 0) Line 121  if (rc < 0)
121      */      */
122      default: printf("Matching error %d\n", rc); break;      default: printf("Matching error %d\n", rc); break;
123      }      }
124      pcre_free(re);     /* Release memory used for the compiled pattern */
125    return 1;    return 1;
126    }    }
127    
128  /* Match succeeded */  /* Match succeeded */
129    
130  printf("Match succeeded\n");  printf("\nMatch succeeded at offset %d\n", ovector[0]);
131    
132    
133    /*************************************************************************
134    * We have found the first match within the subject string. If the output *
135    * vector wasn't big enough, set its size to the maximum. Then output any *
136    * substrings that were captured.                                         *
137    *************************************************************************/
138    
139  /* The output vector wasn't big enough */  /* The output vector wasn't big enough */
140    
# Line 79  if (rc == 0) Line 144  if (rc == 0)
144    printf("ovector only has room for %d captured substrings\n", rc - 1);    printf("ovector only has room for %d captured substrings\n", rc - 1);
145    }    }
146    
147  /* Show substrings stored in the output vector */  /* Show substrings stored in the output vector by number. Obviously, in a real
148    application you might want to do things other than print them. */
149    
150  for (i = 0; i < rc; i++)  for (i = 0; i < rc; i++)
151    {    {
152    char *substring_start = argv[2] + ovector[2*i];    char *substring_start = subject + ovector[2*i];
153    int substring_length = ovector[2*i+1] - ovector[2*i];    int substring_length = ovector[2*i+1] - ovector[2*i];
154    printf("%2d: %.*s\n", i, substring_length, substring_start);    printf("%2d: %.*s\n", i, substring_length, substring_start);
155    }    }
156    
157    
158    /**************************************************************************
159    * That concludes the basic part of this demonstration program. We have    *
160    * compiled a pattern, and performed a single match. The code that follows *
161    * first shows how to access named substrings, and then how to code for    *
162    * repeated matches on the same subject.                                   *
163    **************************************************************************/
164    
165    /* See if there are any named substrings, and if so, show them by name. First
166    we have to extract the count of named parentheses from the pattern. */
167    
168    (void)pcre_fullinfo(
169      re,                   /* the compiled pattern */
170      NULL,                 /* no extra data - we didn't study the pattern */
171      PCRE_INFO_NAMECOUNT,  /* number of named substrings */
172      &namecount);          /* where to put the answer */
173    
174    if (namecount <= 0) printf("No named substrings\n"); else
175      {
176      unsigned char *tabptr;
177      printf("Named substrings\n");
178    
179      /* Before we can access the substrings, we must extract the table for
180      translating names to numbers, and the size of each entry in the table. */
181    
182      (void)pcre_fullinfo(
183        re,                       /* the compiled pattern */
184        NULL,                     /* no extra data - we didn't study the pattern */
185        PCRE_INFO_NAMETABLE,      /* address of the table */
186        &name_table);             /* where to put the answer */
187    
188      (void)pcre_fullinfo(
189        re,                       /* the compiled pattern */
190        NULL,                     /* no extra data - we didn't study the pattern */
191        PCRE_INFO_NAMEENTRYSIZE,  /* size of each entry in the table */
192        &name_entry_size);        /* where to put the answer */
193    
194      /* Now we can scan the table and, for each entry, print the number, the name,
195      and the substring itself. */
196    
197      tabptr = name_table;
198      for (i = 0; i < namecount; i++)
199        {
200        int n = (tabptr[0] << 8) | tabptr[1];
201        printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
202          ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
203        tabptr += name_entry_size;
204        }
205      }
206    
207    
208    /*************************************************************************
209    * If the "-g" option was given on the command line, we want to continue  *
210    * to search for additional matches in the subject string, in a similar   *
211    * way to the /g option in Perl. This turns out to be trickier than you   *
212    * might think because of the possibility of matching an empty string.    *
213    * What happens is as follows:                                            *
214    *                                                                        *
215    * If the previous match was NOT for an empty string, we can just start   *
216    * the next match at the end of the previous one.                         *
217    *                                                                        *
218    * If the previous match WAS for an empty string, we can't do that, as it *
219    * would lead to an infinite loop. Instead, a special call of pcre_exec() *
220    * is made with the PCRE_NOTEMPTY and PCRE_ANCHORED flags set. The first  *
221    * of these tells PCRE that an empty string is not a valid match; other   *
222    * possibilities must be tried. The second flag restricts PCRE to one     *
223    * match attempt at the initial string position. If this match succeeds,  *
224    * an alternative to the empty string match has been found, and we can    *
225    * proceed round the loop.                                                *
226    *************************************************************************/
227    
228    if (!find_all)
229      {
230      pcre_free(re);   /* Release the memory used for the compiled pattern */
231      return 0;        /* Finish unless -g was given */
232      }
233    
234    /* Loop for second and subsequent matches */
235    
236    for (;;)
237      {
238      int options = 0;                 /* Normally no options */
239      int start_offset = ovector[1];   /* Start at end of previous match */
240    
241      /* If the previous match was for an empty string, we are finished if we are
242      at the end of the subject. Otherwise, arrange to run another match at the
243      same point to see if a non-empty match can be found. */
244    
245      if (ovector[0] == ovector[1])
246        {
247        if (ovector[0] == subject_length) break;
248        options = PCRE_NOTEMPTY | PCRE_ANCHORED;
249        }
250    
251      /* Run the next matching operation */
252    
253      rc = pcre_exec(
254        re,                   /* the compiled pattern */
255        NULL,                 /* no extra data - we didn't study the pattern */
256        subject,              /* the subject string */
257        subject_length,       /* the length of the subject */
258        start_offset,         /* starting offset in the subject */
259        options,              /* options */
260        ovector,              /* output vector for substring information */
261        OVECCOUNT);           /* number of elements in the output vector */
262    
263      /* This time, a result of NOMATCH isn't an error. If the value in "options"
264      is zero, it just means we have found all possible matches, so the loop ends.
265      Otherwise, it means we have failed to find a non-empty-string match at a
266      point where there was a previous empty-string match. In this case, we do what
267      Perl does: advance the matching position by one, and continue. We do this by
268      setting the "end of previous match" offset, because that is picked up at the
269      top of the loop as the point at which to start again. */
270    
271      if (rc == PCRE_ERROR_NOMATCH)
272        {
273        if (options == 0) break;
274        ovector[1] = start_offset + 1;
275        continue;    /* Go round the loop again */
276        }
277    
278      /* Other matching errors are not recoverable. */
279    
280      if (rc < 0)
281        {
282        printf("Matching error %d\n", rc);
283        pcre_free(re);    /* Release memory used for the compiled pattern */
284        return 1;
285        }
286    
287      /* Match succeeded */
288    
289      printf("\nMatch succeeded again at offset %d\n", ovector[0]);
290    
291      /* The match succeeded, but the output vector wasn't big enough. */
292    
293      if (rc == 0)
294        {
295        rc = OVECCOUNT/3;
296        printf("ovector only has room for %d captured substrings\n", rc - 1);
297        }
298    
299      /* As before, show substrings stored in the output vector by number, and then
300      also any named substrings. */
301    
302      for (i = 0; i < rc; i++)
303        {
304        char *substring_start = subject + ovector[2*i];
305        int substring_length = ovector[2*i+1] - ovector[2*i];
306        printf("%2d: %.*s\n", i, substring_length, substring_start);
307        }
308    
309      if (namecount <= 0) printf("No named substrings\n"); else
310        {
311        unsigned char *tabptr = name_table;
312        printf("Named substrings\n");
313        for (i = 0; i < namecount; i++)
314          {
315          int n = (tabptr[0] << 8) | tabptr[1];
316          printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
317            ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
318          tabptr += name_entry_size;
319          }
320        }
321      }      /* End of loop to find second and subsequent matches */
322    
323    printf("\n");
324    pcre_free(re);       /* Release memory used for the compiled pattern */
325  return 0;  return 0;
326  }  }
327    
328    /* End of pcredemo.c */

Legend:
Removed from v.53  
changed lines
  Added in v.97

  ViewVC Help
Powered by ViewVC 1.1.5