/[pcre]/code/trunk/pcredemo.c
ViewVC logotype

Diff of /code/trunk/pcredemo.c

Parent Directory | Revision Log | View Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC revision 477 by ph10, Sun Jan 3 15:53:09 2010 UTC
# Line 1  Line 1 
1    /*************************************************
2    *           PCRE DEMONSTRATION PROGRAM           *
3    *************************************************/
4    
5    /* This is a demonstration program to illustrate the most straightforward ways
6    of calling the PCRE regular expression library from a C program. See the
7    pcresample documentation for a short discussion ("man pcresample" if you have
8    the PCRE man pages installed).
9    
10    In Unix-like environments, if PCRE is installed in your standard system
11    libraries, you should be able to compile this program using this command:
12    
13    gcc -Wall pcredemo.c -lpcre -o pcredemo
14    
15    If PCRE is not installed in a standard place, it is likely to be installed with
16    support for the pkg-config mechanism. If you have pkg-config, you can compile
17    this program using this command:
18    
19    gcc -Wall pcredemo.c `pkg-config --cflags --libs libpcre` -o pcredemo
20    
21    If you do not have pkg-config, you may have to use this:
22    
23    gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
24      -R/usr/local/lib -lpcre -o pcredemo
25    
26    Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
27    library files for PCRE are installed on your system. Only some operating
28    systems (e.g. Solaris) use the -R option.
29    
30    Building under Windows:
31    
32    If you want to statically link this program against a non-dll .a file, you must
33    define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
34    pcre_free() exported functions will be declared __declspec(dllimport), with
35    unwanted results. So in this environment, uncomment the following line. */
36    
37    /* #define PCRE_STATIC */
38    
39  #include <stdio.h>  #include <stdio.h>
40  #include <string.h>  #include <string.h>
41  #include <pcre.h>  #include <pcre.h>
42    
 /* Compile thuswise:  
   gcc -Wall pcredemo.c -I/opt/local/include -L/opt/local/lib \  
     -R/opt/local/lib -lpcre  
 */  
   
43  #define OVECCOUNT 30    /* should be a multiple of 3 */  #define OVECCOUNT 30    /* should be a multiple of 3 */
44    
45    
46  int main(int argc, char **argv)  int main(int argc, char **argv)
47  {  {
48  pcre *re;  pcre *re;
49  const char *error;  const char *error;
50    char *pattern;
51    char *subject;
52    unsigned char *name_table;
53  int erroffset;  int erroffset;
54    int find_all;
55    int namecount;
56    int name_entry_size;
57  int ovector[OVECCOUNT];  int ovector[OVECCOUNT];
58    int subject_length;
59  int rc, i;  int rc, i;
60    
61  if (argc != 3)  
62    /**************************************************************************
63    * First, sort out the command line. There is only one possible option at  *
64    * the moment, "-g" to request repeated matching to find all occurrences,  *
65    * like Perl's /g option. We set the variable find_all to a non-zero value *
66    * if the -g option is present. Apart from that, there must be exactly two *
67    * arguments.                                                              *
68    **************************************************************************/
69    
70    find_all = 0;
71    for (i = 1; i < argc; i++)
72      {
73      if (strcmp(argv[i], "-g") == 0) find_all = 1;
74        else break;
75      }
76    
77    /* After the options, we require exactly two arguments, which are the pattern,
78    and the subject string. */
79    
80    if (argc - i != 2)
81    {    {
82    printf("Two arguments required: a regex and a subject string\n");    printf("Two arguments required: a regex and a subject string\n");
83    return 1;    return 1;
84    }    }
85    
86  /* Compile the regular expression in the first argument */  pattern = argv[i];
87    subject = argv[i+1];
88    subject_length = (int)strlen(subject);
89    
90    
91    /*************************************************************************
92    * Now we are going to compile the regular expression pattern, and handle *
93    * any errors that are detected.                                          *
94    *************************************************************************/
95    
96  re = pcre_compile(  re = pcre_compile(
97    argv[1],              /* the pattern */    pattern,              /* the pattern */
98    0,                    /* default options */    0,                    /* default options */
99    &error,               /* for error message */    &error,               /* for error message */
100    &erroffset,           /* for error offset */    &erroffset,           /* for error offset */
# Line 40  if (re == NULL) Line 108  if (re == NULL)
108    return 1;    return 1;
109    }    }
110    
111  /* Compilation succeeded: match the subject in the second argument */  
112    /*************************************************************************
113    * If the compilation succeeded, we call PCRE again, in order to do a     *
114    * pattern match against the subject string. This does just ONE match. If *
115    * further matching is needed, it will be done below.                     *
116    *************************************************************************/
117    
118  rc = pcre_exec(  rc = pcre_exec(
119    re,                   /* the compiled pattern */    re,                   /* the compiled pattern */
120    NULL,                 /* no extra data - we didn't study the pattern */    NULL,                 /* no extra data - we didn't study the pattern */
121    argv[2],              /* the subject string */    subject,              /* the subject string */
122    (int)strlen(argv[2]), /* the length of the subject */    subject_length,       /* the length of the subject */
123    0,                    /* start at offset 0 in the subject */    0,                    /* start at offset 0 in the subject */
124    0,                    /* default options */    0,                    /* default options */
125    ovector,              /* output vector for substring information */    ovector,              /* output vector for substring information */
# Line 64  if (rc < 0) Line 137  if (rc < 0)
137      */      */
138      default: printf("Matching error %d\n", rc); break;      default: printf("Matching error %d\n", rc); break;
139      }      }
140      pcre_free(re);     /* Release memory used for the compiled pattern */
141    return 1;    return 1;
142    }    }
143    
144  /* Match succeeded */  /* Match succeeded */
145    
146  printf("Match succeeded\n");  printf("\nMatch succeeded at offset %d\n", ovector[0]);
147    
148    
149    /*************************************************************************
150    * We have found the first match within the subject string. If the output *
151    * vector wasn't big enough, say so. Then output any substrings that were *
152    * captured.                                                              *
153    *************************************************************************/
154    
155  /* The output vector wasn't big enough */  /* The output vector wasn't big enough */
156    
# Line 79  if (rc == 0) Line 160  if (rc == 0)
160    printf("ovector only has room for %d captured substrings\n", rc - 1);    printf("ovector only has room for %d captured substrings\n", rc - 1);
161    }    }
162    
163  /* Show substrings stored in the output vector */  /* Show substrings stored in the output vector by number. Obviously, in a real
164    application you might want to do things other than print them. */
165    
166  for (i = 0; i < rc; i++)  for (i = 0; i < rc; i++)
167    {    {
168    char *substring_start = argv[2] + ovector[2*i];    char *substring_start = subject + ovector[2*i];
169    int substring_length = ovector[2*i+1] - ovector[2*i];    int substring_length = ovector[2*i+1] - ovector[2*i];
170    printf("%2d: %.*s\n", i, substring_length, substring_start);    printf("%2d: %.*s\n", i, substring_length, substring_start);
171    }    }
172    
173    
174    /**************************************************************************
175    * That concludes the basic part of this demonstration program. We have    *
176    * compiled a pattern, and performed a single match. The code that follows *
177    * shows first how to access named substrings, and then how to code for    *
178    * repeated matches on the same subject.                                   *
179    **************************************************************************/
180    
181    /* See if there are any named substrings, and if so, show them by name. First
182    we have to extract the count of named parentheses from the pattern. */
183    
184    (void)pcre_fullinfo(
185      re,                   /* the compiled pattern */
186      NULL,                 /* no extra data - we didn't study the pattern */
187      PCRE_INFO_NAMECOUNT,  /* number of named substrings */
188      &namecount);          /* where to put the answer */
189    
190    if (namecount <= 0) printf("No named substrings\n"); else
191      {
192      unsigned char *tabptr;
193      printf("Named substrings\n");
194    
195      /* Before we can access the substrings, we must extract the table for
196      translating names to numbers, and the size of each entry in the table. */
197    
198      (void)pcre_fullinfo(
199        re,                       /* the compiled pattern */
200        NULL,                     /* no extra data - we didn't study the pattern */
201        PCRE_INFO_NAMETABLE,      /* address of the table */
202        &name_table);             /* where to put the answer */
203    
204      (void)pcre_fullinfo(
205        re,                       /* the compiled pattern */
206        NULL,                     /* no extra data - we didn't study the pattern */
207        PCRE_INFO_NAMEENTRYSIZE,  /* size of each entry in the table */
208        &name_entry_size);        /* where to put the answer */
209    
210      /* Now we can scan the table and, for each entry, print the number, the name,
211      and the substring itself. */
212    
213      tabptr = name_table;
214      for (i = 0; i < namecount; i++)
215        {
216        int n = (tabptr[0] << 8) | tabptr[1];
217        printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
218          ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
219        tabptr += name_entry_size;
220        }
221      }
222    
223    
224    /*************************************************************************
225    * If the "-g" option was given on the command line, we want to continue  *
226    * to search for additional matches in the subject string, in a similar   *
227    * way to the /g option in Perl. This turns out to be trickier than you   *
228    * might think because of the possibility of matching an empty string.    *
229    * What happens is as follows:                                            *
230    *                                                                        *
231    * If the previous match was NOT for an empty string, we can just start   *
232    * the next match at the end of the previous one.                         *
233    *                                                                        *
234    * If the previous match WAS for an empty string, we can't do that, as it *
235    * would lead to an infinite loop. Instead, a special call of pcre_exec() *
236    * is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set.    *
237    * The first of these tells PCRE that an empty string at the start of the *
238    * subject is not a valid match; other possibilities must be tried. The   *
239    * second flag restricts PCRE to one match attempt at the initial string  *
240    * position. If this match succeeds, an alternative to the empty string   *
241    * match has been found, and we can proceed round the loop.               *
242    *************************************************************************/
243    
244    if (!find_all)
245      {
246      pcre_free(re);   /* Release the memory used for the compiled pattern */
247      return 0;        /* Finish unless -g was given */
248      }
249    
250    /* Loop for second and subsequent matches */
251    
252    for (;;)
253      {
254      int options = 0;                 /* Normally no options */
255      int start_offset = ovector[1];   /* Start at end of previous match */
256    
257      /* If the previous match was for an empty string, we are finished if we are
258      at the end of the subject. Otherwise, arrange to run another match at the
259      same point to see if a non-empty match can be found. */
260    
261      if (ovector[0] == ovector[1])
262        {
263        if (ovector[0] == subject_length) break;
264        options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
265        }
266    
267      /* Run the next matching operation */
268    
269      rc = pcre_exec(
270        re,                   /* the compiled pattern */
271        NULL,                 /* no extra data - we didn't study the pattern */
272        subject,              /* the subject string */
273        subject_length,       /* the length of the subject */
274        start_offset,         /* starting offset in the subject */
275        options,              /* options */
276        ovector,              /* output vector for substring information */
277        OVECCOUNT);           /* number of elements in the output vector */
278    
279      /* This time, a result of NOMATCH isn't an error. If the value in "options"
280      is zero, it just means we have found all possible matches, so the loop ends.
281      Otherwise, it means we have failed to find a non-empty-string match at a
282      point where there was a previous empty-string match. In this case, we do what
283      Perl does: advance the matching position by one, and continue. We do this by
284      setting the "end of previous match" offset, because that is picked up at the
285      top of the loop as the point at which to start again. */
286    
287      if (rc == PCRE_ERROR_NOMATCH)
288        {
289        if (options == 0) break;
290        ovector[1] = start_offset + 1;
291        continue;    /* Go round the loop again */
292        }
293    
294      /* Other matching errors are not recoverable. */
295    
296      if (rc < 0)
297        {
298        printf("Matching error %d\n", rc);
299        pcre_free(re);    /* Release memory used for the compiled pattern */
300        return 1;
301        }
302    
303      /* Match succeeded */
304    
305      printf("\nMatch succeeded again at offset %d\n", ovector[0]);
306    
307      /* The match succeeded, but the output vector wasn't big enough. */
308    
309      if (rc == 0)
310        {
311        rc = OVECCOUNT/3;
312        printf("ovector only has room for %d captured substrings\n", rc - 1);
313        }
314    
315      /* As before, show substrings stored in the output vector by number, and then
316      also any named substrings. */
317    
318      for (i = 0; i < rc; i++)
319        {
320        char *substring_start = subject + ovector[2*i];
321        int substring_length = ovector[2*i+1] - ovector[2*i];
322        printf("%2d: %.*s\n", i, substring_length, substring_start);
323        }
324    
325      if (namecount <= 0) printf("No named substrings\n"); else
326        {
327        unsigned char *tabptr = name_table;
328        printf("Named substrings\n");
329        for (i = 0; i < namecount; i++)
330          {
331          int n = (tabptr[0] << 8) | tabptr[1];
332          printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
333            ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
334          tabptr += name_entry_size;
335          }
336        }
337      }      /* End of loop to find second and subsequent matches */
338    
339    printf("\n");
340    pcre_free(re);       /* Release memory used for the compiled pattern */
341  return 0;  return 0;
342  }  }
343    
344    /* End of pcredemo.c */

Legend:
Removed from v.53  
changed lines
  Added in v.477

  ViewVC Help
Powered by ViewVC 1.1.5