/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 49 by nigel, Sat Feb 24 21:39:33 2007 UTC revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2005 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #include <ctype.h>
41  #include <stdio.h>  #include <stdio.h>
42  #include <string.h>  #include <string.h>
43  #include <stdlib.h>  #include <stdlib.h>
44  #include <errno.h>  #include <errno.h>
45    
46    #include <sys/types.h>
47    #include <sys/stat.h>
48    #include <unistd.h>
49    
50  #include "config.h"  #include "config.h"
51  #include "pcre.h"  #include "pcre.h"
52    
# Line 17  its pattern matching. */ Line 55  its pattern matching. */
55    
56  typedef int BOOL;  typedef int BOOL;
57    
58    #define VERSION "4.0 07-Jun-2005"
59    #define MAX_PATTERN_COUNT 100
60    
61    #if BUFSIZ > 8192
62    #define MBUFTHIRD BUFSIZ
63    #else
64    #define MBUFTHIRD 8192
65    #endif
66    
67    
68    
69  /*************************************************  /*************************************************
70  *               Global variables                 *  *               Global variables                 *
71  *************************************************/  *************************************************/
72    
73  static pcre *pattern;  static char *pattern_filename = NULL;
74  static pcre_extra *hints;  static char *stdin_name = (char *)"(standard input)";
75    static int  pattern_count = 0;
76    static pcre **pattern_list;
77    static pcre_extra **hints_list;
78    
79    static char *include_pattern = NULL;
80    static char *exclude_pattern = NULL;
81    
82    static pcre *include_compiled = NULL;
83    static pcre *exclude_compiled = NULL;
84    
85    static int after_context = 0;
86    static int before_context = 0;
87    static int both_context = 0;
88    
89  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
90    static BOOL filenames = TRUE;
91  static BOOL filenames_only = FALSE;  static BOOL filenames_only = FALSE;
92    static BOOL filenames_nomatch_only = FALSE;
93    static BOOL hyphenpending = FALSE;
94  static BOOL invert = FALSE;  static BOOL invert = FALSE;
95    static BOOL multiline = FALSE;
96  static BOOL number = FALSE;  static BOOL number = FALSE;
97    static BOOL quiet = FALSE;
98    static BOOL recurse = FALSE;
99  static BOOL silent = FALSE;  static BOOL silent = FALSE;
100  static BOOL whole_lines = FALSE;  static BOOL whole_lines = FALSE;
101    static BOOL word_match = FALSE;
102    
103    /* Structure for options and list of them */
104    
/* Kinds of option, describing what data (if any) accompanies it. */

enum {
  OP_NODATA,    /* the option takes no data */
  OP_STRING,    /* the option takes a string value */
  OP_NUMBER     /* the option takes a numeric value */
};

/* One entry in the table of recognized options. */

typedef struct option_item {
  int type;                 /* one of the OP_xxx values above */
  int one_char;             /* single-letter form; -1 when there is none, and
                               0 in the entry that terminates the table */
  void *dataptr;            /* address of the variable associated with the
                               option, or NULL */
  const char *long_name;    /* long option name as shown in the help output */
  const char *help_text;    /* one-line description shown in the help output */
} option_item;
114    
115    static option_item optionlist[] = {
116      { OP_NODATA, -1,  NULL,              "",              "  terminate options" },
117      { OP_NODATA, -1,  NULL,              "help",          "display this help and exit" },
118      { OP_NUMBER, 'A', &after_context,    "after-context=number", "set number of following context lines" },
119      { OP_NUMBER, 'B', &before_context,   "before-context=number", "set number of prior context lines" },
120      { OP_NUMBER, 'C', &both_context,     "context=number", "set number of context lines, before & after" },
121      { OP_NODATA, 'c', NULL,              "count",         "print only a count of matching lines per FILE" },
122      { OP_STRING, 'f', &pattern_filename, "file=path",     "read patterns from file" },
123      { OP_NODATA, 'h', NULL,              "no-filename",   "suppress the prefixing filename on output" },
124      { OP_NODATA, 'i', NULL,              "ignore-case",   "ignore case distinctions" },
125      { OP_NODATA, 'l', NULL,              "files-with-matches", "print only FILE names containing matches" },
126      { OP_NODATA, 'L', NULL,              "files-without-match","print only FILE names not containing matches" },
127      { OP_STRING, -1,  &stdin_name,       "label=name",    "set name for standard input" },
128      { OP_NODATA, 'M', NULL,              "multiline",     "run in multiline mode" },
129      { OP_NODATA, 'n', NULL,              "line-number",   "print line number with output lines" },
130      { OP_NODATA, 'q', NULL,              "quiet",         "suppress output, just set return code" },
131      { OP_NODATA, 'r', NULL,              "recursive",     "recursively scan sub-directories" },
132      { OP_STRING, -1,  &exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
133      { OP_STRING, -1,  &include_pattern,  "include=pattern","include matching files when recursing" },
134      { OP_NODATA, 's', NULL,              "no-messages",   "suppress error messages" },
135      { OP_NODATA, 'u', NULL,              "utf-8",         "use UTF-8 mode" },
136      { OP_NODATA, 'V', NULL,              "version",       "print version information and exit" },
137      { OP_NODATA, 'v', NULL,              "invert-match",  "select non-matching lines" },
138      { OP_NODATA, 'w', NULL,              "word-regex(p)", "force PATTERN to match only as a word"  },
139      { OP_NODATA, 'x', NULL,              "line-regex(p)", "force PATTERN to match only whole lines" },
140      { OP_NODATA, 0,   NULL,               NULL,            NULL }
141    };
142    
143    
144    /*************************************************
145    *       Functions for directory scanning         *
146    *************************************************/
147    
148    /* These functions are defined so that they can be made system specific,
149    although at present the only ones are for Unix, Win32, and for "no directory
150    recursion support". */
151    
152    
153    /************* Directory scanning in Unix ***********/
154    
155    #if IS_UNIX
156    #include <sys/types.h>
157    #include <sys/stat.h>
158    #include <dirent.h>
159    
160    typedef DIR directory_type;
161    
/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and reports a directory;
            0 otherwise (including when stat() fails, in the expectation
            that a later attempt to open the path as a file will fail too)
*/

static int
isdirectory(char *filename)
{
struct stat info;
int result = 0;
if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode))
  result = '/';
return result;
}
170    
171    static directory_type *
172    opendirectory(char *filename)
173    {
174    return opendir(filename);
175    }
176    
177    static char *
178    readdirectory(directory_type *dir)
179    {
180    for (;;)
181      {
182      struct dirent *dent = readdir(dir);
183      if (dent == NULL) return NULL;
184      if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
185        return dent->d_name;
186      }
187    return NULL;   /* Keep compiler happy; never executed */
188    }
189    
190    static void
191    closedirectory(directory_type *dir)
192    {
193    closedir(dir);
194    }
195    
196    
197    /************* Directory scanning in Win32 ***********/
198    
199    /* I (Philip Hazel) have no means of testing this code. It was contributed by
200    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
201    when it did not exist. */
202    
203    
204    #elif HAVE_WIN32API
205    
206    #ifndef STRICT
207    # define STRICT
208    #endif
209    #ifndef WIN32_LEAN_AND_MEAN
210    # define WIN32_LEAN_AND_MEAN
211    #endif
212    #ifndef INVALID_FILE_ATTRIBUTES
213    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
214    #endif
215    
216    #include <windows.h>
217    
218    typedef struct directory_type
219    {
220    HANDLE handle;
221    BOOL first;
222    WIN32_FIND_DATA data;
223    } directory_type;
224    
225    int
226    isdirectory(char *filename)
227    {
228    DWORD attr = GetFileAttributes(filename);
229    if (attr == INVALID_FILE_ATTRIBUTES)
230      return 0;
231    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
232    }
233    
234    directory_type *
235    opendirectory(char *filename)
236    {
237    size_t len;
238    char *pattern;
239    directory_type *dir;
240    DWORD err;
241    len = strlen(filename);
242    pattern = (char *) malloc(len + 3);
243    dir = (directory_type *) malloc(sizeof(*dir));
244    if ((pattern == NULL) || (dir == NULL))
245      {
246      fprintf(stderr, "pcregrep: malloc failed\n");
247      exit(2);
248      }
249    memcpy(pattern, filename, len);
250    memcpy(&(pattern[len]), "\\*", 3);
251    dir->handle = FindFirstFile(pattern, &(dir->data));
252    if (dir->handle != INVALID_HANDLE_VALUE)
253      {
254      free(pattern);
255      dir->first = TRUE;
256      return dir;
257      }
258    err = GetLastError();
259    free(pattern);
260    free(dir);
261    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
262    return NULL;
263    }
264    
265    char *
266    readdirectory(directory_type *dir)
267    {
268    for (;;)
269      {
270      if (!dir->first)
271        {
272        if (!FindNextFile(dir->handle, &(dir->data)))
273          return NULL;
274        }
275      else
276        {
277        dir->first = FALSE;
278        }
279      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
280        return dir->data.cFileName;
281      }
282    #ifndef _MSC_VER
283    return NULL;   /* Keep compiler happy; never executed */
284    #endif
285    }
286    
287    void
288    closedirectory(directory_type *dir)
289    {
290    FindClose(dir->handle);
291    free(dir);
292    }
293    
294    
295    /************* Directory scanning when we can't do it ***********/
296    
297    /* The type is void, and apart from isdirectory(), the functions do nothing. */
298    
299    #else
300    
typedef void directory_type;

/* Stub versions for systems without directory scanning. isdirectory() always
reports "not a directory". The other functions have nothing to do, but those
that return a pointer now return an explicit NULL: falling off the end of a
non-void function gives undefined behaviour if the caller uses the result. */

int isdirectory(char *filename) { (void)filename; return 0; }
directory_type * opendirectory(char *filename) { (void)filename; return NULL; }
char *readdirectory(directory_type *dir) { (void)dir; return NULL; }
void closedirectory(directory_type *dir) { (void)dir; }
307    
308    #endif
309    
310    
311    
# Line 58  return sys_errlist[n]; Line 332  return sys_errlist[n];
332    
333    
334  /*************************************************  /*************************************************
335  *              Grep an individual file           *  *       Print the previous "after" lines         *
336    *************************************************/
337    
338    /* This is called if we are about to lose said lines because of buffer filling,
339    and at the end of the file.
340    
341    Arguments:
342      lastmatchnumber   the number of the last matching line, plus one
343      lastmatchrestart  where we restarted after the last match
344      endptr            end of available data
345      printname         filename for printing
346    
347    Returns:            nothing
348    */
349    
350    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
351      char *endptr, char *printname)
352    {
353    if (after_context > 0 && lastmatchnumber > 0)
354      {
355      int count = 0;
356      while (lastmatchrestart < endptr && count++ < after_context)
357        {
358        char *pp = lastmatchrestart;
359        if (printname != NULL) fprintf(stdout, "%s-", printname);
360        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
361        while (*pp != '\n') pp++;
362        fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);
363        lastmatchrestart = pp + 1;
364        }
365      hyphenpending = TRUE;
366      }
367    }
368    
369    
370    
371    /*************************************************
372    *            Grep an individual file             *
373  *************************************************/  *************************************************/
374    
375    /* This is called from grep_or_recurse() below. It uses a buffer that is three
376    times the value of MBUFTHIRD. The matching point is never allowed to stray into
377    the top third of the buffer, thus keeping more of the file available for
378    context printing or for multiline scanning. For large files, the pointer will
379    be in the middle third most of the time, so the bottom third is available for
380    "before" context printing.
381    
382    Arguments:
383      in           the fopened FILE stream
384      printname    the file name if it is to be printed for each match
385                   or NULL if the file name is not to be printed
386                   it cannot be NULL if filenames[_nomatch]_only is set
387    
388    Returns:       0 if there was at least one match
389                   1 otherwise (no matches)
390    */
391    
392  static int  static int
393  pcregrep(FILE *in, char *name)  pcregrep(FILE *in, char *printname)
394  {  {
395  int rc = 1;  int rc = 1;
396  int linenumber = 0;  int linenumber = 1;
397    int lastmatchnumber = 0;
398  int count = 0;  int count = 0;
399  int offsets[99];  int offsets[99];
400  char buffer[BUFSIZ];  char *lastmatchrestart = NULL;
401    char buffer[3*MBUFTHIRD];
402    char *ptr = buffer;
403    char *endptr;
404    size_t bufflength;
405    BOOL endhyphenpending = FALSE;
406    
407    /* Do the first read into the start of the buffer and set up the pointer to
408    end of what we have. */
409    
410    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
411    endptr = buffer + bufflength;
412    
413    /* Loop while the current pointer is not at the end of the file. For large
414    files, endptr will be at the end of the buffer when we are in the middle of the
415    file, but ptr will never get there, because as soon as it gets over 2/3 of the
416    way, the buffer is shifted left and re-filled. */
417    
418  while (fgets(buffer, sizeof(buffer), in) != NULL)  while (ptr < endptr)
419    {    {
420    BOOL match;    int i;
421    int length = (int)strlen(buffer);    BOOL match = FALSE;
422    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    char *t = ptr;
423    linenumber++;    size_t length, linelength;
424    
425      /* At this point, ptr is at the start of a line. We need to find the length
426      of the subject string to pass to pcre_exec(). In multiline mode, it is the
427      length of the remainder of the data in the buffer. Otherwise, it is the length of
428      the next line. After matching, we always advance by the length of the next
429      line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
430      that any match is constrained to be in the first line. */
431    
432      linelength = 0;
433      while (t < endptr && *t++ != '\n') linelength++;
434      length = multiline? endptr - ptr : linelength;
435    
436      /* Run through all the patterns until one matches. Note that we don't include
437      the final newline in the subject string. */
438    
439    match = pcre_exec(pattern, hints, buffer, length, 0, 0, offsets, 99) >= 0;    for (i = 0; !match && i < pattern_count; i++)
440    if (match && whole_lines && offsets[1] != length) match = FALSE;      {
441        match = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
442          offsets, 99) >= 0;
443        }
444    
445      /* If it's a match or a not-match (as required), print what's wanted. */
446    
447    if (match != invert)    if (match != invert)
448      {      {
449        BOOL hyphenprinted = FALSE;
450    
451        if (filenames_nomatch_only) return 1;
452    
453      if (count_only) count++;      if (count_only) count++;
454    
455      else if (filenames_only)      else if (filenames_only)
456        {        {
457        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        fprintf(stdout, "%s\n", printname);
458        return 0;        return 0;
459        }        }
460    
461      else if (silent) return 0;      else if (quiet) return 0;
462    
463      else      else
464        {        {
465        if (name != NULL) fprintf(stdout, "%s:", name);        /* See if there is a requirement to print some "after" lines from a
466          previous match. We never print any overlaps. */
467    
468          if (after_context > 0 && lastmatchnumber > 0)
469            {
470            int linecount = 0;
471            char *p = lastmatchrestart;
472    
473            while (p < ptr && linecount < after_context)
474              {
475              while (*p != '\n') p++;
476              p++;
477              linecount++;
478              }
479    
480            /* It is important to advance lastmatchrestart during this printing so
481            that it interacts correctly with any "before" printing below. */
482    
483            while (lastmatchrestart < p)
484              {
485              char *pp = lastmatchrestart;
486              if (printname != NULL) fprintf(stdout, "%s-", printname);
487              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
488              while (*pp != '\n') pp++;
489              fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);
490              lastmatchrestart = pp + 1;
491              }
492            if (lastmatchrestart != ptr) hyphenpending = TRUE;
493            }
494    
495          /* If there were non-contiguous lines printed above, insert hyphens. */
496    
497          if (hyphenpending)
498            {
499            fprintf(stdout, "--\n");
500            hyphenpending = FALSE;
501            hyphenprinted = TRUE;
502            }
503    
504          /* See if there is a requirement to print some "before" lines for this
505          match. Again, don't print overlaps. */
506    
507          if (before_context > 0)
508            {
509            int linecount = 0;
510            char *p = ptr;
511    
512            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
513                   linecount++ < before_context)
514              {
515              p--;
516              while (p > buffer && p[-1] != '\n') p--;
517              }
518    
519            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
520              fprintf(stdout, "--\n");
521    
522            while (p < ptr)
523              {
524              char *pp = p;
525              if (printname != NULL) fprintf(stdout, "%s-", printname);
526              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
527              while (*pp != '\n') pp++;
528              fprintf(stdout, "%.*s", pp - p + 1, p);
529              p = pp + 1;
530              }
531            }
532    
533          /* Now print the matching line(s); ensure we set hyphenpending at the end
534          of the file. */
535    
536          endhyphenpending = TRUE;
537          if (printname != NULL) fprintf(stdout, "%s:", printname);
538        if (number) fprintf(stdout, "%d:", linenumber);        if (number) fprintf(stdout, "%d:", linenumber);
539        fprintf(stdout, "%s\n", buffer);  
540          /* In multiline mode, we want to print to the end of the line in which
541          the end of the matched string is found, so we adjust linelength and the
542          line number appropriately. Because the PCRE_FIRSTLINE option is set, the
543          start of the match will always be before the first \n character. */
544    
545          if (multiline)
546            {
547            char *endmatch = ptr + offsets[1];
548            t = ptr;
549            while (t < endmatch) { if (*t++ == '\n') linenumber++; }
550            while (endmatch < endptr && *endmatch != '\n') endmatch++;
551            linelength = endmatch - ptr;
552            }
553    
554          fprintf(stdout, "%.*s\n", linelength, ptr);
555        }        }
556    
557      rc = 0;      rc = 0;    /* Had some success */
558    
559        /* Remember where the last match happened for after_context. We remember
560        where we are about to restart, and that line's number. */
561    
562        lastmatchrestart = ptr + linelength + 1;
563        lastmatchnumber = linenumber + 1;
564      }      }
565    
566      /* Advance to after the newline and increment the line number. */
567    
568      ptr += linelength + 1;
569      linenumber++;
570    
571      /* If we haven't yet reached the end of the file (the buffer is full), and
572      the current point is in the top 1/3 of the buffer, slide the buffer down by
573      1/3 and refill it. Before we do this, if some unprinted "after" lines are
574      about to be lost, print them. */
575    
576      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
577        {
578        if (after_context > 0 &&
579            lastmatchnumber > 0 &&
580            lastmatchrestart < buffer + MBUFTHIRD)
581          {
582          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
583          lastmatchnumber = 0;
584          }
585    
586        /* Now do the shuffle */
587    
588        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
589        ptr -= MBUFTHIRD;
590        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
591        endptr = buffer + bufflength;
592    
593        /* Adjust any last match point */
594    
595        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
596        }
597      }     /* Loop through the whole file */
598    
599    /* End of file; print final "after" lines if wanted; do_after_lines sets
600    hyphenpending if it prints something. */
601    
602    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
603    hyphenpending |= endhyphenpending;
604    
605    /* Print the file name if we are looking for those without matches and there
606    were none. If we found a match, we won't have got this far. */
607    
608    if (filenames_nomatch_only)
609      {
610      fprintf(stdout, "%s\n", printname);
611      return 0;
612    }    }
613    
614    /* Print the match count if wanted */
615    
616  if (count_only)  if (count_only)
617    {    {
618    if (name != NULL) fprintf(stdout, "%s:", name);    if (printname != NULL) fprintf(stdout, "%s:", printname);
619    fprintf(stdout, "%d\n", count);    fprintf(stdout, "%d\n", count);
620    }    }
621    
# Line 114  return rc; Line 624  return rc;
624    
625    
626    
627    /*************************************************
628    *     Grep a file or recurse into a directory    *
629    *************************************************/
630    
631    /* Given a path name, if it's a directory, scan all the files if we are
632    recursing; if it's a file, grep it.
633    
634    Arguments:
635      pathname          the path to investigate
636      dir_recurse       TRUE if recursing is wanted (-r)
637      show_filenames    TRUE if file names are wanted for multiple files, except
638                          for the only file at top level when not filenames_only
639      only_one_at_top   TRUE if the path is the only one at toplevel
640    
641    Returns:   0 if there was at least one match
642               1 if there were no matches
643               2 there was some kind of error
644    
645    However, file opening failures are suppressed if "silent" is set.
646    */
647    
648    static int
649    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL show_filenames,
650      BOOL only_one_at_top)
651    {
652    int rc = 1;
653    int sep;
654    FILE *in;
655    char *printname;
656    
657    /* If the file name is "-" we scan stdin */
658    
659    if (strcmp(pathname, "-") == 0)
660      {
661      return pcregrep(stdin,
662        (filenames_only || filenames_nomatch_only ||
663        (show_filenames && !only_one_at_top))?
664          stdin_name : NULL);
665      }
666    
667    /* If the file is a directory and we are recursing, scan each file within it,
668    subject to any include or exclude patterns that were set. The scanning code is
669    localized so it can be made system-specific. */
670    
671    if ((sep = isdirectory(pathname)) != 0 && dir_recurse)
672      {
673      char buffer[1024];
674      char *nextfile;
675      directory_type *dir = opendirectory(pathname);
676    
677      if (dir == NULL)
678        {
679        if (!silent)
680          fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
681            strerror(errno));
682        return 2;
683        }
684    
685      while ((nextfile = readdirectory(dir)) != NULL)
686        {
687        int frc, blen;
688        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
689        blen = strlen(buffer);
690    
691        if (exclude_compiled != NULL &&
692            pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
693          continue;
694    
695        if (include_compiled != NULL &&
696            pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
697          continue;
698    
699        frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);
700        if (frc > 1) rc = frc;
701         else if (frc == 0 && rc == 1) rc = 0;
702        }
703    
704      closedirectory(dir);
705      return rc;
706      }
707    
708    /* If the file is not a directory, or we are not recursing, scan it. If this is
709    the first and only argument at top level, we don't show the file name (unless
710    we are only showing the file name). Otherwise, control is via the
711    show_filenames variable. */
712    
713    in = fopen(pathname, "r");
714    if (in == NULL)
715      {
716      if (!silent)
717        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
718          strerror(errno));
719      return 2;
720      }
721    
722    printname =  (filenames_only || filenames_nomatch_only ||
723      (show_filenames && !only_one_at_top))? pathname : NULL;
724    
725    rc = pcregrep(in, printname);
726    
727    fclose(in);
728    return rc;
729    }
730    
731    
732    
733    
734  /*************************************************  /*************************************************
735  *                Usage function                  *  *                Usage function                  *
# Line 122  return rc; Line 738  return rc;
738  static int  static int
739  usage(int rc)  usage(int rc)
740  {  {
741  fprintf(stderr, "Usage: pcregrep [-Vchilnsvx] pattern [file] ...\n");  fprintf(stderr, "Usage: pcregrep [-LMVcfhilnqrsvwx] [long-options] [pattern] [file1 file2 ...]\n");
742    fprintf(stderr, "Type `pcregrep --help' for more information.\n");
743  return rc;  return rc;
744  }  }
745    
# Line 130  return rc; Line 747  return rc;
747    
748    
749  /*************************************************  /*************************************************
750    *                Help function                   *
751    *************************************************/
752    
753    static void
754    help(void)
755    {
756    option_item *op;
757    
758    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
759    printf("Search for PATTERN in each FILE or standard input.\n");
760    printf("PATTERN must be present if -f is not used.\n");
761    printf("\"-\" can be used as a file name to mean STDIN.\n");
762    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
763    
764    printf("Options:\n");
765    
766    for (op = optionlist; op->one_char != 0; op++)
767      {
768      int n;
769      char s[4];
770      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
771      printf("  %s --%s%n", s, op->long_name, &n);
772      n = 30 - n;
773      if (n < 1) n = 1;
774      printf("%.*s%s\n", n, "                    ", op->help_text);
775      }
776    
777    printf("\nWhen reading patterns from a file instead of using a command line option,\n");
778    printf("trailing white space is removed and blank lines are ignored.\n");
779    printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
780    
781    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
782    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
783    }
784    
785    
786    
787    
788    /*************************************************
789    *    Handle a single-letter, no data option      *
790    *************************************************/
791    
792    static int
793    handle_option(int letter, int options)
794    {
795    switch(letter)
796      {
797      case -1:  help(); exit(0);
798      case 'c': count_only = TRUE; break;
799      case 'h': filenames = FALSE; break;
800      case 'i': options |= PCRE_CASELESS; break;
801      case 'l': filenames_only = TRUE; break;
802      case 'L': filenames_nomatch_only = TRUE; break;
803      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
804      case 'n': number = TRUE; break;
805      case 'q': quiet = TRUE; break;
806      case 'r': recurse = TRUE; break;
807      case 's': silent = TRUE; break;
808      case 'u': options |= PCRE_UTF8; break;
809      case 'v': invert = TRUE; break;
810      case 'w': word_match = TRUE; break;
811      case 'x': whole_lines = TRUE; break;
812    
813      case 'V':
814      fprintf(stderr, "pcregrep version %s using ", VERSION);
815      fprintf(stderr, "PCRE version %s\n", pcre_version());
816      exit(0);
817      break;
818    
819      default:
820      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
821      exit(usage(2));
822      }
823    
824    return options;
825    }
826    
827    
828    
829    
830    /*************************************************
831  *                Main program                    *  *                Main program                    *
832  *************************************************/  *************************************************/
833    
834    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
835    
836  int  int
837  main(int argc, char **argv)  main(int argc, char **argv)
838  {  {
839  int i;  int i, j;
840  int rc = 1;  int rc = 1;
841  int options = 0;  int options = 0;
842  int errptr;  int errptr;
843  const char *error;  const char *error;
844  BOOL filenames = TRUE;  BOOL only_one_at_top;
845    
846  /* Process the options */  /* Process the options */
847    
848  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
849    {    {
850    char *s;    option_item *op = NULL;
851      char *option_data = (char *)"";    /* default to keep compiler happy */
852      BOOL longop;
853      BOOL longopwasequals = FALSE;
854    
855    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
856    s = argv[i] + 1;  
857    while (*s != 0)    /* If we hit an argument that is just "-", it may be a reference to STDIN,
858      but only if we have previously had -f to define the patterns. */
859    
860      if (argv[i][1] == 0)
861      {      {
862      switch (*s++)      if (pattern_filename != NULL) break;
863          else exit(usage(2));
864        }
865    
866      /* Handle a long name option, or -- to terminate the options */
867    
868      if (argv[i][1] == '-')
869        {
870        char *arg = argv[i] + 2;
871        char *argequals = strchr(arg, '=');
872    
873        if (*arg == 0)    /* -- terminates options */
874        {        {
875        case 'c': count_only = TRUE; break;        i++;
876        case 'h': filenames = FALSE; break;        break;                /* out of the options-handling loop */
877        case 'i': options |= PCRE_CASELESS; break;        }
878        case 'l': filenames_only = TRUE;  
879        case 'n': number = TRUE; break;      longop = TRUE;
       case 's': silent = TRUE; break;  
       case 'v': invert = TRUE; break;  
       case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;  
880    
881        case 'V':      /* Some long options have data that follows after =, for example file=name.
882        fprintf(stderr, "PCRE version %s\n", pcre_version());      Some options have variations in the long name spelling: specifically, we
883        break;      allow "regexp" because GNU grep allows it, though I personally go along
884        with Jeff Friedl in preferring "regex" without the "p". These options are
885        entered in the table as "regex(p)". No option is in both these categories,
886        fortunately. */
887    
888        default:      for (op = optionlist; op->one_char != 0; op++)
889        fprintf(stderr, "pcregrep: unknown option %c\n", s[-1]);        {
890        return usage(2);        char *opbra = strchr(op->long_name, '(');
891          char *equals = strchr(op->long_name, '=');
892          if (opbra == NULL)     /* Not a (p) case */
893            {
894            if (equals == NULL)  /* Not thing=data case */
895              {
896              if (strcmp(arg, op->long_name) == 0) break;
897              }
898            else                 /* Special case xxx=data */
899              {
900              int oplen = equals - op->long_name;
901              int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
902              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
903                {
904                option_data = arg + arglen;
905                if (*option_data == '=')
906                  {
907                  option_data++;
908                  longopwasequals = TRUE;
909                  }
910                break;
911                }
912              }
913            }
914          else                   /* Special case xxxx(p) */
915            {
916            char buff1[24];
917            char buff2[24];
918            int baselen = opbra - op->long_name;
919            sprintf(buff1, "%.*s", baselen, op->long_name);
920            sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
921              opbra + 1);
922            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
923              break;
924            }
925          }
926    
927        if (op->one_char == 0)
928          {
929          fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
930          exit(usage(2));
931          }
932        }
933    
934      /* One-char options; many that have no data may be in a single argument; we
935      continue till we hit the last one or one that needs data. */
936    
937      else
938        {
939        char *s = argv[i] + 1;
940        longop = FALSE;
941        while (*s != 0)
942          {
943          for (op = optionlist; op->one_char != 0; op++)
944            { if (*s == op->one_char) break; }
945          if (op->one_char == 0)
946            {
947            fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
948              *s, argv[i]);
949            exit(usage(2));
950            }
951          if (op->type != OP_NODATA || s[1] == 0)
952            {
953            option_data = s+1;
954            break;
955            }
956          options = handle_option(*s++, options);
957          }
958        }
959    
960      /* At this point we should have op pointing to a matched option */
961    
962      if (op->type == OP_NODATA)
963        options = handle_option(op->one_char, options);
964      else
965        {
966        if (*option_data == 0)
967          {
968          if (i >= argc - 1 || longopwasequals)
969            {
970            fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
971            exit(usage(2));
972            }
973          option_data = argv[++i];
974          }
975    
976        if (op->type == OP_STRING) *((char **)op->dataptr) = option_data; else
977          {
978          char *endptr;
979          int n = strtoul(option_data, &endptr, 10);
980          if (*endptr != 0)
981            {
982            if (longop)
983              fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%s\n",
984                option_data, op->long_name);
985            else
986              fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
987                option_data, op->one_char);
988            exit(usage(2));
989            }
990          *((int *)op->dataptr) = n;
991        }        }
992      }      }
993    }    }
994    
995  /* There must be at least a regexp argument */  /* Options have been decoded. If -C was used, its value is used as a default
996    for -A and -B. */
997    
998  if (i >= argc) return usage(0);  if (both_context > 0)
999      {
1000      if (after_context == 0) after_context = both_context;
1001      if (before_context == 0) before_context = both_context;
1002      }
1003    
1004  /* Compile the regular expression. */  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1005    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1006    
1007  pattern = pcre_compile(argv[i++], options, &error, &errptr, NULL);  if (pattern_list == NULL || hints_list == NULL)
 if (pattern == NULL)  
1008    {    {
1009    fprintf(stderr, "pcregrep: error in regex at offset %d: %s\n", errptr, error);    fprintf(stderr, "pcregrep: malloc failed\n");
1010    return 2;    return 2;
1011    }    }
1012    
1013  /* Study the regular expression, as we will be running it may times */  /* Compile the regular expression(s). */
1014    
1015  hints = pcre_study(pattern, 0, &error);  if (pattern_filename != NULL)
 if (error != NULL)  
1016    {    {
1017    fprintf(stderr, "pcregrep: error while studing regex: %s\n", error);    FILE *f = fopen(pattern_filename, "r");
1018    return 2;    char buffer[MBUFTHIRD + 16];
1019      char *rdstart;
1020      int adjust = 0;
1021    
1022      if (f == NULL)
1023        {
1024        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1025          strerror(errno));
1026        return 2;
1027        }
1028    
1029      if (whole_lines)
1030        {
1031        strcpy(buffer, "^(?:");
1032        adjust = 4;
1033        }
1034      else if (word_match)
1035        {
1036        strcpy(buffer, "\\b");
1037        adjust = 2;
1038        }
1039    
1040      rdstart = buffer + adjust;
1041      while (fgets(rdstart, MBUFTHIRD, f) != NULL)
1042        {
1043        char *s = rdstart + (int)strlen(rdstart);
1044        if (pattern_count >= MAX_PATTERN_COUNT)
1045          {
1046          fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",
1047            MAX_PATTERN_COUNT);
1048          return 2;
1049          }
1050        while (s > rdstart && isspace((unsigned char)(s[-1]))) s--;
1051        if (s == rdstart) continue;
1052        if (whole_lines) strcpy(s, ")$");
1053          else if (word_match)strcpy(s, "\\b");
1054            else *s = 0;
1055        pattern_list[pattern_count] = pcre_compile(buffer, options, &error,
1056          &errptr, NULL);
1057        if (pattern_list[pattern_count++] == NULL)
1058          {
1059          fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",
1060            pattern_count, errptr - adjust, error);
1061          return 2;
1062          }
1063        }
1064      fclose(f);
1065    }    }
1066    
1067  /* If there are no further arguments, do the business on stdin and exit */  /* If no file name, a single regex must be given inline. */
1068    
1069  if (i >= argc) return pcregrep(stdin, NULL);  else
1070      {
1071      char buffer[MBUFTHIRD + 16];
1072      char *pat;
1073      int adjust = 0;
1074    
1075  /* Otherwise, work through the remaining arguments as files. If there is only    if (i >= argc) return usage(2);
 one, don't give its name on the output. */  
1076    
1077  if (i == argc - 1) filenames = FALSE;    if (whole_lines)
1078  if (filenames_only) filenames = TRUE;      {
1079        sprintf(buffer, "^(?:%.*s)$", MBUFTHIRD, argv[i++]);
1080        pat = buffer;
1081        adjust = 4;
1082        }
1083      else if (word_match)
1084        {
1085        sprintf(buffer, "\\b%.*s\\b", MBUFTHIRD, argv[i++]);
1086        pat = buffer;
1087        adjust = 2;
1088        }
1089      else pat = argv[i++];
1090    
1091  for (; i < argc; i++)    pattern_list[0] = pcre_compile(pat, options, &error, &errptr, NULL);
1092    
1093      if (pattern_list[0] == NULL)
1094        {
1095        fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n",
1096          errptr - adjust, error);
1097        return 2;
1098        }
1099      pattern_count++;
1100      }
1101    
1102    /* Study the regular expressions, as we will be running them many times */
1103    
1104    for (j = 0; j < pattern_count; j++)
1105    {    {
1106    FILE *in = fopen(argv[i], "r");    hints_list[j] = pcre_study(pattern_list[j], 0, &error);
1107    if (in == NULL)    if (error != NULL)
1108      {      {
1109      fprintf(stderr, "%s: failed to open: %s\n", argv[i], strerror(errno));      char s[16];
1110      rc = 2;      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1111        fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1112        return 2;
1113      }      }
1114    else    }
1115    
1116    /* If there are include or exclude patterns, compile them. */
1117    
1118    if (exclude_pattern != NULL)
1119      {
1120      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, NULL);
1121      if (exclude_compiled == NULL)
1122        {
1123        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1124          errptr, error);
1125        return 2;
1126        }
1127      }
1128    
1129    if (include_pattern != NULL)
1130      {
1131      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, NULL);
1132      if (include_compiled == NULL)
1133      {      {
1134      int frc = pcregrep(in, filenames? argv[i] : NULL);      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1135      if (frc == 0 && rc == 1) rc = 0;        errptr, error);
1136      fclose(in);      return 2;
1137      }      }
1138    }    }
1139    
1140    /* If there are no further arguments, do the business on stdin and exit */
1141    
1142    if (i >= argc) return pcregrep(stdin,
1143      (filenames_only || filenames_nomatch_only)? stdin_name : NULL);
1144    
1145    /* Otherwise, work through the remaining arguments as files or directories.
1146    Pass in the fact that there is only one argument at top level - this suppresses
1147    the file name if the argument is not a directory and filenames_only is not set.
1148    */
1149    
1150    only_one_at_top = (i == argc - 1);
1151    
1152    for (; i < argc; i++)
1153      {
1154      int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);
1155      if (frc > 1) rc = frc;
1156        else if (frc == 0 && rc == 1) rc = 0;
1157      }
1158    
1159  return rc;  return rc;
1160  }  }
1161    
1162  /* End */  /* End of pcregrep */

Legend:
Removed from v.49  
changed lines
  Added in v.77

  ViewVC Help
Powered by ViewVC 1.1.5