/[pcre]/code/tags/pcre-6.6/pcregrep.c
ViewVC logotype

Diff of /code/tags/pcre-6.6/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 49 by nigel, Sat Feb 24 21:39:33 2007 UTC revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2005 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #include <ctype.h>
41  #include <stdio.h>  #include <stdio.h>
42  #include <string.h>  #include <string.h>
43  #include <stdlib.h>  #include <stdlib.h>
44  #include <errno.h>  #include <errno.h>
45    
46    #include <sys/types.h>
47    #include <sys/stat.h>
48    #include <unistd.h>
49    
50  #include "config.h"  #include "config.h"
51  #include "pcre.h"  #include "pcre.h"
52    
# Line 17  its pattern matching. */ Line 55  its pattern matching. */
55    
56  typedef int BOOL;  typedef int BOOL;
57    
58    #define VERSION "4.1 05-Sep-2005"
59    #define MAX_PATTERN_COUNT 100
60    
61    #if BUFSIZ > 8192
62    #define MBUFTHIRD BUFSIZ
63    #else
64    #define MBUFTHIRD 8192
65    #endif
66    
67    
68    
69  /*************************************************  /*************************************************
70  *               Global variables                 *  *               Global variables                 *
71  *************************************************/  *************************************************/
72    
73  static pcre *pattern;  static char *pattern_filename = NULL;
74  static pcre_extra *hints;  static char *stdin_name = (char *)"(standard input)";
75    static int  pattern_count = 0;
76    static pcre **pattern_list;
77    static pcre_extra **hints_list;
78    
79    static char *include_pattern = NULL;
80    static char *exclude_pattern = NULL;
81    
82    static pcre *include_compiled = NULL;
83    static pcre *exclude_compiled = NULL;
84    
85    static int after_context = 0;
86    static int before_context = 0;
87    static int both_context = 0;
88    
89  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
90    static BOOL filenames = TRUE;
91  static BOOL filenames_only = FALSE;  static BOOL filenames_only = FALSE;
92    static BOOL filenames_nomatch_only = FALSE;
93    static BOOL hyphenpending = FALSE;
94  static BOOL invert = FALSE;  static BOOL invert = FALSE;
95    static BOOL multiline = FALSE;
96  static BOOL number = FALSE;  static BOOL number = FALSE;
97    static BOOL quiet = FALSE;
98    static BOOL recurse = FALSE;
99  static BOOL silent = FALSE;  static BOOL silent = FALSE;
100  static BOOL whole_lines = FALSE;  static BOOL whole_lines = FALSE;
101    static BOOL word_match = FALSE;
102    
103    /* Structure for options and list of them */
104    
105    enum { OP_NODATA, OP_STRING, OP_NUMBER };
106    
107    typedef struct option_item {
108      int type;
109      int one_char;
110      void *dataptr;
111      const char *long_name;
112      const char *help_text;
113    } option_item;
114    
115    static option_item optionlist[] = {
116      { OP_NODATA, -1,  NULL,              "",              "  terminate options" },
117      { OP_NODATA, -1,  NULL,              "help",          "display this help and exit" },
118      { OP_NUMBER, 'A', &after_context,    "after-context=number", "set number of following context lines" },
119      { OP_NUMBER, 'B', &before_context,   "before-context=number", "set number of prior context lines" },
120      { OP_NUMBER, 'C', &both_context,     "context=number", "set number of context lines, before & after" },
121      { OP_NODATA, 'c', NULL,              "count",         "print only a count of matching lines per FILE" },
122      { OP_STRING, 'f', &pattern_filename, "file=path",     "read patterns from file" },
123      { OP_NODATA, 'h', NULL,              "no-filename",   "suppress the prefixing filename on output" },
124      { OP_NODATA, 'i', NULL,              "ignore-case",   "ignore case distinctions" },
125      { OP_NODATA, 'l', NULL,              "files-with-matches", "print only FILE names containing matches" },
126      { OP_NODATA, 'L', NULL,              "files-without-match","print only FILE names not containing matches" },
127      { OP_STRING, -1,  &stdin_name,       "label=name",    "set name for standard input" },
128      { OP_NODATA, 'M', NULL,              "multiline",     "run in multiline mode" },
129      { OP_NODATA, 'n', NULL,              "line-number",   "print line number with output lines" },
130      { OP_NODATA, 'q', NULL,              "quiet",         "suppress output, just set return code" },
131      { OP_NODATA, 'r', NULL,              "recursive",     "recursively scan sub-directories" },
132      { OP_STRING, -1,  &exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
133      { OP_STRING, -1,  &include_pattern,  "include=pattern","include matching files when recursing" },
134      { OP_NODATA, 's', NULL,              "no-messages",   "suppress error messages" },
135      { OP_NODATA, 'u', NULL,              "utf-8",         "use UTF-8 mode" },
136      { OP_NODATA, 'V', NULL,              "version",       "print version information and exit" },
137      { OP_NODATA, 'v', NULL,              "invert-match",  "select non-matching lines" },
138      { OP_NODATA, 'w', NULL,              "word-regex(p)", "force PATTERN to match only as a word"  },
139      { OP_NODATA, 'x', NULL,              "line-regex(p)", "force PATTERN to match only whole lines" },
140      { OP_NODATA, 0,   NULL,               NULL,            NULL }
141    };
142    
143    
144    /*************************************************
145    *       Functions for directory scanning         *
146    *************************************************/
147    
148    /* These functions are defined so that they can be made system specific,
149    although at present the only ones are for Unix, Win32, and for "no directory
150    recursion support". */
151    
152    
153    /************* Directory scanning in Unix ***********/
154    
155    #if IS_UNIX
156    #include <sys/types.h>
157    #include <sys/stat.h>
158    #include <dirent.h>
159    
160    typedef DIR directory_type;
161    
162    static int
163    isdirectory(char *filename)
164    {
165    struct stat statbuf;
166    if (stat(filename, &statbuf) < 0)
167      return 0;        /* In the expectation that opening as a file will fail */
168    return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
169    }
170    
171    static directory_type *
172    opendirectory(char *filename)
173    {
174    return opendir(filename);
175    }
176    
177    static char *
178    readdirectory(directory_type *dir)
179    {
180    for (;;)
181      {
182      struct dirent *dent = readdir(dir);
183      if (dent == NULL) return NULL;
184      if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
185        return dent->d_name;
186      }
187    return NULL;   /* Keep compiler happy; never executed */
188    }
189    
190    static void
191    closedirectory(directory_type *dir)
192    {
193    closedir(dir);
194    }
195    
196    
197    /************* Directory scanning in Win32 ***********/
198    
199    /* I (Philip Hazel) have no means of testing this code. It was contributed by
200    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
201    when it did not exist. */
202    
203    
204    #elif HAVE_WIN32API
205    
206    #ifndef STRICT
207    # define STRICT
208    #endif
209    #ifndef WIN32_LEAN_AND_MEAN
210    # define WIN32_LEAN_AND_MEAN
211    #endif
212    #ifndef INVALID_FILE_ATTRIBUTES
213    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
214    #endif
215    
216    #include <windows.h>
217    
218    typedef struct directory_type
219    {
220    HANDLE handle;
221    BOOL first;
222    WIN32_FIND_DATA data;
223    } directory_type;
224    
225    int
226    isdirectory(char *filename)
227    {
228    DWORD attr = GetFileAttributes(filename);
229    if (attr == INVALID_FILE_ATTRIBUTES)
230      return 0;
231    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
232    }
233    
234    directory_type *
235    opendirectory(char *filename)
236    {
237    size_t len;
238    char *pattern;
239    directory_type *dir;
240    DWORD err;
241    len = strlen(filename);
242    pattern = (char *) malloc(len + 3);
243    dir = (directory_type *) malloc(sizeof(*dir));
244    if ((pattern == NULL) || (dir == NULL))
245      {
246      fprintf(stderr, "pcregrep: malloc failed\n");
247      exit(2);
248      }
249    memcpy(pattern, filename, len);
250    memcpy(&(pattern[len]), "\\*", 3);
251    dir->handle = FindFirstFile(pattern, &(dir->data));
252    if (dir->handle != INVALID_HANDLE_VALUE)
253      {
254      free(pattern);
255      dir->first = TRUE;
256      return dir;
257      }
258    err = GetLastError();
259    free(pattern);
260    free(dir);
261    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
262    return NULL;
263    }
264    
265    char *
266    readdirectory(directory_type *dir)
267    {
268    for (;;)
269      {
270      if (!dir->first)
271        {
272        if (!FindNextFile(dir->handle, &(dir->data)))
273          return NULL;
274        }
275      else
276        {
277        dir->first = FALSE;
278        }
279      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
280        return dir->data.cFileName;
281      }
282    #ifndef _MSC_VER
283    return NULL;   /* Keep compiler happy; never executed */
284    #endif
285    }
286    
287    void
288    closedirectory(directory_type *dir)
289    {
290    FindClose(dir->handle);
291    free(dir);
292    }
293    
294    
295    /************* Directory scanning when we can't do it ***********/
296    
297    /* The type is void, and apart from isdirectory(), the functions do nothing. */
298    
299    #else
300    
301    typedef void directory_type;
302    
303    int isdirectory(char *filename) { return FALSE; }
304    directory_type * opendirectory(char *filename) {}
305    char *readdirectory(directory_type *dir) {}
306    void closedirectory(directory_type *dir) {}
307    
308    #endif
309    
310    
311    
# Line 58  return sys_errlist[n]; Line 332  return sys_errlist[n];
332    
333    
334  /*************************************************  /*************************************************
335  *              Grep an individual file           *  *       Print the previous "after" lines         *
336    *************************************************/
337    
338    /* This is called if we are about to lose said lines because of buffer filling,
339    and at the end of the file.
340    
341    Arguments:
342      lastmatchnumber   the number of the last matching line, plus one
343      lastmatchrestart  where we restarted after the last match
344      endptr            end of available data
345      printname         filename for printing
346    
347    Returns:            nothing
348    */
349    
350    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
351      char *endptr, char *printname)
352    {
353    if (after_context > 0 && lastmatchnumber > 0)
354      {
355      int count = 0;
356      while (lastmatchrestart < endptr && count++ < after_context)
357        {
358        char *pp = lastmatchrestart;
359        if (printname != NULL) fprintf(stdout, "%s-", printname);
360        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
361        while (*pp != '\n') pp++;
362        fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);
363        lastmatchrestart = pp + 1;
364        }
365      hyphenpending = TRUE;
366      }
367    }
368    
369    
370    
371    /*************************************************
372    *            Grep an individual file             *
373  *************************************************/  *************************************************/
374    
375    /* This is called from grep_or_recurse() below. It uses a buffer that is three
376    times the value of MBUFTHIRD. The matching point is never allowed to stray into
377    the top third of the buffer, thus keeping more of the file available for
378    context printing or for multiline scanning. For large files, the pointer will
379    be in the middle third most of the time, so the bottom third is available for
380    "before" context printing.
381    
382    Arguments:
383      in           the fopened FILE stream
384      printname    the file name if it is to be printed for each match
385                   or NULL if the file name is not to be printed
386                   it cannot be NULL if filenames[_nomatch]_only is set
387    
388    Returns:       0 if there was at least one match
389                   1 otherwise (no matches)
390    */
391    
392  static int  static int
393  pcregrep(FILE *in, char *name)  pcregrep(FILE *in, char *printname)
394  {  {
395  int rc = 1;  int rc = 1;
396  int linenumber = 0;  int linenumber = 1;
397    int lastmatchnumber = 0;
398  int count = 0;  int count = 0;
399  int offsets[99];  int offsets[99];
400  char buffer[BUFSIZ];  char *lastmatchrestart = NULL;
401    char buffer[3*MBUFTHIRD];
402    char *ptr = buffer;
403    char *endptr;
404    size_t bufflength;
405    BOOL endhyphenpending = FALSE;
406    
407    /* Do the first read into the start of the buffer and set up the pointer to
408    the end of what we have. */
409    
410    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
411    endptr = buffer + bufflength;
412    
413    /* Loop while the current pointer is not at the end of the file. For large
414    files, endptr will be at the end of the buffer when we are in the middle of the
415    file, but ptr will never get there, because as soon as it gets over 2/3 of the
416    way, the buffer is shifted left and re-filled. */
417    
418  while (fgets(buffer, sizeof(buffer), in) != NULL)  while (ptr < endptr)
419    {    {
420    BOOL match;    int i;
421    int length = (int)strlen(buffer);    BOOL match = FALSE;
422    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    char *t = ptr;
423    linenumber++;    size_t length, linelength;
424    
425      /* At this point, ptr is at the start of a line. We need to find the length
426      of the subject string to pass to pcre_exec(). In multiline mode, it is the
427      length of the remainder of the data in the buffer. Otherwise, it is the length of
428      the next line. After matching, we always advance by the length of the next
429      line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
430      that any match is constrained to be in the first line. */
431    
432      linelength = 0;
433      while (t < endptr && *t++ != '\n') linelength++;
434      length = multiline? endptr - ptr : linelength;
435    
436      /* Run through all the patterns until one matches. Note that we don't include
437      the final newline in the subject string. */
438    
439    match = pcre_exec(pattern, hints, buffer, length, 0, 0, offsets, 99) >= 0;    for (i = 0; !match && i < pattern_count; i++)
440    if (match && whole_lines && offsets[1] != length) match = FALSE;      {
441        match = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
442          offsets, 99) >= 0;
443        }
444    
445      /* If it's a match or a not-match (as required), print what's wanted. */
446    
447    if (match != invert)    if (match != invert)
448      {      {
449        BOOL hyphenprinted = FALSE;
450    
451        if (filenames_nomatch_only) return 1;
452    
453      if (count_only) count++;      if (count_only) count++;
454    
455      else if (filenames_only)      else if (filenames_only)
456        {        {
457        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        fprintf(stdout, "%s\n", printname);
458        return 0;        return 0;
459        }        }
460    
461      else if (silent) return 0;      else if (quiet) return 0;
462    
463      else      else
464        {        {
465        if (name != NULL) fprintf(stdout, "%s:", name);        /* See if there is a requirement to print some "after" lines from a
466          previous match. We never print any overlaps. */
467    
468          if (after_context > 0 && lastmatchnumber > 0)
469            {
470            int linecount = 0;
471            char *p = lastmatchrestart;
472    
473            while (p < ptr && linecount < after_context)
474              {
475              while (*p != '\n') p++;
476              p++;
477              linecount++;
478              }
479    
480            /* It is important to advance lastmatchrestart during this printing so
481            that it interacts correctly with any "before" printing below. */
482    
483            while (lastmatchrestart < p)
484              {
485              char *pp = lastmatchrestart;
486              if (printname != NULL) fprintf(stdout, "%s-", printname);
487              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
488              while (*pp != '\n') pp++;
489              fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);
490              lastmatchrestart = pp + 1;
491              }
492            if (lastmatchrestart != ptr) hyphenpending = TRUE;
493            }
494    
495          /* If there were non-contiguous lines printed above, insert hyphens. */
496    
497          if (hyphenpending)
498            {
499            fprintf(stdout, "--\n");
500            hyphenpending = FALSE;
501            hyphenprinted = TRUE;
502            }
503    
504          /* See if there is a requirement to print some "before" lines for this
505          match. Again, don't print overlaps. */
506    
507          if (before_context > 0)
508            {
509            int linecount = 0;
510            char *p = ptr;
511    
512            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
513                   linecount++ < before_context)
514              {
515              p--;
516              while (p > buffer && p[-1] != '\n') p--;
517              }
518    
519            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
520              fprintf(stdout, "--\n");
521    
522            while (p < ptr)
523              {
524              char *pp = p;
525              if (printname != NULL) fprintf(stdout, "%s-", printname);
526              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
527              while (*pp != '\n') pp++;
528              fprintf(stdout, "%.*s", pp - p + 1, p);
529              p = pp + 1;
530              }
531            }
532    
533          /* Now print the matching line(s); ensure we set hyphenpending at the end
534          of the file if any context lines are being output. */
535    
536          if (after_context > 0 || before_context > 0)
537            endhyphenpending = TRUE;
538    
539          if (printname != NULL) fprintf(stdout, "%s:", printname);
540        if (number) fprintf(stdout, "%d:", linenumber);        if (number) fprintf(stdout, "%d:", linenumber);
541        fprintf(stdout, "%s\n", buffer);  
542          /* In multiline mode, we want to print to the end of the line in which
543          the end of the matched string is found, so we adjust linelength and the
544          line number appropriately. Because the PCRE_FIRSTLINE option is set, the
545          start of the match will always be before the first \n character. */
546    
547          if (multiline)
548            {
549            char *endmatch = ptr + offsets[1];
550            t = ptr;
551            while (t < endmatch) { if (*t++ == '\n') linenumber++; }
552            while (endmatch < endptr && *endmatch != '\n') endmatch++;
553            linelength = endmatch - ptr;
554            }
555    
556          fprintf(stdout, "%.*s\n", linelength, ptr);
557        }        }
558    
559      rc = 0;      rc = 0;    /* Had some success */
560    
561        /* Remember where the last match happened for after_context. We remember
562        where we are about to restart, and that line's number. */
563    
564        lastmatchrestart = ptr + linelength + 1;
565        lastmatchnumber = linenumber + 1;
566      }      }
567    
568      /* Advance to after the newline and increment the line number. */
569    
570      ptr += linelength + 1;
571      linenumber++;
572    
573      /* If we haven't yet reached the end of the file (the buffer is full), and
574      the current point is in the top 1/3 of the buffer, slide the buffer down by
575      1/3 and refill it. Before we do this, if some unprinted "after" lines are
576      about to be lost, print them. */
577    
578      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
579        {
580        if (after_context > 0 &&
581            lastmatchnumber > 0 &&
582            lastmatchrestart < buffer + MBUFTHIRD)
583          {
584          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
585          lastmatchnumber = 0;
586          }
587    
588        /* Now do the shuffle */
589    
590        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
591        ptr -= MBUFTHIRD;
592        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
593        endptr = buffer + bufflength;
594    
595        /* Adjust any last match point */
596    
597        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
598        }
599      }     /* Loop through the whole file */
600    
601    /* End of file; print final "after" lines if wanted; do_after_lines sets
602    hyphenpending if it prints something. */
603    
604    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
605    hyphenpending |= endhyphenpending;
606    
607    /* Print the file name if we are looking for those without matches and there
608    were none. If we found a match, we won't have got this far. */
609    
610    if (filenames_nomatch_only)
611      {
612      fprintf(stdout, "%s\n", printname);
613      return 0;
614    }    }
615    
616    /* Print the match count if wanted */
617    
618  if (count_only)  if (count_only)
619    {    {
620    if (name != NULL) fprintf(stdout, "%s:", name);    if (printname != NULL) fprintf(stdout, "%s:", printname);
621    fprintf(stdout, "%d\n", count);    fprintf(stdout, "%d\n", count);
622    }    }
623    
# Line 114  return rc; Line 626  return rc;
626    
627    
628    
629    /*************************************************
630    *     Grep a file or recurse into a directory    *
631    *************************************************/
632    
633    /* Given a path name, if it's a directory, scan all the files if we are
634    recursing; if it's a file, grep it.
635    
636    Arguments:
637      pathname          the path to investigate
638      dir_recurse       TRUE if recursing is wanted (-r)
639      show_filenames    TRUE if file names are wanted for multiple files, except
640                          for the only file at top level when not filenames_only
641      only_one_at_top   TRUE if the path is the only one at toplevel
642    
643    Returns:   0 if there was at least one match
644               1 if there were no matches
645               2 if there was some kind of error
646    
647    However, the error messages for file opening failures are suppressed if "silent" is set.
648    */
649    
650    static int
651    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL show_filenames,
652      BOOL only_one_at_top)
653    {
654    int rc = 1;
655    int sep;
656    FILE *in;
657    char *printname;
658    
659    /* If the file name is "-" we scan stdin */
660    
661    if (strcmp(pathname, "-") == 0)
662      {
663      return pcregrep(stdin,
664        (filenames_only || filenames_nomatch_only ||
665        (show_filenames && !only_one_at_top))?
666          stdin_name : NULL);
667      }
668    
669    /* If the file is a directory and we are recursing, scan each file within it,
670    subject to any include or exclude patterns that were set. The scanning code is
671    localized so it can be made system-specific. */
672    
673    if ((sep = isdirectory(pathname)) != 0 && dir_recurse)
674      {
675      char buffer[1024];
676      char *nextfile;
677      directory_type *dir = opendirectory(pathname);
678    
679      if (dir == NULL)
680        {
681        if (!silent)
682          fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
683            strerror(errno));
684        return 2;
685        }
686    
687      while ((nextfile = readdirectory(dir)) != NULL)
688        {
689        int frc, blen;
690        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
691        blen = strlen(buffer);
692    
693        if (exclude_compiled != NULL &&
694            pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
695          continue;
696    
697        if (include_compiled != NULL &&
698            pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
699          continue;
700    
701        frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);
702        if (frc > 1) rc = frc;
703         else if (frc == 0 && rc == 1) rc = 0;
704        }
705    
706      closedirectory(dir);
707      return rc;
708      }
709    
710    /* If the file is not a directory, or we are not recursing, scan it. If this is
711    the first and only argument at top level, we don't show the file name (unless
712    we are only showing the file name). Otherwise, control is via the
713    show_filenames variable. */
714    
715    in = fopen(pathname, "r");
716    if (in == NULL)
717      {
718      if (!silent)
719        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
720          strerror(errno));
721      return 2;
722      }
723    
724    printname =  (filenames_only || filenames_nomatch_only ||
725      (show_filenames && !only_one_at_top))? pathname : NULL;
726    
727    rc = pcregrep(in, printname);
728    
729    fclose(in);
730    return rc;
731    }
732    
733    
734    
735    
736  /*************************************************  /*************************************************
737  *                Usage function                  *  *                Usage function                  *
# Line 122  return rc; Line 740  return rc;
740  static int  static int
741  usage(int rc)  usage(int rc)
742  {  {
743  fprintf(stderr, "Usage: pcregrep [-Vchilnsvx] pattern [file] ...\n");  fprintf(stderr, "Usage: pcregrep [-LMVcfhilnqrsvwx] [long-options] [pattern] [file1 file2 ...]\n");
744    fprintf(stderr, "Type `pcregrep --help' for more information.\n");
745  return rc;  return rc;
746  }  }
747    
# Line 130  return rc; Line 749  return rc;
749    
750    
751  /*************************************************  /*************************************************
752    *                Help function                   *
753    *************************************************/
754    
755    static void
756    help(void)
757    {
758    option_item *op;
759    
760    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
761    printf("Search for PATTERN in each FILE or standard input.\n");
762    printf("PATTERN must be present if -f is not used.\n");
763    printf("\"-\" can be used as a file name to mean STDIN.\n");
764    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
765    
766    printf("Options:\n");
767    
768    for (op = optionlist; op->one_char != 0; op++)
769      {
770      int n;
771      char s[4];
772      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
773      printf("  %s --%s%n", s, op->long_name, &n);
774      n = 30 - n;
775      if (n < 1) n = 1;
776      printf("%.*s%s\n", n, "                    ", op->help_text);
777      }
778    
779    printf("\nWhen reading patterns from a file instead of using a command line option,\n");
780    printf("trailing white space is removed and blank lines are ignored.\n");
781    printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
782    
783    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
784    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
785    }
786    
787    
788    
789    
790    /*************************************************
791    *    Handle a single-letter, no data option      *
792    *************************************************/
793    
794    static int
795    handle_option(int letter, int options)
796    {
797    switch(letter)
798      {
799      case -1:  help(); exit(0);
800      case 'c': count_only = TRUE; break;
801      case 'h': filenames = FALSE; break;
802      case 'i': options |= PCRE_CASELESS; break;
803      case 'l': filenames_only = TRUE; break;
804      case 'L': filenames_nomatch_only = TRUE; break;
805      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
806      case 'n': number = TRUE; break;
807      case 'q': quiet = TRUE; break;
808      case 'r': recurse = TRUE; break;
809      case 's': silent = TRUE; break;
810      case 'u': options |= PCRE_UTF8; break;
811      case 'v': invert = TRUE; break;
812      case 'w': word_match = TRUE; break;
813      case 'x': whole_lines = TRUE; break;
814    
815      case 'V':
816      fprintf(stderr, "pcregrep version %s using ", VERSION);
817      fprintf(stderr, "PCRE version %s\n", pcre_version());
818      exit(0);
819      break;
820    
821      default:
822      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
823      exit(usage(2));
824      }
825    
826    return options;
827    }
828    
829    
830    
831    
832    /*************************************************
833  *                Main program                    *  *                Main program                    *
834  *************************************************/  *************************************************/
835    
836    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
837    
838  int  int
839  main(int argc, char **argv)  main(int argc, char **argv)
840  {  {
841  int i;  int i, j;
842  int rc = 1;  int rc = 1;
843  int options = 0;  int options = 0;
844  int errptr;  int errptr;
845  const char *error;  const char *error;
846  BOOL filenames = TRUE;  BOOL only_one_at_top;
847    
848  /* Process the options */  /* Process the options */
849    
850  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
851    {    {
852    char *s;    option_item *op = NULL;
853      char *option_data = (char *)"";    /* default to keep compiler happy */
854      BOOL longop;
855      BOOL longopwasequals = FALSE;
856    
857    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
858    s = argv[i] + 1;  
859    while (*s != 0)    /* If we hit an argument that is just "-", it may be a reference to STDIN,
860      but only if we have previously had -f to define the patterns. */
861    
862      if (argv[i][1] == 0)
863        {
864        if (pattern_filename != NULL) break;
865          else exit(usage(2));
866        }
867    
868      /* Handle a long name option, or -- to terminate the options */
869    
870      if (argv[i][1] == '-')
871        {
872        char *arg = argv[i] + 2;
873        char *argequals = strchr(arg, '=');
874    
875        if (*arg == 0)    /* -- terminates options */
876          {
877          i++;
878          break;                /* out of the options-handling loop */
879          }
880    
881        longop = TRUE;
882    
883        /* Some long options have data that follows after =, for example file=name.
884        Some options have variations in the long name spelling: specifically, we
885        allow "regexp" because GNU grep allows it, though I personally go along
886        with Jeff Friedl in preferring "regex" without the "p". These options are
887        entered in the table as "regex(p)". No option is in both these categories,
888        fortunately. */
889    
890        for (op = optionlist; op->one_char != 0; op++)
891          {
892          char *opbra = strchr(op->long_name, '(');
893          char *equals = strchr(op->long_name, '=');
894          if (opbra == NULL)     /* Not a (p) case */
895            {
896            if (equals == NULL)  /* Not thing=data case */
897              {
898              if (strcmp(arg, op->long_name) == 0) break;
899              }
900            else                 /* Special case xxx=data */
901              {
902              int oplen = equals - op->long_name;
903              int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
904              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
905                {
906                option_data = arg + arglen;
907                if (*option_data == '=')
908                  {
909                  option_data++;
910                  longopwasequals = TRUE;
911                  }
912                break;
913                }
914              }
915            }
916          else                   /* Special case xxxx(p) */
917            {
918            char buff1[24];
919            char buff2[24];
920            int baselen = opbra - op->long_name;
921            sprintf(buff1, "%.*s", baselen, op->long_name);
922            sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
923              opbra + 1);
924            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
925              break;
926            }
927          }
928    
929        if (op->one_char == 0)
930          {
931          fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
932          exit(usage(2));
933          }
934        }
935    
936      /* One-char options; many that have no data may be in a single argument; we
937      continue till we hit the last one or one that needs data. */
938    
939      else
940      {      {
941      switch (*s++)      char *s = argv[i] + 1;
942        longop = FALSE;
943        while (*s != 0)
944        {        {
945        case 'c': count_only = TRUE; break;        for (op = optionlist; op->one_char != 0; op++)
946        case 'h': filenames = FALSE; break;          { if (*s == op->one_char) break; }
947        case 'i': options |= PCRE_CASELESS; break;        if (op->one_char == 0)
948        case 'l': filenames_only = TRUE;          {
949        case 'n': number = TRUE; break;          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
950        case 's': silent = TRUE; break;            *s, argv[i]);
951        case 'v': invert = TRUE; break;          exit(usage(2));
952        case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;          }
953          if (op->type != OP_NODATA || s[1] == 0)
954            {
955            option_data = s+1;
956            break;
957            }
958          options = handle_option(*s++, options);
959          }
960        }
961    
962      /* At this point we should have op pointing to a matched option */
963    
964        case 'V':    if (op->type == OP_NODATA)
965        fprintf(stderr, "PCRE version %s\n", pcre_version());      options = handle_option(op->one_char, options);
966        break;    else
967        {
968        if (*option_data == 0)
969          {
970          if (i >= argc - 1 || longopwasequals)
971            {
972            fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
973            exit(usage(2));
974            }
975          option_data = argv[++i];
976          }
977    
978        default:      if (op->type == OP_STRING) *((char **)op->dataptr) = option_data; else
979        fprintf(stderr, "pcregrep: unknown option %c\n", s[-1]);        {
980        return usage(2);        char *endptr;
981          int n = strtoul(option_data, &endptr, 10);
982          if (*endptr != 0)
983            {
984            if (longop)
985              fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%s\n",
986                option_data, op->long_name);
987            else
988              fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
989                option_data, op->one_char);
990            exit(usage(2));
991            }
992          *((int *)op->dataptr) = n;
993        }        }
994      }      }
995    }    }
996    
997  /* There must be at least a regexp argument */  /* Options have been decoded. If -C was used, its value is used as a default
998    for -A and -B. */
999    
1000  if (i >= argc) return usage(0);  if (both_context > 0)
1001      {
1002      if (after_context == 0) after_context = both_context;
1003      if (before_context == 0) before_context = both_context;
1004      }
1005    
1006  /* Compile the regular expression. */  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1007    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1008    
1009  pattern = pcre_compile(argv[i++], options, &error, &errptr, NULL);  if (pattern_list == NULL || hints_list == NULL)
 if (pattern == NULL)  
1010    {    {
1011    fprintf(stderr, "pcregrep: error in regex at offset %d: %s\n", errptr, error);    fprintf(stderr, "pcregrep: malloc failed\n");
1012    return 2;    return 2;
1013    }    }
1014    
1015  /* Study the regular expression, as we will be running it may times */  /* Compile the regular expression(s). */
1016    
1017  hints = pcre_study(pattern, 0, &error);  if (pattern_filename != NULL)
 if (error != NULL)  
1018    {    {
1019    fprintf(stderr, "pcregrep: error while studing regex: %s\n", error);    FILE *f = fopen(pattern_filename, "r");
1020    return 2;    char buffer[MBUFTHIRD + 16];
1021      char *rdstart;
1022      int adjust = 0;
1023    
1024      if (f == NULL)
1025        {
1026        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1027          strerror(errno));
1028        return 2;
1029        }
1030    
1031      if (whole_lines)
1032        {
1033        strcpy(buffer, "^(?:");
1034        adjust = 4;
1035        }
1036      else if (word_match)
1037        {
1038        strcpy(buffer, "\\b");
1039        adjust = 2;
1040        }
1041    
1042      rdstart = buffer + adjust;
1043      while (fgets(rdstart, MBUFTHIRD, f) != NULL)
1044        {
1045        char *s = rdstart + (int)strlen(rdstart);
1046        if (pattern_count >= MAX_PATTERN_COUNT)
1047          {
1048          fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",
1049            MAX_PATTERN_COUNT);
1050          return 2;
1051          }
1052        while (s > rdstart && isspace((unsigned char)(s[-1]))) s--;
1053        if (s == rdstart) continue;
1054        if (whole_lines) strcpy(s, ")$");
1055          else if (word_match)strcpy(s, "\\b");
1056            else *s = 0;
1057        pattern_list[pattern_count] = pcre_compile(buffer, options, &error,
1058          &errptr, NULL);
1059        if (pattern_list[pattern_count++] == NULL)
1060          {
1061          fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",
1062            pattern_count, errptr - adjust, error);
1063          return 2;
1064          }
1065        }
1066      fclose(f);
1067    }    }
1068    
1069  /* If there are no further arguments, do the business on stdin and exit */  /* If no file name, a single regex must be given inline. */
1070    
1071  if (i >= argc) return pcregrep(stdin, NULL);  else
1072      {
1073      char buffer[MBUFTHIRD + 16];
1074      char *pat;
1075      int adjust = 0;
1076    
1077  /* Otherwise, work through the remaining arguments as files. If there is only    if (i >= argc) return usage(2);
 one, don't give its name on the output. */  
1078    
1079  if (i == argc - 1) filenames = FALSE;    if (whole_lines)
1080  if (filenames_only) filenames = TRUE;      {
1081        sprintf(buffer, "^(?:%.*s)$", MBUFTHIRD, argv[i++]);
1082        pat = buffer;
1083        adjust = 4;
1084        }
1085      else if (word_match)
1086        {
1087        sprintf(buffer, "\\b%.*s\\b", MBUFTHIRD, argv[i++]);
1088        pat = buffer;
1089        adjust = 2;
1090        }
1091      else pat = argv[i++];
1092    
1093  for (; i < argc; i++)    pattern_list[0] = pcre_compile(pat, options, &error, &errptr, NULL);
1094    
1095      if (pattern_list[0] == NULL)
1096        {
1097        fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n",
1098          errptr - adjust, error);
1099        return 2;
1100        }
1101      pattern_count++;
1102      }
1103    
1104    /* Study the regular expressions, as we will be running them many times */
1105    
1106    for (j = 0; j < pattern_count; j++)
1107    {    {
1108    FILE *in = fopen(argv[i], "r");    hints_list[j] = pcre_study(pattern_list[j], 0, &error);
1109    if (in == NULL)    if (error != NULL)
1110      {      {
1111      fprintf(stderr, "%s: failed to open: %s\n", argv[i], strerror(errno));      char s[16];
1112      rc = 2;      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1113        fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1114        return 2;
1115      }      }
1116    else    }
1117    
1118    /* If there are include or exclude patterns, compile them. */
1119    
1120    if (exclude_pattern != NULL)
1121      {
1122      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, NULL);
1123      if (exclude_compiled == NULL)
1124      {      {
1125      int frc = pcregrep(in, filenames? argv[i] : NULL);      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1126      if (frc == 0 && rc == 1) rc = 0;        errptr, error);
1127      fclose(in);      return 2;
1128      }      }
1129    }    }
1130    
1131    if (include_pattern != NULL)
1132      {
1133      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, NULL);
1134      if (include_compiled == NULL)
1135        {
1136        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1137          errptr, error);
1138        return 2;
1139        }
1140      }
1141    
1142    /* If there are no further arguments, do the business on stdin and exit */
1143    
1144    if (i >= argc) return pcregrep(stdin,
1145      (filenames_only || filenames_nomatch_only)? stdin_name : NULL);
1146    
1147    /* Otherwise, work through the remaining arguments as files or directories.
1148    Pass in the fact that there is only one argument at top level - this suppresses
1149    the file name if the argument is not a directory and filenames_only is not set.
1150    */
1151    
1152    only_one_at_top = (i == argc - 1);
1153    
1154    for (; i < argc; i++)
1155      {
1156      int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);
1157      if (frc > 1) rc = frc;
1158        else if (frc == 0 && rc == 1) rc = 0;
1159      }
1160    
1161  return rc;  return rc;
1162  }  }
1163    
1164  /* End */  /* End of pcregrep */

Legend:
Removed from v.49  
changed lines
  Added in v.85

  ViewVC Help
Powered by ViewVC 1.1.5