/[pcre]/code/tags/pcre-6.6/pcregrep.c
ViewVC logotype

Diff of /code/tags/pcre-6.6/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 49 by nigel, Sat Feb 24 21:39:33 2007 UTC revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2006 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #include <ctype.h>
41    #include <locale.h>
42  #include <stdio.h>  #include <stdio.h>
43  #include <string.h>  #include <string.h>
44  #include <stdlib.h>  #include <stdlib.h>
45  #include <errno.h>  #include <errno.h>
46    
47    #include <sys/types.h>
48    #include <sys/stat.h>
49    #include <unistd.h>
50    
51  #include "config.h"  #include "config.h"
52  #include "pcre.h"  #include "pcre.h"
53    
# Line 17  its pattern matching. */ Line 56  its pattern matching. */
56    
57  typedef int BOOL;  typedef int BOOL;
58    
#define VERSION            "4.2 09-Jan-2006"
#define MAX_PATTERN_COUNT  100

/* MBUFTHIRD is the larger of BUFSIZ and 8192. The file-scanning buffer in
pcregrep() is three times this size. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif


/* Settings of the "filenames" variable, which controls file name output. The
numerical order matters: every value above FN_DEFAULT is taken to mean that a
file name is wanted. */

enum {
  FN_NONE         = 0,
  FN_DEFAULT      = 1,
  FN_ONLY         = 2,
  FN_NOMATCH_ONLY = 3,
  FN_FORCE        = 4
};

/* Possible actions for the -d option (dee_xxx) and the -D option (DEE_xxx) */

enum {
  dee_READ    = 0,
  dee_SKIP    = 1,
  dee_RECURSE = 2
};

enum {
  DEE_READ = 0,
  DEE_SKIP = 1
};

/* Flag bits that select special processing of patterns */

#define PO_WORD_MATCH     0x01
#define PO_LINE_MATCH     0x02
#define PO_FIXED_STRINGS  0x04
85    
86    
87    
88  /*************************************************  /*************************************************
89  *               Global variables                 *  *               Global variables                 *
90  *************************************************/  *************************************************/
91    
92  static pcre *pattern;  /* Jeffrey Friedl has some debugging requirements that are not part of the
93  static pcre_extra *hints;  regular code. */
94    
95    #ifdef JFRIEDL_DEBUG
96    static int S_arg = -1;
97    #endif
98    
99    static char *colour_string = (char *)"1;31";
100    static char *colour_option = NULL;
101    static char *dee_option = NULL;
102    static char *DEE_option = NULL;
103    static char *pattern_filename = NULL;
104    static char *stdin_name = (char *)"(standard input)";
105    static char *locale = NULL;
106    
107    static const unsigned char *pcretables = NULL;
108    
109    static int  pattern_count = 0;
110    static pcre **pattern_list;
111    static pcre_extra **hints_list;
112    
113    static char *include_pattern = NULL;
114    static char *exclude_pattern = NULL;
115    
116    static pcre *include_compiled = NULL;
117    static pcre *exclude_compiled = NULL;
118    
119    static int after_context = 0;
120    static int before_context = 0;
121    static int both_context = 0;
122    static int dee_action = dee_READ;
123    static int DEE_action = DEE_READ;
124    static int error_count = 0;
125    static int filenames = FN_DEFAULT;
126    static int process_options = 0;
127    
128  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
129  static BOOL filenames_only = FALSE;  static BOOL do_colour = FALSE;
130    static BOOL hyphenpending = FALSE;
131  static BOOL invert = FALSE;  static BOOL invert = FALSE;
132    static BOOL multiline = FALSE;
133  static BOOL number = FALSE;  static BOOL number = FALSE;
134    static BOOL only_matching = FALSE;
135    static BOOL quiet = FALSE;
136  static BOOL silent = FALSE;  static BOOL silent = FALSE;
137  static BOOL whole_lines = FALSE;  
/* The kinds of data an option can take, and the structure that describes a
single option in the option list. */

enum {
  OP_NODATA    = 0,
  OP_STRING    = 1,
  OP_OP_STRING = 2,
  OP_NUMBER    = 3,
  OP_OP_NUMBER = 4,
  OP_PATLIST   = 5
};

typedef struct option_item {
  int type;                /* One of the OP_xxx values above */
  int one_char;            /* Single-letter form, or a negative N_xxx code */
  void *dataptr;           /* Variable that receives the option's data, or NULL */
  const char *long_name;   /* Long option name */
  const char *help_text;   /* Description of the option */
} option_item;

/* An option that has no single-letter form is given a negative code instead,
which also serves to identify it. */

#define N_COLOUR   (-1)
#define N_EXCLUDE  (-2)
#define N_HELP     (-3)
#define N_INCLUDE  (-4)
#define N_LABEL    (-5)
#define N_LOCALE   (-6)
#define N_NULL     (-7)
161    
162    static option_item optionlist[] = {
163      { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
164      { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
165      { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
166      { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
167      { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
168      { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
169      { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
170      { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
171      { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
172      { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
173      { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
174      { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
175      { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
176      { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
177      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
178      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
179      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
180      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
181      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
182      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
183      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
184      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
185      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
186      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
187      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
188      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
189      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
190    #ifdef JFRIEDL_DEBUG
191      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
192    #endif
193      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
194      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
195      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
196      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
197      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
198      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
199      { OP_NODATA,    0,        NULL,               NULL,            NULL }
200    };
201    
/* Text to put before and after a pattern, indexed by the low three bits of
process_options: -w sets the 1 bit, -x the 2 bit, and -F the 4 bit. Giving both
-w and -x has the same effect as giving -x alone, so those entries are the
same. */

static const char *prefix[] = {
  "",          /* 0: no special processing */
  "\\b",       /* 1: -w */
  "^(?:",      /* 2: -x */
  "^(?:",      /* 3: -w -x */
  "\\Q",       /* 4: -F */
  "\\b\\Q",    /* 5: -F -w */
  "^(?:\\Q",   /* 6: -F -x */
  "^(?:\\Q"    /* 7: -F -w -x */
};

static const char *suffix[] = {
  "",          /* 0: no special processing */
  "\\b",       /* 1: -w */
  ")$",        /* 2: -x */
  ")$",        /* 3: -w -x */
  "\\E",       /* 4: -F */
  "\\E\\b",    /* 5: -F -w */
  "\\E)$",     /* 6: -F -x */
  "\\E)$"      /* 7: -F -w -x */
};
212    
213    
214    
215    /*************************************************
216    *            OS-specific functions               *
217    *************************************************/
218    
219    /* These functions are defined so that they can be made system specific,
220    although at present the only ones are for Unix, Win32, and for "no support". */
221    
222    
223    /************* Directory scanning in Unix ***********/
224    
225    #if IS_UNIX
226    #include <sys/types.h>
227    #include <sys/stat.h>
228    #include <dirent.h>
229    
230    typedef DIR directory_type;
231    
/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and reports a directory
            0 otherwise; this includes failure of stat(), in the expectation
              that opening the path as a file will then fail too
*/

static int
isdirectory(char *filename)
{
struct stat info;
if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode))
  return '/';
return 0;
}
240    
241    static directory_type *
242    opendirectory(char *filename)
243    {
244    return opendir(filename);
245    }
246    
247    static char *
248    readdirectory(directory_type *dir)
249    {
250    for (;;)
251      {
252      struct dirent *dent = readdir(dir);
253      if (dent == NULL) return NULL;
254      if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
255        return dent->d_name;
256      }
257    return NULL;   /* Keep compiler happy; never executed */
258    }
259    
260    static void
261    closedirectory(directory_type *dir)
262    {
263    closedir(dir);
264    }
265    
266    
267    /************* Test for regular file in Unix **********/
268    
/* Test whether a path names a regular file.

Argument:   filename   the path to test
Returns:    0 if stat() succeeds and reports something other than a regular
              file
            1 otherwise; this includes failure of stat(), in the expectation
              that opening the path as a file will then fail
*/

static int
isregfile(char *filename)
{
struct stat info;
if (stat(filename, &info) >= 0 && !S_ISREG(info.st_mode))
  return 0;
return 1;
}
277    
278    
279    /************* Test stdout for being a terminal in Unix **********/
280    
281    static BOOL
282    is_stdout_tty(void)
283    {
284    return isatty(fileno(stdout));
285    }
286    
287    
288    /************* Directory scanning in Win32 ***********/
289    
290    /* I (Philip Hazel) have no means of testing this code. It was contributed by
291    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
292    when it did not exist. */
293    
294    
295    #elif HAVE_WIN32API
296    
297    #ifndef STRICT
298    # define STRICT
299    #endif
300    #ifndef WIN32_LEAN_AND_MEAN
301    # define WIN32_LEAN_AND_MEAN
302    #endif
303    #ifndef INVALID_FILE_ATTRIBUTES
304    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
305    #endif
306    
307    #include <windows.h>
308    
309    typedef struct directory_type
310    {
311    HANDLE handle;
312    BOOL first;
313    WIN32_FIND_DATA data;
314    } directory_type;
315    
316    int
317    isdirectory(char *filename)
318    {
319    DWORD attr = GetFileAttributes(filename);
320    if (attr == INVALID_FILE_ATTRIBUTES)
321      return 0;
322    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
323    }
324    
325    directory_type *
326    opendirectory(char *filename)
327    {
328    size_t len;
329    char *pattern;
330    directory_type *dir;
331    DWORD err;
332    len = strlen(filename);
333    pattern = (char *) malloc(len + 3);
334    dir = (directory_type *) malloc(sizeof(*dir));
335    if ((pattern == NULL) || (dir == NULL))
336      {
337      fprintf(stderr, "pcregrep: malloc failed\n");
338      exit(2);
339      }
340    memcpy(pattern, filename, len);
341    memcpy(&(pattern[len]), "\\*", 3);
342    dir->handle = FindFirstFile(pattern, &(dir->data));
343    if (dir->handle != INVALID_HANDLE_VALUE)
344      {
345      free(pattern);
346      dir->first = TRUE;
347      return dir;
348      }
349    err = GetLastError();
350    free(pattern);
351    free(dir);
352    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
353    return NULL;
354    }
355    
356    char *
357    readdirectory(directory_type *dir)
358    {
359    for (;;)
360      {
361      if (!dir->first)
362        {
363        if (!FindNextFile(dir->handle, &(dir->data)))
364          return NULL;
365        }
366      else
367        {
368        dir->first = FALSE;
369        }
370      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
371        return dir->data.cFileName;
372      }
373    #ifndef _MSC_VER
374    return NULL;   /* Keep compiler happy; never executed */
375    #endif
376    }
377    
378    void
379    closedirectory(directory_type *dir)
380    {
381    FindClose(dir->handle);
382    free(dir);
383    }
384    
385    
386    /************* Test for regular file in Win32 **********/
387    
388    /* I don't know how to do this, or if it can be done; assume all paths are
389    regular if they are not directories. */
390    
/* Anything that is not a directory is assumed to be a regular file.

Argument:   filename   the path to test
Returns:    1 if isdirectory() yields zero for the path, 0 otherwise
*/

int isregfile(char *filename)
{
return !isdirectory(filename);
}
395    
396    
397    /************* Test stdout for being a terminal in Win32 **********/
398    
399    /* I don't know how to do this; assume never */
400    
401    static BOOL
402    is_stdout_tty(void)
403    {
404    FALSE;
405    }
406    
407    
408    /************* Directory scanning when we can't do it ***********/
409    
410    /* The type is void, and apart from isdirectory(), the functions do nothing. */
411    
412    #else
413    
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int isdirectory(char *filename) { (void)filename; return 0; }

/* The functions that return a pointer yield NULL, so that a caller that does
use them is given a defined value. */

directory_type * opendirectory(char *filename) { (void)filename; return NULL; }
char *readdirectory(directory_type *dir) { (void)dir; return NULL; }
void closedirectory(directory_type *dir) { (void)dir; }
420    
421    
422    /************* Test for regular when we can't do it **********/
423    
424    /* Assume all files are regular. */
425    
int
isregfile(char *filename)
{
(void)filename;   /* Not examined: every path counts as a regular file */
return 1;
}
427    
428    
429    /************* Test stdout for being a terminal when we can't do it **********/
430    
431    static BOOL
432    is_stdout_tty(void)
433    {
434    return FALSE;
435    }
436    
437    
438    #endif
439    
440    
441    
# Line 58  return sys_errlist[n]; Line 462  return sys_errlist[n];
462    
463    
464  /*************************************************  /*************************************************
465  *              Grep an individual file           *  *       Print the previous "after" lines         *
466    *************************************************/
467    
468    /* This is called if we are about to lose said lines because of buffer filling,
469    and at the end of the file. The data in the line is written using fwrite() so
470    that a binary zero does not terminate it.
471    
472    Arguments:
473      lastmatchnumber   the number of the last matching line, plus one
474      lastmatchrestart  where we restarted after the last match
475      endptr            end of available data
476      printname         filename for printing
477    
478    Returns:            nothing
479    */
480    
481    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
482      char *endptr, char *printname)
483    {
484    if (after_context > 0 && lastmatchnumber > 0)
485      {
486      int count = 0;
487      while (lastmatchrestart < endptr && count++ < after_context)
488        {
489        char *pp = lastmatchrestart;
490        if (printname != NULL) fprintf(stdout, "%s-", printname);
491        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
492        while (*pp != '\n') pp++;
493        fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);
494        lastmatchrestart = pp + 1;
495        }
496      hyphenpending = TRUE;
497      }
498    }
499    
500    
501    
502    /*************************************************
503    *            Grep an individual file             *
504  *************************************************/  *************************************************/
505    
506    /* This is called from grep_or_recurse() below. It uses a buffer that is three
507    times the value of MBUFTHIRD. The matching point is never allowed to stray into
508    the top third of the buffer, thus keeping more of the file available for
509    context printing or for multiline scanning. For large files, the pointer will
510    be in the middle third most of the time, so the bottom third is available for
511    "before" context printing.
512    
513    Arguments:
514      in           the fopened FILE stream
515      printname    the file name if it is to be printed for each match
516                   or NULL if the file name is not to be printed
517                   it cannot be NULL if filenames[_nomatch]_only is set
518    
519    Returns:       0 if there was at least one match
520                   1 otherwise (no matches)
521    */
522    
523  static int  static int
524  pcregrep(FILE *in, char *name)  pcregrep(FILE *in, char *printname)
525  {  {
526  int rc = 1;  int rc = 1;
527  int linenumber = 0;  int linenumber = 1;
528    int lastmatchnumber = 0;
529  int count = 0;  int count = 0;
530  int offsets[99];  int offsets[99];
531  char buffer[BUFSIZ];  char *lastmatchrestart = NULL;
532    char buffer[3*MBUFTHIRD];
533    char *ptr = buffer;
534    char *endptr;
535    size_t bufflength;
536    BOOL endhyphenpending = FALSE;
537    
538    /* Do the first read into the start of the buffer and set up the pointer to
539    end of what we have. */
540    
541    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
542    endptr = buffer + bufflength;
543    
544    /* Loop while the current pointer is not at the end of the file. For large
545    files, endptr will be at the end of the buffer when we are in the middle of the
546    file, but ptr will never get there, because as soon as it gets over 2/3 of the
547    way, the buffer is shifted left and re-filled. */
548    
549  while (fgets(buffer, sizeof(buffer), in) != NULL)  while (ptr < endptr)
550    {    {
551    BOOL match;    int i;
552    int length = (int)strlen(buffer);    int mrc = 0;
553    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    BOOL match = FALSE;
554    linenumber++;    char *t = ptr;
555      size_t length, linelength;
556    
557      /* At this point, ptr is at the start of a line. We need to find the length
558      of the subject string to pass to pcre_exec(). In multiline mode, it is the
559      length of the remainder of the data in the buffer. Otherwise, it is the length of
560      the next line. After matching, we always advance by the length of the next
561      line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
562      that any match is constrained to be in the first line. */
563    
564      linelength = 0;
565      while (t < endptr && *t++ != '\n') linelength++;
566      length = multiline? endptr - ptr : linelength;
567    
568    match = pcre_exec(pattern, hints, buffer, length, 0, 0, offsets, 99) >= 0;    /* Run through all the patterns until one matches. Note that we don't include
569    if (match && whole_lines && offsets[1] != length) match = FALSE;    the final newline in the subject string. */
570    
571      for (i = 0; i < pattern_count; i++)
572        {
573        mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
574          offsets, 99);
575        if (mrc >= 0) { match = TRUE; break; }
576        if (mrc != PCRE_ERROR_NOMATCH)
577          {
578          fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
579          if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
580          fprintf(stderr, "this line:\n");
581          fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */
582          fprintf(stderr, "\n");
583          if (error_count == 0 &&
584              (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
585            {
586            fprintf(stderr, "pcregrep: error %d means that a resource limit "
587              "was exceeded\n", mrc);
588            fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
589            }
590          if (error_count++ > 20)
591            {
592            fprintf(stderr, "pcregrep: too many errors - abandoned\n");
593            exit(2);
594            }
595          match = invert;    /* No more matching; don't show the line again */
596          break;
597          }
598        }
599    
600      /* If it's a match or a not-match (as required), do what's wanted. */
601    
602    if (match != invert)    if (match != invert)
603      {      {
604        BOOL hyphenprinted = FALSE;
605    
606        /* We've failed if we want a file that doesn't have any matches. */
607    
608        if (filenames == FN_NOMATCH_ONLY) return 1;
609    
610        /* Just count if just counting is wanted. */
611    
612      if (count_only) count++;      if (count_only) count++;
613    
614      else if (filenames_only)      /* If all we want is a file name, there is no need to scan any more lines
615        in the file. */
616    
617        else if (filenames == FN_ONLY)
618        {        {
619        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        fprintf(stdout, "%s\n", printname);
620        return 0;        return 0;
621        }        }
622    
623      else if (silent) return 0;      /* Likewise, if all we want is a yes/no answer. */
624    
625        else if (quiet) return 0;
626    
627        /* The --only-matching option prints just the substring that matched, and
628        does not print any context. */
629    
630        else if (only_matching)
631          {
632          if (printname != NULL) fprintf(stdout, "%s:", printname);
633          if (number) fprintf(stdout, "%d:", linenumber);
634          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
635          fprintf(stdout, "\n");
636          }
637    
638        /* This is the default case when none of the above options is set. We print
639        the matching line(s), possibly preceded and/or followed by other lines of
640        context. */
641    
642      else      else
643        {        {
644        if (name != NULL) fprintf(stdout, "%s:", name);        /* See if there is a requirement to print some "after" lines from a
645          previous match. We never print any overlaps. */
646    
647          if (after_context > 0 && lastmatchnumber > 0)
648            {
649            int linecount = 0;
650            char *p = lastmatchrestart;
651    
652            while (p < ptr && linecount < after_context)
653              {
654              while (*p != '\n') p++;
655              p++;
656              linecount++;
657              }
658    
659            /* It is important to advance lastmatchrestart during this printing so
660            that it interacts correctly with any "before" printing below. Print
661            each line's data using fwrite() in case there are binary zeroes. */
662    
663            while (lastmatchrestart < p)
664              {
665              char *pp = lastmatchrestart;
666              if (printname != NULL) fprintf(stdout, "%s-", printname);
667              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
668              while (*pp != '\n') pp++;
669              fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);
670              lastmatchrestart = pp + 1;
671              }
672            if (lastmatchrestart != ptr) hyphenpending = TRUE;
673            }
674    
675          /* If there were non-contiguous lines printed above, insert hyphens. */
676    
677          if (hyphenpending)
678            {
679            fprintf(stdout, "--\n");
680            hyphenpending = FALSE;
681            hyphenprinted = TRUE;
682            }
683    
684          /* See if there is a requirement to print some "before" lines for this
685          match. Again, don't print overlaps. */
686    
687          if (before_context > 0)
688            {
689            int linecount = 0;
690            char *p = ptr;
691    
692            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
693                   linecount < before_context)
694              {
695              linecount++;
696              p--;
697              while (p > buffer && p[-1] != '\n') p--;
698              }
699    
700            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
701              fprintf(stdout, "--\n");
702    
703            while (p < ptr)
704              {
705              char *pp = p;
706              if (printname != NULL) fprintf(stdout, "%s-", printname);
707              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
708              while (*pp != '\n') pp++;
709              fwrite(p, 1, pp - p + 1, stdout);   /* In case binary zero */
710              p = pp + 1;
711              }
712            }
713    
714          /* Now print the matching line(s); ensure we set hyphenpending at the end
715          of the file if any context lines are being output. */
716    
717          if (after_context > 0 || before_context > 0)
718            endhyphenpending = TRUE;
719    
720          if (printname != NULL) fprintf(stdout, "%s:", printname);
721        if (number) fprintf(stdout, "%d:", linenumber);        if (number) fprintf(stdout, "%d:", linenumber);
722        fprintf(stdout, "%s\n", buffer);  
723          /* In multiline mode, we want to print to the end of the line in which
724          the end of the matched string is found, so we adjust linelength and the
725          line number appropriately. Because the PCRE_FIRSTLINE option is set, the
726          start of the match will always be before the first \n character. */
727    
728          if (multiline)
729            {
730            char *endmatch = ptr + offsets[1];
731            t = ptr;
732            while (t < endmatch) { if (*t++ == '\n') linenumber++; }
733            while (endmatch < endptr && *endmatch != '\n') endmatch++;
734            linelength = endmatch - ptr;
735            }
736    
737          /*** NOTE: Use only fwrite() to output the data line, so that binary
738          zeroes are treated as just another data character. */
739    
740          /* This extra option, for Jeffrey Friedl's debugging requirements,
741          replaces the matched string, or a specific captured string if it exists,
742          with X. When this happens, colouring is ignored. */
743    
744    #ifdef JFRIEDL_DEBUG
745          if (S_arg >= 0 && S_arg < mrc)
746            {
747            int first = S_arg * 2;
748            int last  = first + 1;
749            fwrite(ptr, 1, offsets[first], stdout);
750            fprintf(stdout, "X");
751            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
752            }
753          else
754    #endif
755    
756          /* We have to split the line(s) up if colouring. */
757    
758          if (do_colour)
759            {
760            fwrite(ptr, 1, offsets[0], stdout);
761            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
762            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
763            fprintf(stdout, "%c[00m", 0x1b);
764            fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
765            }
766          else fwrite(ptr, 1, linelength, stdout);
767    
768          fprintf(stdout, "\n");
769        }        }
770    
771      rc = 0;      /* End of doing what has to be done for a match */
772    
773        rc = 0;    /* Had some success */
774    
775        /* Remember where the last match happened for after_context. We remember
776        where we are about to restart, and that line's number. */
777    
778        lastmatchrestart = ptr + linelength + 1;
779        lastmatchnumber = linenumber + 1;
780      }      }
781    
782      /* Advance to after the newline and increment the line number. */
783    
784      ptr += linelength + 1;
785      linenumber++;
786    
787      /* If we haven't yet reached the end of the file (the buffer is full), and
788      the current point is in the top 1/3 of the buffer, slide the buffer down by
789      1/3 and refill it. Before we do this, if some unprinted "after" lines are
790      about to be lost, print them. */
791    
792      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
793        {
794        if (after_context > 0 &&
795            lastmatchnumber > 0 &&
796            lastmatchrestart < buffer + MBUFTHIRD)
797          {
798          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
799          lastmatchnumber = 0;
800          }
801    
802        /* Now do the shuffle */
803    
804        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
805        ptr -= MBUFTHIRD;
806        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
807        endptr = buffer + bufflength;
808    
809        /* Adjust any last match point */
810    
811        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
812        }
813      }     /* Loop through the whole file */
814    
815    /* End of file; print final "after" lines if wanted; do_after_lines sets
816    hyphenpending if it prints something. */
817    
818    if (!only_matching && !count_only)
819      {
820      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
821      hyphenpending |= endhyphenpending;
822    }    }
823    
824    /* Print the file name if we are looking for those without matches and there
825    were none. If we found a match, we won't have got this far. */
826    
827    if (filenames == FN_NOMATCH_ONLY)
828      {
829      fprintf(stdout, "%s\n", printname);
830      return 0;
831      }
832    
833    /* Print the match count if wanted */
834    
835  if (count_only)  if (count_only)
836    {    {
837    if (name != NULL) fprintf(stdout, "%s:", name);    if (printname != NULL) fprintf(stdout, "%s:", printname);
838    fprintf(stdout, "%d\n", count);    fprintf(stdout, "%d\n", count);
839    }    }
840    
# Line 114  return rc; Line 843  return rc;
843    
844    
845    
846    /*************************************************
847    *     Grep a file or recurse into a directory    *
848    *************************************************/
849    
850    /* Given a path name, if it's a directory, scan all the files if we are
851    recursing; if it's a file, grep it.
852    
853    Arguments:
854      pathname          the path to investigate
855      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
856      only_one_at_top   TRUE if the path is the only one at toplevel
857    
858    Returns:   0 if there was at least one match
859               1 if there were no matches
860               2 there was some kind of error
861    
862    However, file opening failures are suppressed if "silent" is set.
863    */
864    
865    static int
866    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
867    {
868    int rc = 1;
869    int sep;
870    FILE *in;
871    
872    /* If the file name is "-" we scan stdin */
873    
874    if (strcmp(pathname, "-") == 0)
875      {
876      return pcregrep(stdin,
877        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
878          stdin_name : NULL);
879      }
880    
881    
882    /* If the file is a directory, skip if skipping or if we are recursing, scan
883    each file within it, subject to any include or exclude patterns that were set.
884    The scanning code is localized so it can be made system-specific. */
885    
886    if ((sep = isdirectory(pathname)) != 0)
887      {
888      if (dee_action == dee_SKIP) return 1;
889      if (dee_action == dee_RECURSE)
890        {
891        char buffer[1024];
892        char *nextfile;
893        directory_type *dir = opendirectory(pathname);
894    
895        if (dir == NULL)
896          {
897          if (!silent)
898            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
899              strerror(errno));
900          return 2;
901          }
902    
903        while ((nextfile = readdirectory(dir)) != NULL)
904          {
905          int frc, blen;
906          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
907          blen = strlen(buffer);
908    
909          if (exclude_compiled != NULL &&
910              pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
911            continue;
912    
913          if (include_compiled != NULL &&
914              pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
915            continue;
916    
917          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
918          if (frc > 1) rc = frc;
919           else if (frc == 0 && rc == 1) rc = 0;
920          }
921    
922        closedirectory(dir);
923        return rc;
924        }
925      }
926    
927    /* If the file is not a directory and not a regular file, skip it if that's
928    been requested. */
929    
930    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
931    
932    /* Control reaches here if we have a regular file, or if we have a directory
933    and recursion or skipping was not requested, or if we have anything else and
934    skipping was not requested. The scan proceeds. If this is the first and only
935    argument at top level, we don't show the file name, unless we are only showing
936    the file name, or the filename was forced (-H). */
937    
938    in = fopen(pathname, "r");
939    if (in == NULL)
940      {
941      if (!silent)
942        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
943          strerror(errno));
944      return 2;
945      }
946    
947    rc = pcregrep(in, (filenames > FN_DEFAULT ||
948      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
949    
950    fclose(in);
951    return rc;
952    }
953    
954    
955    
956    
957  /*************************************************  /*************************************************
958  *                Usage function                  *  *                Usage function                  *
# Line 122  return rc; Line 961  return rc;
961  static int  static int
962  usage(int rc)  usage(int rc)
963  {  {
964  fprintf(stderr, "Usage: pcregrep [-Vchilnsvx] pattern [file] ...\n");  option_item *op;
965    fprintf(stderr, "Usage: pcregrep [-");
966    for (op = optionlist; op->one_char != 0; op++)
967      {
968      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
969      }
970    fprintf(stderr, "] [long options] [pattern] [files]\n");
971    fprintf(stderr, "Type `pcregrep --help' for more information.\n");
972  return rc;  return rc;
973  }  }
974    
# Line 130  return rc; Line 976  return rc;
976    
977    
978  /*************************************************  /*************************************************
979    *                Help function                   *
980    *************************************************/
981    
982    static void
983    help(void)
984    {
985    option_item *op;
986    
987    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
988    printf("Search for PATTERN in each FILE or standard input.\n");
989    printf("PATTERN must be present if neither -e nor -f is used.\n");
990    printf("\"-\" can be used as a file name to mean STDIN.\n\n");
991    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
992    
993    printf("Options:\n");
994    
995    for (op = optionlist; op->one_char != 0; op++)
996      {
997      int n;
998      char s[4];
999      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1000      printf("  %s --%s%n", s, op->long_name, &n);
1001      n = 30 - n;
1002      if (n < 1) n = 1;
1003      printf("%.*s%s\n", n, "                    ", op->help_text);
1004      }
1005    
1006    printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1007    printf("trailing white space is removed and blank lines are ignored.\n");
1008    printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1009    
1010    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1011    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1012    }
1013    
1014    
1015    
1016    
1017    /*************************************************
1018    *    Handle a single-letter, no data option      *
1019    *************************************************/
1020    
1021    static int
1022    handle_option(int letter, int options)
1023    {
1024    switch(letter)
1025      {
1026      case N_HELP: help(); exit(0);
1027      case 'c': count_only = TRUE; break;
1028      case 'F': process_options |= PO_FIXED_STRINGS; break;
1029      case 'H': filenames = FN_FORCE; break;
1030      case 'h': filenames = FN_NONE; break;
1031      case 'i': options |= PCRE_CASELESS; break;
1032      case 'l': filenames = FN_ONLY; break;
1033      case 'L': filenames = FN_NOMATCH_ONLY; break;
1034      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1035      case 'n': number = TRUE; break;
1036      case 'o': only_matching = TRUE; break;
1037      case 'q': quiet = TRUE; break;
1038      case 'r': dee_action = dee_RECURSE; break;
1039      case 's': silent = TRUE; break;
1040      case 'u': options |= PCRE_UTF8; break;
1041      case 'v': invert = TRUE; break;
1042      case 'w': process_options |= PO_WORD_MATCH; break;
1043      case 'x': process_options |= PO_LINE_MATCH; break;
1044    
1045      case 'V':
1046      fprintf(stderr, "pcregrep version %s using ", VERSION);
1047      fprintf(stderr, "PCRE version %s\n", pcre_version());
1048      exit(0);
1049      break;
1050    
1051      default:
1052      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1053      exit(usage(2));
1054      }
1055    
1056    return options;
1057    }
1058    
1059    
1060    
1061    
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", "11th", etc.

Argument:   n   the number
Returns:    a pointer to a static buffer holding the text; the buffer is
            overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[16];     /* room for any 32-bit int plus the ending */
const char *ending = "th";
int lasttwo = n % 100;

/* Numbers ending in 11, 12 or 13 take "th", whatever their last digit. */

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1084    
1085    
1086    
1087    /*************************************************
1088    *          Compile a single pattern              *
1089    *************************************************/
1090    
1091    /* When the -F option has been used, this is called for each substring.
1092    Otherwise it's called for each supplied pattern.
1093    
1094    Arguments:
1095      pattern        the pattern string
1096      options        the PCRE options
1097      filename       the file name, or NULL for a command-line pattern
1098      count          0 if this is the only command line pattern, or
1099                     number of the command line pattern, or
1100                     linenumber for a pattern from a file
1101    
1102    Returns:         TRUE on success, FALSE after an error
1103    */
1104    
1105    static BOOL
1106    compile_single_pattern(char *pattern, int options, char *filename, int count)
1107    {
1108    char buffer[MBUFTHIRD + 16];
1109    const char *error;
1110    int errptr;
1111    
1112    if (pattern_count >= MAX_PATTERN_COUNT)
1113      {
1114      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1115        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1116      return FALSE;
1117      }
1118    
1119    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1120      suffix[process_options]);
1121    pattern_list[pattern_count] =
1122      pcre_compile(buffer, options, &error, &errptr, pcretables);
1123    if (pattern_list[pattern_count++] != NULL) return TRUE;
1124    
1125    /* Handle compile errors */
1126    
1127    errptr -= (int)strlen(prefix[process_options]);
1128    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1129    
1130    if (filename == NULL)
1131      {
1132      if (count == 0)
1133        fprintf(stderr, "pcregrep: Error in command-line regex "
1134          "at offset %d: %s\n", errptr, error);
1135      else
1136        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1137          "at offset %d: %s\n", ordin(count), errptr, error);
1138      }
1139    else
1140      {
1141      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1142        "at offset %d: %s\n", count, filename, errptr, error);
1143      }
1144    
1145    return FALSE;
1146    }
1147    
1148    
1149    
1150    /*************************************************
1151    *           Compile one supplied pattern         *
1152    *************************************************/
1153    
1154    /* When the -F option has been used, each string may be a list of strings,
1155    separated by newlines. They will be matched literally.
1156    
1157    Arguments:
1158      pattern        the pattern string
1159      options        the PCRE options
1160      filename       the file name, or NULL for a command-line pattern
1161      count          0 if this is the only command line pattern, or
1162                     number of the command line pattern, or
1163                     linenumber for a pattern from a file
1164    
1165    Returns:         TRUE on success, FALSE after an error
1166    */
1167    
1168    static BOOL
1169    compile_pattern(char *pattern, int options, char *filename, int count)
1170    {
1171    if ((process_options & PO_FIXED_STRINGS) != 0)
1172      {
1173      char buffer[MBUFTHIRD];
1174      for(;;)
1175        {
1176        char *p = strchr(pattern, '\n');
1177        if (p == NULL)
1178          return compile_single_pattern(pattern, options, filename, count);
1179        sprintf(buffer, "%.*s", p - pattern, pattern);
1180        pattern = p + 1;
1181        if (!compile_single_pattern(buffer, options, filename, count))
1182          return FALSE;
1183        }
1184      }
1185    else return compile_single_pattern(pattern, options, filename, count);
1186    }
1187    
1188    
1189    
1190    /*************************************************
1191  *                Main program                    *  *                Main program                    *
1192  *************************************************/  *************************************************/
1193    
1194    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1195    
1196  int  int
1197  main(int argc, char **argv)  main(int argc, char **argv)
1198  {  {
1199  int i;  int i, j;
1200  int rc = 1;  int rc = 1;
1201  int options = 0;  int pcre_options = 0;
1202    int cmd_pattern_count = 0;
1203  int errptr;  int errptr;
1204    BOOL only_one_at_top;
1205    char *patterns[MAX_PATTERN_COUNT];
1206    const char *locale_from = "--locale";
1207  const char *error;  const char *error;
 BOOL filenames = TRUE;  
1208    
1209  /* Process the options */  /* Process the options */
1210    
1211  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
1212    {    {
1213    char *s;    option_item *op = NULL;
1214      char *option_data = (char *)"";    /* default to keep compiler happy */
1215      BOOL longop;
1216      BOOL longopwasequals = FALSE;
1217    
1218    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
1219    s = argv[i] + 1;  
1220    while (*s != 0)    /* If we hit an argument that is just "-", it may be a reference to STDIN,
1221      but only if we have previously had -e or -f to define the patterns. */
1222    
1223      if (argv[i][1] == 0)
1224        {
1225        if (pattern_filename != NULL || pattern_count > 0) break;
1226          else exit(usage(2));
1227        }
1228    
1229      /* Handle a long name option, or -- to terminate the options */
1230    
1231      if (argv[i][1] == '-')
1232        {
1233        char *arg = argv[i] + 2;
1234        char *argequals = strchr(arg, '=');
1235    
1236        if (*arg == 0)    /* -- terminates options */
1237          {
1238          i++;
1239          break;                /* out of the options-handling loop */
1240          }
1241    
1242        longop = TRUE;
1243    
1244        /* Some long options have data that follows after =, for example file=name.
1245        Some options have variations in the long name spelling: specifically, we
1246        allow "regexp" because GNU grep allows it, though I personally go along
1247        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1248        These options are entered in the table as "regex(p)". No option is in both
1249        these categories, fortunately. */
1250    
1251        for (op = optionlist; op->one_char != 0; op++)
1252          {
1253          char *opbra = strchr(op->long_name, '(');
1254          char *equals = strchr(op->long_name, '=');
1255          if (opbra == NULL)     /* Not a (p) case */
1256            {
1257            if (equals == NULL)  /* Not thing=data case */
1258              {
1259              if (strcmp(arg, op->long_name) == 0) break;
1260              }
1261            else                 /* Special case xxx=data */
1262              {
1263              int oplen = equals - op->long_name;
1264              int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
1265              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1266                {
1267                option_data = arg + arglen;
1268                if (*option_data == '=')
1269                  {
1270                  option_data++;
1271                  longopwasequals = TRUE;
1272                  }
1273                break;
1274                }
1275              }
1276            }
1277          else                   /* Special case xxxx(p) */
1278            {
1279            char buff1[24];
1280            char buff2[24];
1281            int baselen = opbra - op->long_name;
1282            sprintf(buff1, "%.*s", baselen, op->long_name);
1283            sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
1284              opbra + 1);
1285            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1286              break;
1287            }
1288          }
1289    
1290        if (op->one_char == 0)
1291          {
1292          fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1293          exit(usage(2));
1294          }
1295        }
1296    
1297      /* One-char options; many that have no data may be in a single argument; we
1298      continue till we hit the last one or one that needs data. */
1299    
1300      else
1301      {      {
1302      switch (*s++)      char *s = argv[i] + 1;
1303        longop = FALSE;
1304        while (*s != 0)
1305        {        {
1306        case 'c': count_only = TRUE; break;        for (op = optionlist; op->one_char != 0; op++)
1307        case 'h': filenames = FALSE; break;          { if (*s == op->one_char) break; }
1308        case 'i': options |= PCRE_CASELESS; break;        if (op->one_char == 0)
1309        case 'l': filenames_only = TRUE;          {
1310        case 'n': number = TRUE; break;          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1311        case 's': silent = TRUE; break;            *s, argv[i]);
1312        case 'v': invert = TRUE; break;          exit(usage(2));
1313        case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;          }
1314          if (op->type != OP_NODATA || s[1] == 0)
1315            {
1316            option_data = s+1;
1317            break;
1318            }
1319          pcre_options = handle_option(*s++, pcre_options);
1320          }
1321        }
1322    
1323      /* At this point we should have op pointing to a matched option. If the type
1324      is NO_DATA, it means that there is no data, and the option might set
1325      something in the PCRE options. */
1326    
1327      if (op->type == OP_NODATA)
1328        {
1329        pcre_options = handle_option(op->one_char, pcre_options);
1330        continue;
1331        }
1332    
1333        case 'V':    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1334        fprintf(stderr, "PCRE version %s\n", pcre_version());    either has a value or defaults to something. It cannot have data in a
1335      separate item. At the moment, the only such options are "colo(u)r" and
1336      Jeffrey Friedl's special debugging option. */
1337    
1338      if (*option_data == 0 &&
1339          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1340        {
1341        switch (op->one_char)
1342          {
1343          case N_COLOUR:
1344          colour_option = (char *)"auto";
1345          break;
1346    #ifdef JFRIEDL_DEBUG
1347          case 'S':
1348          S_arg = 0;
1349        break;        break;
1350    #endif
1351          }
1352        continue;
1353        }
1354    
1355      /* Otherwise, find the data string for the option. */
1356    
1357      if (*option_data == 0)
1358        {
1359        if (i >= argc - 1 || longopwasequals)
1360          {
1361          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1362          exit(usage(2));
1363          }
1364        option_data = argv[++i];
1365        }
1366    
1367        default:    /* If the option type is OP_PATLIST, it's the -e option, which can be called
1368        fprintf(stderr, "pcregrep: unknown option %c\n", s[-1]);    multiple times to create a list of patterns. */
1369        return usage(2);  
1370      if (op->type == OP_PATLIST)
1371        {
1372        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1373          {
1374          fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1375            MAX_PATTERN_COUNT);
1376          return 2;
1377        }        }
1378        patterns[cmd_pattern_count++] = option_data;
1379        }
1380    
1381      /* Otherwise, deal with single string or numeric data values. */
1382    
1383      else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1384        {
1385        *((char **)op->dataptr) = option_data;
1386      }      }
1387      else
1388        {
1389        char *endptr;
1390        int n = strtoul(option_data, &endptr, 10);
1391        if (*endptr != 0)
1392          {
1393          if (longop)
1394            {
1395            char *equals = strchr(op->long_name, '=');
1396            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1397              equals - op->long_name;
1398            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1399              option_data, nlen, op->long_name);
1400            }
1401          else
1402            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1403              option_data, op->one_char);
1404          exit(usage(2));
1405          }
1406        *((int *)op->dataptr) = n;
1407        }
1408      }
1409    
1410    /* Options have been decoded. If -C was used, its value is used as a default
1411    for -A and -B. */
1412    
1413    if (both_context > 0)
1414      {
1415      if (after_context == 0) after_context = both_context;
1416      if (before_context == 0) before_context = both_context;
1417    }    }
1418    
1419  /* There must be at least a regexp argument */  /* If a locale has not been provided as an option, see if the LC_CTYPE or
1420    LC_ALL environment variable is set, and if so, use it. */
1421    
1422  if (i >= argc) return usage(0);  if (locale == NULL)
1423      {
1424      locale = getenv("LC_ALL");
1425      locale_from = "LCC_ALL";
1426      }
1427    
1428    if (locale == NULL)
1429      {
1430      locale = getenv("LC_CTYPE");
1431      locale_from = "LC_CTYPE";
1432      }
1433    
1434  /* Compile the regular expression. */  /* If a locale has been provided, set it, and generate the tables the PCRE
1435    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1436    
1437  pattern = pcre_compile(argv[i++], options, &error, &errptr, NULL);  if (locale != NULL)
 if (pattern == NULL)  
1438    {    {
1439    fprintf(stderr, "pcregrep: error in regex at offset %d: %s\n", errptr, error);    if (setlocale(LC_CTYPE, locale) == NULL)
1440        {
1441        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1442          locale, locale_from);
1443        return 2;
1444        }
1445      pcretables = pcre_maketables();
1446      }
1447    
1448    /* Sort out colouring */
1449    
1450    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1451      {
1452      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1453      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1454      else
1455        {
1456        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1457          colour_option);
1458        return 2;
1459        }
1460      if (do_colour)
1461        {
1462        char *cs = getenv("PCREGREP_COLOUR");
1463        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1464        if (cs != NULL) colour_string = cs;
1465        }
1466      }
1467    
1468    /* Interpret the text values for -d and -D */
1469    
1470    if (dee_option != NULL)
1471      {
1472      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1473      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1474      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1475      else
1476        {
1477        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1478        return 2;
1479        }
1480      }
1481    
1482    if (DEE_option != NULL)
1483      {
1484      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1485      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1486      else
1487        {
1488        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1489        return 2;
1490        }
1491      }
1492    
1493    /* Check the value for Jeff Friedl's debugging option. */
1494    
1495    #ifdef JFRIEDL_DEBUG
1496    if (S_arg > 9)
1497      {
1498      fprintf(stderr, "pcregrep: bad value for -S option\n");
1499    return 2;    return 2;
1500    }    }
1501    #endif
1502    
1503    /* Get memory to store the pattern and hints lists. */
1504    
1505  /* Study the regular expression, as we will be running it may times */  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1506    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1507    
1508  hints = pcre_study(pattern, 0, &error);  if (pattern_list == NULL || hints_list == NULL)
 if (error != NULL)  
1509    {    {
1510    fprintf(stderr, "pcregrep: error while studing regex: %s\n", error);    fprintf(stderr, "pcregrep: malloc failed\n");
1511    return 2;    return 2;
1512    }    }
1513    
1514  /* If there are no further arguments, do the business on stdin and exit */  /* If no patterns were provided by -e, and there is no file provided by -f,
1515    the first argument is the one and only pattern, and it must exist. */
1516    
1517  if (i >= argc) return pcregrep(stdin, NULL);  if (cmd_pattern_count == 0 && pattern_filename == NULL)
1518      {
1519      if (i >= argc) return usage(2);
1520      patterns[cmd_pattern_count++] = argv[i++];
1521      }
1522    
1523  /* Otherwise, work through the remaining arguments as files. If there is only  /* Compile the patterns that were provided on the command line, either by
1524  one, don't give its name on the output. */  multiple uses of -e or as a single unkeyed pattern. */
1525    
1526  if (i == argc - 1) filenames = FALSE;  for (j = 0; j < cmd_pattern_count; j++)
1527  if (filenames_only) filenames = TRUE;    {
1528      if (!compile_pattern(patterns[j], pcre_options, NULL,
1529           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1530        return 2;
1531      }
1532    
1533  for (; i < argc; i++)  /* Compile the regular expressions that are provided in a file. */
1534    
1535    if (pattern_filename != NULL)
1536    {    {
1537    FILE *in = fopen(argv[i], "r");    int linenumber = 0;
1538    if (in == NULL)    FILE *f;
1539      char *filename;
1540      char buffer[MBUFTHIRD];
1541    
1542      if (strcmp(pattern_filename, "-") == 0)
1543      {      {
1544      fprintf(stderr, "%s: failed to open: %s\n", argv[i], strerror(errno));      f = stdin;
1545      rc = 2;      filename = stdin_name;
1546      }      }
1547    else    else
1548      {      {
1549      int frc = pcregrep(in, filenames? argv[i] : NULL);      f = fopen(pattern_filename, "r");
1550      if (frc == 0 && rc == 1) rc = 0;      if (f == NULL)
1551      fclose(in);        {
1552          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1553            strerror(errno));
1554          return 2;
1555          }
1556        filename = pattern_filename;
1557        }
1558    
1559      while (fgets(buffer, MBUFTHIRD, f) != NULL)
1560        {
1561        char *s = buffer + (int)strlen(buffer);
1562        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
1563        *s = 0;
1564        linenumber++;
1565        if (buffer[0] == 0) continue;   /* Skip blank lines */
1566        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1567          return 2;
1568        }
1569    
1570      if (f != stdin) fclose(f);
1571      }
1572    
1573    /* Study the regular expressions, as we will be running them many times */
1574    
1575    for (j = 0; j < pattern_count; j++)
1576      {
1577      hints_list[j] = pcre_study(pattern_list[j], 0, &error);
1578      if (error != NULL)
1579        {
1580        char s[16];
1581        if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1582        fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1583        return 2;
1584        }
1585      }
1586    
1587    /* If there are include or exclude patterns, compile them. */
1588    
1589    if (exclude_pattern != NULL)
1590      {
1591      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
1592        pcretables);
1593      if (exclude_compiled == NULL)
1594        {
1595        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1596          errptr, error);
1597        return 2;
1598      }      }
1599    }    }
1600    
1601    if (include_pattern != NULL)
1602      {
1603      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
1604        pcretables);
1605      if (include_compiled == NULL)
1606        {
1607        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1608          errptr, error);
1609        return 2;
1610        }
1611      }
1612    
1613    /* If there are no further arguments, do the business on stdin and exit. */
1614    
1615    if (i >= argc)
1616      return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
1617    
1618    /* Otherwise, work through the remaining arguments as files or directories.
1619    Pass in the fact that there is only one argument at top level - this suppresses
1620    the file name if the argument is not a directory and filenames are not
1621    otherwise forced. */
1622    
1623    only_one_at_top = i == argc - 1;   /* Catch initial value of i */
1624    
1625    for (; i < argc; i++)
1626      {
1627      int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
1628        only_one_at_top);
1629      if (frc > 1) rc = frc;
1630        else if (frc == 0 && rc == 1) rc = 0;
1631      }
1632    
1633  return rc;  return rc;
1634  }  }
1635    
1636  /* End */  /* End of pcregrep */

Legend:
Removed from v.49  
changed lines
  Added in v.87

  ViewVC Help
Powered by ViewVC 1.1.5