/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix system it can recurse into directories. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2006 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40  #include <ctype.h>  #include <ctype.h>
41    #include <locale.h>
42  #include <stdio.h>  #include <stdio.h>
43  #include <string.h>  #include <string.h>
44  #include <stdlib.h>  #include <stdlib.h>
45  #include <errno.h>  #include <errno.h>
46    
47    #include <sys/types.h>
48    #include <sys/stat.h>
49    #include <unistd.h>
50    
51  #include "config.h"  #include "config.h"
52  #include "pcre.h"  #include "pcre.h"
53    
# Line 18  its pattern matching. On a Unix system i Line 56  its pattern matching. On a Unix system i
56    
57  typedef int BOOL;  typedef int BOOL;
58    
59  #define VERSION "2.0 01-Aug-2001"  #define VERSION "4.2 09-Jan-2006"
60  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
61    
62    #if BUFSIZ > 8192
63    #define MBUFTHIRD BUFSIZ
64    #else
65    #define MBUFTHIRD 8192
66    #endif
67    
68    
69    /* Values for the "filenames" variable, which specifies options for file name
70    output. The order is important; it is assumed that a file name is wanted for
71    all values greater than FN_DEFAULT. */
72    
73    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
74    
75    /* Actions for the -d and -D options */
76    
77    enum { dee_READ, dee_SKIP, dee_RECURSE };
78    enum { DEE_READ, DEE_SKIP };
79    
80    /* Actions for special processing options (flag bits) */
81    
82    #define PO_WORD_MATCH     0x0001
83    #define PO_LINE_MATCH     0x0002
84    #define PO_FIXED_STRINGS  0x0004
85    
86    
87    
88  /*************************************************  /*************************************************
89  *               Global variables                 *  *               Global variables                 *
90  *************************************************/  *************************************************/
91    
92    /* Jeffrey Friedl has some debugging requirements that are not part of the
93    regular code. */
94    
95    #ifdef JFRIEDL_DEBUG
96    static int S_arg = -1;
97    #endif
98    
99    static char *colour_string = (char *)"1;31";
100    static char *colour_option = NULL;
101    static char *dee_option = NULL;
102    static char *DEE_option = NULL;
103  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
104    static char *stdin_name = (char *)"(standard input)";
105    static char *locale = NULL;
106    
107    static const unsigned char *pcretables = NULL;
108    
109  static int  pattern_count = 0;  static int  pattern_count = 0;
110  static pcre **pattern_list;  static pcre **pattern_list;
111  static pcre_extra **hints_list;  static pcre_extra **hints_list;
112    
113    static char *include_pattern = NULL;
114    static char *exclude_pattern = NULL;
115    
116    static pcre *include_compiled = NULL;
117    static pcre *exclude_compiled = NULL;
118    
119    static int after_context = 0;
120    static int before_context = 0;
121    static int both_context = 0;
122    static int dee_action = dee_READ;
123    static int DEE_action = DEE_READ;
124    static int error_count = 0;
125    static int filenames = FN_DEFAULT;
126    static int process_options = 0;
127    
128  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
129  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
130  static BOOL filenames_only = FALSE;  static BOOL hyphenpending = FALSE;
131  static BOOL invert = FALSE;  static BOOL invert = FALSE;
132    static BOOL multiline = FALSE;
133  static BOOL number = FALSE;  static BOOL number = FALSE;
134  static BOOL recurse = FALSE;  static BOOL only_matching = FALSE;
135    static BOOL quiet = FALSE;
136  static BOOL silent = FALSE;  static BOOL silent = FALSE;
 static BOOL whole_lines = FALSE;  
137    
138  /* Structure for options and list of them */  /* Structure for options and list of them */
139    
140    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
141           OP_PATLIST };
142    
143  typedef struct option_item {  typedef struct option_item {
144      int type;
145    int one_char;    int one_char;
146    char *long_name;    void *dataptr;
147    char *help_text;    const char *long_name;
148      const char *help_text;
149  } option_item;  } option_item;
150    
151    /* Options without a single-letter equivalent get a negative value. This can be
152    used to identify them. */
153    
154    #define N_COLOUR    (-1)
155    #define N_EXCLUDE   (-2)
156    #define N_HELP      (-3)
157    #define N_INCLUDE   (-4)
158    #define N_LABEL     (-5)
159    #define N_LOCALE    (-6)
160    #define N_NULL      (-7)
161    
162  static option_item optionlist[] = {  static option_item optionlist[] = {
163    { -1,  "help",         "display this help and exit" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
164    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
165    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
166    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
167    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
168    { 'n', "line-number",  "print line number with output lines" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
169    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
170    { 's', "no-messages",  "suppress error messages" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
171    { 'V', "version",      "print version information and exit" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
172    { 'v', "invert-match", "select non-matching lines" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
173    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
174    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
175    { 0,    NULL,           NULL }    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
176      { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
177      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
178      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
179      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
180      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
181      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
182      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
183      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
184      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
185      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
186      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
187      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
188      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
189      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
190    #ifdef JFRIEDL_DEBUG
191      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
192    #endif
193      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
194      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
195      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
196      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
197      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
198      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
199      { OP_NODATA,    0,        NULL,               NULL,            NULL }
200  };  };
201    
202    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
203    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
204    that the combination of -w and -x has the same effect as -x on its own, so we
205    can treat them as the same. */
206    
207    static const char *prefix[] = {
208      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
209    
210    static const char *suffix[] = {
211      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
212    
213    
214    
215  /*************************************************  /*************************************************
216  *       Functions for directory scanning         *  *            OS-specific functions               *
217  *************************************************/  *************************************************/
218    
219  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
220  although at present the only ones are for Unix, and for "no directory recursion  although at present the only ones are for Unix, Win32, and for "no support". */
 support". */  
221    
222    
223  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
# Line 83  support". */ Line 229  support". */
229    
230  typedef DIR directory_type;  typedef DIR directory_type;
231    
232  int  static int
233  isdirectory(char *filename)  isdirectory(char *filename)
234  {  {
235  struct stat statbuf;  struct stat statbuf;
# Line 92  if (stat(filename, &statbuf) < 0) Line 238  if (stat(filename, &statbuf) < 0)
238  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
239  }  }
240    
241  directory_type *  static directory_type *
242  opendirectory(char *filename)  opendirectory(char *filename)
243  {  {
244  return opendir(filename);  return opendir(filename);
245  }  }
246    
247  char *  static char *
248  readdirectory(directory_type *dir)  readdirectory(directory_type *dir)
249  {  {
250  for (;;)  for (;;)
# Line 111  for (;;) Line 257  for (;;)
257  return NULL;   /* Keep compiler happy; never executed */  return NULL;   /* Keep compiler happy; never executed */
258  }  }
259    
260  void  static void
261  closedirectory(directory_type *dir)  closedirectory(directory_type *dir)
262  {  {
263  closedir(dir);  closedir(dir);
264  }  }
265    
266    
267  #else  /************* Test for regular file in Unix **********/
268    
       /* Report whether filename names a regular file, using stat(). The result
       is non-zero when the file type bits of st_mode equal S_IFREG and zero for
       any other type. If stat() itself fails, 1 is returned, i.e. the path is
       treated as if it were regular. */
269    static int
270    isregfile(char *filename)
271    {
272    struct stat statbuf;
273    if (stat(filename, &statbuf) < 0)
274      return 1;        /* In the expectation that opening as a file will fail */
275    return (statbuf.st_mode & S_IFMT) == S_IFREG;
276    }
277    
278    
279    /************* Test stdout for being a terminal in Unix **********/
280    
       /* Return the result of isatty() applied to the file descriptor behind
       stdout: non-zero when standard output is a terminal. */
281    static BOOL
282    is_stdout_tty(void)
283    {
284    return isatty(fileno(stdout));
285    }
286    
287    
288    /************* Directory scanning in Win32 ***********/
289    
290    /* I (Philip Hazel) have no means of testing this code. It was contributed by
291    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
292    when it did not exist. */
293    
294    
295    #elif HAVE_WIN32API
296    
297    #ifndef STRICT
298    # define STRICT
299    #endif
300    #ifndef WIN32_LEAN_AND_MEAN
301    # define WIN32_LEAN_AND_MEAN
302    #endif
303    #include <windows.h>
304    /* Fallback for SDKs lacking the macro; must be tested after <windows.h>. */
305    #ifndef INVALID_FILE_ATTRIBUTES
306    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
307    #endif
308    
       /* State of one directory scan in the Win32 implementation. */
309    typedef struct directory_type
310    {
311    HANDLE handle;             /* search handle returned by FindFirstFile() */
312    BOOL first;                /* TRUE until the first entry has been handed out */
313    WIN32_FIND_DATA data;      /* the entry most recently found */
314    } directory_type;
315    
       /* Return '/' if the attributes of filename include
       FILE_ATTRIBUTE_DIRECTORY, otherwise 0. A path whose attributes cannot be
       obtained also yields 0. */
316    int
317    isdirectory(char *filename)
318    {
319    DWORD attr = GetFileAttributes(filename);
320    if (attr == INVALID_FILE_ATTRIBUTES)
321      return 0;
322    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
323    }
324    
       /* Start a scan of the directory called filename. A search pattern is
       built by appending a backslash and a star to the name and is passed to
       FindFirstFile().

       Returns a malloc'ed directory_type on success (released by
       closedirectory()), or NULL on failure with errno set to EACCES when the
       Windows error is ERROR_ACCESS_DENIED and to ENOENT otherwise. If either
       malloc() fails, a message is written to stderr and the program exits
       with code 2. */
325    directory_type *
326    opendirectory(char *filename)
327    {
328    size_t len;
329    char *pattern;
330    directory_type *dir;
331    DWORD err;
332    len = strlen(filename);
333    pattern = (char *) malloc(len + 3);   /* name + backslash + star + NUL */
334    dir = (directory_type *) malloc(sizeof(*dir));
335    if ((pattern == NULL) || (dir == NULL))
336      {
337      fprintf(stderr, "pcregrep: malloc failed\n");
338      exit(2);
339      }
340    memcpy(pattern, filename, len);
341    memcpy(&(pattern[len]), "\\*", 3);    /* the 3 bytes include the NUL */
342    dir->handle = FindFirstFile(pattern, &(dir->data));
343    if (dir->handle != INVALID_HANDLE_VALUE)
344      {
345      free(pattern);
346      dir->first = TRUE;                  /* data already holds the first entry */
347      return dir;
348      }
349    err = GetLastError();                 /* fetch before free() is called */
350    free(pattern);
351    free(dir);
352    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
353    return NULL;
354    }
355    
       /* Return the name of the next entry in the scan, skipping "." and "..",
       or NULL when FindNextFile() reports no further entry. The first call
       hands out the entry that opendirectory() already fetched. The returned
       pointer refers to dir->data, which the next FindNextFile() call
       overwrites. */
356    char *
357    readdirectory(directory_type *dir)
358    {
359    for (;;)
360      {
361      if (!dir->first)
362        {
363        if (!FindNextFile(dir->handle, &(dir->data)))
364          return NULL;
365        }
366      else
367        {
368        dir->first = FALSE;               /* use the entry found at open time */
369        }
370      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
371        return dir->data.cFileName;
372      }
373    #ifndef _MSC_VER
374    return NULL;   /* Keep compiler happy; never executed */
375    #endif
376    }
377    
       /* End a directory scan: close the search handle and free the
       directory_type block itself. */
378    void
379    closedirectory(directory_type *dir)
380    {
381    FindClose(dir->handle);
382    free(dir);
383    }
384    
385    
386    /************* Test for regular file in Win32 **********/
387    
388    /* I don't know how to do this, or if it can be done; assume all paths are
389    regular if they are not directories. */
390    
391    int isregfile(char *filename)
392    {
393    return !isdirectory(filename)
394    }
395    
396    
397    /************* Test stdout for being a terminal in Win32 **********/
398    
399    /* I don't know how to do this; assume never */
400    
401    static BOOL
402    is_stdout_tty(void)
403    {
404    FALSE;
405    }
406    
407    
408  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
409    
410  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
411    
412    #else
413    
414  typedef void directory_type;  typedef void directory_type;
415    
416  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
417  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) {}
418  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) {}
419  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
420    
421    
422    /************* Test for regular file when we can't do it **********/
423    
424    /* Assume all files are regular. */
425    
426    int isregfile(char *filename) { return 1; }   /* filename is not examined */
427    
428    
429    /************* Test stdout for being a terminal when we can't do it **********/
430    
       /* Without any means of testing, standard output is always reported as
       not being a terminal. */
431    static BOOL
432    is_stdout_tty(void)
433    {
434    return FALSE;
435    }
436    
437    
438  #endif  #endif
439    
440    
# Line 159  return sys_errlist[n]; Line 462  return sys_errlist[n];
462    
463    
464  /*************************************************  /*************************************************
465  *              Grep an individual file           *  *       Print the previous "after" lines         *
466  *************************************************/  *************************************************/
467    
468    /* This is called if we are about to lose said lines because of buffer filling,
469    and at the end of the file. The data in the line is written using fwrite() so
470    that a binary zero does not terminate it.
471    
472    Arguments:
473      lastmatchnumber   the number of the last matching line, plus one
474      lastmatchrestart  where we restarted after the last match
475      endptr            end of available data
476      printname         filename for printing
477    
478    Returns:            nothing
479    */
480    
481    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
482      char *endptr, char *printname)
483    {
484    if (after_context > 0 && lastmatchnumber > 0)
485      {
486      int count = 0;
487      while (lastmatchrestart < endptr && count++ < after_context)
488        {
489        char *pp = lastmatchrestart;
490        if (printname != NULL) fprintf(stdout, "%s-", printname);
491        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
492        while (*pp != '\n') pp++;
493        fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);
494        lastmatchrestart = pp + 1;
495        }
496      hyphenpending = TRUE;
497      }
498    }
499    
500    
501    
502    /*************************************************
503    *            Grep an individual file             *
504    *************************************************/
505    
506    /* This is called from grep_or_recurse() below. It uses a buffer that is three
507    times the value of MBUFTHIRD. The matching point is never allowed to stray into
508    the top third of the buffer, thus keeping more of the file available for
509    context printing or for multiline scanning. For large files, the pointer will
510    be in the middle third most of the time, so the bottom third is available for
511    "before" context printing.
512    
513    Arguments:
514      in           the fopened FILE stream
515      printname    the file name if it is to be printed for each match
516                   or NULL if the file name is not to be printed
517                   it cannot be NULL if filenames[_nomatch]_only is set
518    
519    Returns:       0 if there was at least one match
520                   1 otherwise (no matches)
521    */
522    
523  static int  static int
524  pcregrep(FILE *in, char *name)  pcregrep(FILE *in, char *printname)
525  {  {
526  int rc = 1;  int rc = 1;
527  int linenumber = 0;  int linenumber = 1;
528    int lastmatchnumber = 0;
529  int count = 0;  int count = 0;
530  int offsets[99];  int offsets[99];
531  char buffer[BUFSIZ];  char *lastmatchrestart = NULL;
532    char buffer[3*MBUFTHIRD];
533    char *ptr = buffer;
534    char *endptr;
535    size_t bufflength;
536    BOOL endhyphenpending = FALSE;
537    
538    /* Do the first read into the start of the buffer and set up the pointer to
539    end of what we have. */
540    
541    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
542    endptr = buffer + bufflength;
543    
544    /* Loop while the current pointer is not at the end of the file. For large
545    files, endptr will be at the end of the buffer when we are in the middle of the
546    file, but ptr will never get there, because as soon as it gets over 2/3 of the
547    way, the buffer is shifted left and re-filled. */
548    
549  while (fgets(buffer, sizeof(buffer), in) != NULL)  while (ptr < endptr)
550    {    {
   BOOL match = FALSE;  
551    int i;    int i;
552    int length = (int)strlen(buffer);    int mrc = 0;
553    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    BOOL match = FALSE;
554    linenumber++;    char *t = ptr;
555      size_t length, linelength;
556    
557      /* At this point, ptr is at the start of a line. We need to find the length
558      of the subject string to pass to pcre_exec(). In multiline mode, it is the
559      length of the remainder of the data in the buffer. Otherwise, it is the length of
560      the next line. After matching, we always advance by the length of the next
561      line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
562      that any match is constrained to be in the first line. */
563    
564      linelength = 0;
565      while (t < endptr && *t++ != '\n') linelength++;
566      length = multiline? endptr - ptr : linelength;
567    
568    for (i = 0; !match && i < pattern_count; i++)    /* Run through all the patterns until one matches. Note that we don't include
569      the final newline in the subject string. */
570    
571      for (i = 0; i < pattern_count; i++)
572      {      {
573      match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
574        offsets, 99) >= 0;        offsets, 99);
575      if (match && whole_lines && offsets[1] != length) match = FALSE;      if (mrc >= 0) { match = TRUE; break; }
576        if (mrc != PCRE_ERROR_NOMATCH)
577          {
578          fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
579          if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
580          fprintf(stderr, "this line:\n");
581          fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */
582          fprintf(stderr, "\n");
583          if (error_count == 0 &&
584              (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
585            {
586            fprintf(stderr, "pcregrep: error %d means that a resource limit "
587              "was exceeded\n", mrc);
588            fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
589            }
590          if (error_count++ > 20)
591            {
592            fprintf(stderr, "pcregrep: too many errors - abandoned\n");
593            exit(2);
594            }
595          match = invert;    /* No more matching; don't show the line again */
596          break;
597          }
598      }      }
599    
600      /* If it's a match or a not-match (as required), do what's wanted. */
601    
602    if (match != invert)    if (match != invert)
603      {      {
604        BOOL hyphenprinted = FALSE;
605    
606        /* We've failed if we want a file that doesn't have any matches. */
607    
608        if (filenames == FN_NOMATCH_ONLY) return 1;
609    
610        /* Just count if just counting is wanted. */
611    
612      if (count_only) count++;      if (count_only) count++;
613    
614      else if (filenames_only)      /* If all we want is a file name, there is no need to scan any more lines
615        in the file. */
616    
617        else if (filenames == FN_ONLY)
618        {        {
619        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        fprintf(stdout, "%s\n", printname);
620        return 0;        return 0;
621        }        }
622    
623      else if (silent) return 0;      /* Likewise, if all we want is a yes/no answer. */
624    
625        else if (quiet) return 0;
626    
627        /* The --only-matching option prints just the substring that matched, and
628        does not print any context. */
629    
630        else if (only_matching)
631          {
632          if (printname != NULL) fprintf(stdout, "%s:", printname);
633          if (number) fprintf(stdout, "%d:", linenumber);
634          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
635          fprintf(stdout, "\n");
636          }
637    
638        /* This is the default case when none of the above options is set. We print
639        the matching line(s), possibly preceded and/or followed by other lines of
640        context. */
641    
642      else      else
643        {        {
644        if (name != NULL) fprintf(stdout, "%s:", name);        /* See if there is a requirement to print some "after" lines from a
645          previous match. We never print any overlaps. */
646    
647          if (after_context > 0 && lastmatchnumber > 0)
648            {
649            int linecount = 0;
650            char *p = lastmatchrestart;
651    
652            while (p < ptr && linecount < after_context)
653              {
654              while (*p != '\n') p++;
655              p++;
656              linecount++;
657              }
658    
659            /* It is important to advance lastmatchrestart during this printing so
660            that it interacts correctly with any "before" printing below. Print
661            each line's data using fwrite() in case there are binary zeroes. */
662    
663            while (lastmatchrestart < p)
664              {
665              char *pp = lastmatchrestart;
666              if (printname != NULL) fprintf(stdout, "%s-", printname);
667              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
668              while (*pp != '\n') pp++;
669              fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);
670              lastmatchrestart = pp + 1;
671              }
672            if (lastmatchrestart != ptr) hyphenpending = TRUE;
673            }
674    
675          /* If there were non-contiguous lines printed above, insert hyphens. */
676    
677          if (hyphenpending)
678            {
679            fprintf(stdout, "--\n");
680            hyphenpending = FALSE;
681            hyphenprinted = TRUE;
682            }
683    
684          /* See if there is a requirement to print some "before" lines for this
685          match. Again, don't print overlaps. */
686    
687          if (before_context > 0)
688            {
689            int linecount = 0;
690            char *p = ptr;
691    
692            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
693                   linecount < before_context)
694              {
695              linecount++;
696              p--;
697              while (p > buffer && p[-1] != '\n') p--;
698              }
699    
700            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
701              fprintf(stdout, "--\n");
702    
703            while (p < ptr)
704              {
705              char *pp = p;
706              if (printname != NULL) fprintf(stdout, "%s-", printname);
707              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
708              while (*pp != '\n') pp++;
709              fwrite(p, 1, pp - p + 1, stdout);   /* In case binary zero */
710              p = pp + 1;
711              }
712            }
713    
714          /* Now print the matching line(s); ensure we set hyphenpending at the end
715          of the file if any context lines are being output. */
716    
717          if (after_context > 0 || before_context > 0)
718            endhyphenpending = TRUE;
719    
720          if (printname != NULL) fprintf(stdout, "%s:", printname);
721        if (number) fprintf(stdout, "%d:", linenumber);        if (number) fprintf(stdout, "%d:", linenumber);
722        fprintf(stdout, "%s\n", buffer);  
723          /* In multiline mode, we want to print to the end of the line in which
724          the end of the matched string is found, so we adjust linelength and the
725          line number appropriately. Because the PCRE_FIRSTLINE option is set, the
726          start of the match will always be before the first \n character. */
727    
728          if (multiline)
729            {
730            char *endmatch = ptr + offsets[1];
731            t = ptr;
732            while (t < endmatch) { if (*t++ == '\n') linenumber++; }
733            while (endmatch < endptr && *endmatch != '\n') endmatch++;
734            linelength = endmatch - ptr;
735            }
736    
737          /*** NOTE: Use only fwrite() to output the data line, so that binary
738          zeroes are treated as just another data character. */
739    
740          /* This extra option, for Jeffrey Friedl's debugging requirements,
741          replaces the matched string, or a specific captured string if it exists,
742          with X. When this happens, colouring is ignored. */
743    
744    #ifdef JFRIEDL_DEBUG
745          if (S_arg >= 0 && S_arg < mrc)
746            {
747            int first = S_arg * 2;
748            int last  = first + 1;
749            fwrite(ptr, 1, offsets[first], stdout);
750            fprintf(stdout, "X");
751            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
752            }
753          else
754    #endif
755    
756          /* We have to split the line(s) up if colouring. */
757    
758          if (do_colour)
759            {
760            fwrite(ptr, 1, offsets[0], stdout);
761            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
762            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
763            fprintf(stdout, "%c[00m", 0x1b);
764            fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
765            }
766          else fwrite(ptr, 1, linelength, stdout);
767    
768          fprintf(stdout, "\n");
769        }        }
770    
771      rc = 0;      /* End of doing what has to be done for a match */
772    
773        rc = 0;    /* Had some success */
774    
775        /* Remember where the last match happened for after_context. We remember
776        where we are about to restart, and that line's number. */
777    
778        lastmatchrestart = ptr + linelength + 1;
779        lastmatchnumber = linenumber + 1;
780      }      }
781    
782      /* Advance to after the newline and increment the line number. */
783    
784      ptr += linelength + 1;
785      linenumber++;
786    
787      /* If we haven't yet reached the end of the file (the buffer is full), and
788      the current point is in the top 1/3 of the buffer, slide the buffer down by
789      1/3 and refill it. Before we do this, if some unprinted "after" lines are
790      about to be lost, print them. */
791    
792      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
793        {
794        if (after_context > 0 &&
795            lastmatchnumber > 0 &&
796            lastmatchrestart < buffer + MBUFTHIRD)
797          {
798          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
799          lastmatchnumber = 0;
800          }
801    
802        /* Now do the shuffle */
803    
804        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
805        ptr -= MBUFTHIRD;
806        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
807        endptr = buffer + bufflength;
808    
809        /* Adjust any last match point */
810    
811        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
812        }
813      }     /* Loop through the whole file */
814    
815    /* End of file; print final "after" lines if wanted; do_after_lines sets
816    hyphenpending if it prints something. */
817    
818    if (!only_matching && !count_only)
819      {
820      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
821      hyphenpending |= endhyphenpending;
822      }
823    
824    /* Print the file name if we are looking for those without matches and there
825    were none. If we found a match, we won't have got this far. */
826    
827    if (filenames == FN_NOMATCH_ONLY)
828      {
829      fprintf(stdout, "%s\n", printname);
830      return 0;
831    }    }
832    
833    /* Print the match count if wanted */
834    
835  if (count_only)  if (count_only)
836    {    {
837    if (name != NULL) fprintf(stdout, "%s:", name);    if (printname != NULL) fprintf(stdout, "%s:", printname);
838    fprintf(stdout, "%d\n", count);    fprintf(stdout, "%d\n", count);
839    }    }
840    
# Line 220  return rc; Line 843  return rc;
843    
844    
845    
   
846  /*************************************************  /*************************************************
847  *     Grep a file or recurse into a directory    *  *     Grep a file or recurse into a directory    *
848  *************************************************/  *************************************************/
849    
850    /* Given a path name, if it's a directory, scan all the files if we are
851    recursing; if it's a file, grep it.
852    
853    Arguments:
854      pathname          the path to investigate
855      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
856      only_one_at_top   TRUE if the path is the only one at toplevel
857    
858    Returns:   0 if there was at least one match
859               1 if there were no matches
860               2 if there was some kind of error
861    
862    However, file opening failures are suppressed if "silent" is set.
863    */
864    
865  static int  static int
866  grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
   BOOL only_one_at_top)  
867  {  {
868  int rc = 1;  int rc = 1;
869  int sep;  int sep;
870  FILE *in;  FILE *in;
871    
872  /* If the file is a directory and we are recursing, scan each file within it.  /* If the file name is "-" we scan stdin */
 The scanning code is localized so it can be made system-specific. */  
873    
874  if ((sep = isdirectory(filename)) != 0 && recurse)  if (strcmp(pathname, "-") == 0)
875    {    {
876    char buffer[1024];    return pcregrep(stdin,
877    char *nextfile;      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
878    directory_type *dir = opendirectory(filename);        stdin_name : NULL);
879      }
880    
   if (dir == NULL)  
     {  
     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,  
       strerror(errno));  
     return 2;  
     }  
881    
882    while ((nextfile = readdirectory(dir)) != NULL)  /* If the file is a directory, skip if skipping or if we are recursing, scan
883    each file within it, subject to any include or exclude patterns that were set.
884    The scanning code is localized so it can be made system-specific. */
885    
886    if ((sep = isdirectory(pathname)) != 0)
887      {
888      if (dee_action == dee_SKIP) return 1;
889      if (dee_action == dee_RECURSE)
890      {      {
891      int frc;      char buffer[1024];
892      sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);      char *nextfile;
893      frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);      directory_type *dir = opendirectory(pathname);
894      if (frc == 0 && rc == 1) rc = 0;  
895      }      if (dir == NULL)
896          {
897          if (!silent)
898            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
899              strerror(errno));
900          return 2;
901          }
902    
903        while ((nextfile = readdirectory(dir)) != NULL)
904          {
905          int frc, blen;
906          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
907          blen = strlen(buffer);
908    
909          if (exclude_compiled != NULL &&
910              pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
911            continue;
912    
913          if (include_compiled != NULL &&
914              pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
915            continue;
916    
917          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
918          if (frc > 1) rc = frc;
919           else if (frc == 0 && rc == 1) rc = 0;
920          }
921    
922    closedirectory(dir);      closedirectory(dir);
923    return rc;      return rc;
924        }
925    }    }
926    
927  /* If the file is not a directory, or we are not recursing, scan it. If this is  /* If the file is not a directory and not a regular file, skip it if that's
928  the first and only argument at top level, we don't show the file name.  been requested. */
929  Otherwise, control is via the show_filenames variable. */  
930    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
931    
932    /* Control reaches here if we have a regular file, or if we have a directory
933    and recursion or skipping was not requested, or if we have anything else and
934    skipping was not requested. The scan proceeds. If this is the first and only
935    argument at top level, we don't show the file name, unless we are only showing
936    the file name, or the filename was forced (-H). */
937    
938  in = fopen(filename, "r");  in = fopen(pathname, "r");
939  if (in == NULL)  if (in == NULL)
940    {    {
941    fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));    if (!silent)
942        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
943          strerror(errno));
944    return 2;    return 2;
945    }    }
946    
947  rc = pcregrep(in, (show_filenames && !only_one_at_top)? filename : NULL);  rc = pcregrep(in, (filenames > FN_DEFAULT ||
948      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
949    
950  fclose(in);  fclose(in);
951  return rc;  return rc;
952  }  }
# Line 287  return rc; Line 961  return rc;
961  static int  static int
962  usage(int rc)  usage(int rc)
963  {  {
964  fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n");  option_item *op;
965    fprintf(stderr, "Usage: pcregrep [-");
966    for (op = optionlist; op->one_char != 0; op++)
967      {
968      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
969      }
970    fprintf(stderr, "] [long options] [pattern] [files]\n");
971  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information.\n");
972  return rc;  return rc;
973  }  }
# Line 304  help(void) Line 984  help(void)
984  {  {
985  option_item *op;  option_item *op;
986    
987  printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
988  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
989    printf("PATTERN must be present if neither -e nor -f is used.\n");
990    printf("\"-\" can be used as a file name to mean STDIN.\n\n");
991  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
992    
993  printf("Options:\n");  printf("Options:\n");
# Line 321  for (op = optionlist; op->one_char != 0; Line 1003  for (op = optionlist; op->one_char != 0;
1003    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                    ", op->help_text);
1004    }    }
1005    
1006  printf("\n  -f<filename>  or  --file=<filename>\n");  printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1007  printf("    Read patterns from <filename> instead of using a command line option.\n");  printf("trailing white space is removed and blank lines are ignored.\n");
1008  printf("    Trailing white space is removed; blanks lines are ignored.\n");  printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
 printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
1009    
1010  printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1011  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1012  }  }
1013    
# Line 334  printf("Exit status is 0 if any matches, Line 1015  printf("Exit status is 0 if any matches,
1015    
1016    
1017  /*************************************************  /*************************************************
1018  *                Handle an option                *  *    Handle a single-letter, no data option      *
1019  *************************************************/  *************************************************/
1020    
1021  static int  static int
# Line 342  handle_option(int letter, int options) Line 1023  handle_option(int letter, int options)
1023  {  {
1024  switch(letter)  switch(letter)
1025    {    {
1026    case -1:  help(); exit(0);    case N_HELP: help(); exit(0);
1027    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1028    case 'h': filenames = FALSE; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1029      case 'H': filenames = FN_FORCE; break;
1030      case 'h': filenames = FN_NONE; break;
1031    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1032    case 'l': filenames_only = TRUE;    case 'l': filenames = FN_ONLY; break;
1033      case 'L': filenames = FN_NOMATCH_ONLY; break;
1034      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1035    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1036    case 'r': recurse = TRUE; break;    case 'o': only_matching = TRUE; break;
1037      case 'q': quiet = TRUE; break;
1038      case 'r': dee_action = dee_RECURSE; break;
1039    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1040      case 'u': options |= PCRE_UTF8; break;
1041    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1042    case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1043      case 'x': process_options |= PO_LINE_MATCH; break;
1044    
1045    case 'V':    case 'V':
1046    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s using ", VERSION);
# Line 371  return options; Line 1060  return options;
1060    
1061    
1062  /*************************************************  /*************************************************
1063    *          Construct printed ordinal             *
1064    *************************************************/
1065    
/* Turn a number into its printed English ordinal: "1st", "2nd", "3rd", "4th",
"11th", "21st", "112th", and so on.

Argument:   n    the number to format
Returns:    pointer to a static, zero-terminated string; the string is
            overwritten by the next call, so use or copy it before calling
            this function again
*/

static char *
ordin(int n)
{
static char buffer[32];       /* ample for any int (even 64-bit) plus suffix */
const char *ending = "th";
int last_two = n % 100;

/* Values whose last two digits are 11, 12 or 13 take "th" even though they end
in 1, 2 or 3. For everything else the final digit decides. Negative numbers
give a non-positive remainder and therefore always end up with "th". */

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1084    
1085    
1086    
1087    /*************************************************
1088    *          Compile a single pattern              *
1089    *************************************************/
1090    
1091    /* When the -F option has been used, this is called for each substring.
1092    Otherwise it's called for each supplied pattern.
1093    
1094    Arguments:
1095      pattern        the pattern string
1096      options        the PCRE options
1097      filename       the file name, or NULL for a command-line pattern
1098      count          0 if this is the only command line pattern, or
1099                     number of the command line pattern, or
1100                     linenumber for a pattern from a file
1101    
1102    Returns:         TRUE on success, FALSE after an error
1103    */
1104    
1105    static BOOL
1106    compile_single_pattern(char *pattern, int options, char *filename, int count)
1107    {
1108    char buffer[MBUFTHIRD + 16];
1109    const char *error;
1110    int errptr;
1111    
1112    if (pattern_count >= MAX_PATTERN_COUNT)
1113      {
1114      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1115        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1116      return FALSE;
1117      }
1118    
1119    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1120      suffix[process_options]);
1121    pattern_list[pattern_count] =
1122      pcre_compile(buffer, options, &error, &errptr, pcretables);
1123    if (pattern_list[pattern_count++] != NULL) return TRUE;
1124    
1125    /* Handle compile errors */
1126    
1127    errptr -= (int)strlen(prefix[process_options]);
1128    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1129    
1130    if (filename == NULL)
1131      {
1132      if (count == 0)
1133        fprintf(stderr, "pcregrep: Error in command-line regex "
1134          "at offset %d: %s\n", errptr, error);
1135      else
1136        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1137          "at offset %d: %s\n", ordin(count), errptr, error);
1138      }
1139    else
1140      {
1141      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1142        "at offset %d: %s\n", count, filename, errptr, error);
1143      }
1144    
1145    return FALSE;
1146    }
1147    
1148    
1149    
1150    /*************************************************
1151    *           Compile one supplied pattern         *
1152    *************************************************/
1153    
1154    /* When the -F option has been used, each string may be a list of strings,
1155    separated by newlines. They will be matched literally.
1156    
1157    Arguments:
1158      pattern        the pattern string
1159      options        the PCRE options
1160      filename       the file name, or NULL for a command-line pattern
1161      count          0 if this is the only command line pattern, or
1162                     number of the command line pattern, or
1163                     linenumber for a pattern from a file
1164    
1165    Returns:         TRUE on success, FALSE after an error
1166    */
1167    
1168    static BOOL
1169    compile_pattern(char *pattern, int options, char *filename, int count)
1170    {
1171    if ((process_options & PO_FIXED_STRINGS) != 0)
1172      {
1173      char buffer[MBUFTHIRD];
1174      for(;;)
1175        {
1176        char *p = strchr(pattern, '\n');
1177        if (p == NULL)
1178          return compile_single_pattern(pattern, options, filename, count);
1179        sprintf(buffer, "%.*s", p - pattern, pattern);
1180        pattern = p + 1;
1181        if (!compile_single_pattern(buffer, options, filename, count))
1182          return FALSE;
1183        }
1184      }
1185    else return compile_single_pattern(pattern, options, filename, count);
1186    }
1187    
1188    
1189    
1190    /*************************************************
1191  *                Main program                    *  *                Main program                    *
1192  *************************************************/  *************************************************/
1193    
1194    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1195    
1196  int  int
1197  main(int argc, char **argv)  main(int argc, char **argv)
1198  {  {
1199  int i, j;  int i, j;
1200  int rc = 1;  int rc = 1;
1201  int options = 0;  int pcre_options = 0;
1202    int cmd_pattern_count = 0;
1203  int errptr;  int errptr;
 const char *error;  
1204  BOOL only_one_at_top;  BOOL only_one_at_top;
1205    char *patterns[MAX_PATTERN_COUNT];
1206    const char *locale_from = "--locale";
1207    const char *error;
1208    
1209  /* Process the options */  /* Process the options */
1210    
1211  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
1212    {    {
1213      option_item *op = NULL;
1214      char *option_data = (char *)"";    /* default to keep compiler happy */
1215      BOOL longop;
1216      BOOL longopwasequals = FALSE;
1217    
1218    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
1219    
1220    /* Long name options */    /* If we hit an argument that is just "-", it may be a reference to STDIN,
1221      but only if we have previously had -e or -f to define the patterns. */
1222    
1223      if (argv[i][1] == 0)
1224        {
1225        if (pattern_filename != NULL || pattern_count > 0) break;
1226          else exit(usage(2));
1227        }
1228    
1229      /* Handle a long name option, or -- to terminate the options */
1230    
1231    if (argv[i][1] == '-')    if (argv[i][1] == '-')
1232      {      {
1233      option_item *op;      char *arg = argv[i] + 2;
1234        char *argequals = strchr(arg, '=');
1235    
1236      if (strncmp(argv[i]+2, "file=", 5) == 0)      if (*arg == 0)    /* -- terminates options */
1237        {        {
1238        pattern_filename = argv[i] + 7;        i++;
1239        continue;        break;                /* out of the options-handling loop */
1240        }        }
1241    
1242        longop = TRUE;
1243    
1244        /* Some long options have data that follows after =, for example file=name.
1245        Some options have variations in the long name spelling: specifically, we
1246        allow "regexp" because GNU grep allows it, though I personally go along
1247        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1248        These options are entered in the table as "regex(p)". No option is in both
1249        these categories, fortunately. */
1250    
1251      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
1252        {        {
1253        if (strcmp(argv[i]+2, op->long_name) == 0)        char *opbra = strchr(op->long_name, '(');
1254          char *equals = strchr(op->long_name, '=');
1255          if (opbra == NULL)     /* Not a (p) case */
1256          {          {
1257          options = handle_option(op->one_char, options);          if (equals == NULL)  /* Not thing=data case */
1258          break;            {
1259              if (strcmp(arg, op->long_name) == 0) break;
1260              }
1261            else                 /* Special case xxx=data */
1262              {
1263              int oplen = equals - op->long_name;
1264              int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
1265              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1266                {
1267                option_data = arg + arglen;
1268                if (*option_data == '=')
1269                  {
1270                  option_data++;
1271                  longopwasequals = TRUE;
1272                  }
1273                break;
1274                }
1275              }
1276            }
1277          else                   /* Special case xxxx(p) */
1278            {
1279            char buff1[24];
1280            char buff2[24];
1281            int baselen = opbra - op->long_name;
1282            sprintf(buff1, "%.*s", baselen, op->long_name);
1283            sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
1284              opbra + 1);
1285            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1286              break;
1287          }          }
1288        }        }
1289    
1290      if (op->one_char == 0)      if (op->one_char == 0)
1291        {        {
1292        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
# Line 417  for (i = 1; i < argc; i++) Line 1294  for (i = 1; i < argc; i++)
1294        }        }
1295      }      }
1296    
1297    /* One-char options */    /* One-char options; many that have no data may be in a single argument; we
1298      continue till we hit the last one or one that needs data. */
1299    
1300    else    else
1301      {      {
1302      char *s = argv[i] + 1;      char *s = argv[i] + 1;
1303        longop = FALSE;
1304      while (*s != 0)      while (*s != 0)
1305        {        {
1306        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
1307            { if (*s == op->one_char) break; }
1308          if (op->one_char == 0)
1309          {          {
1310          pattern_filename = s + 1;          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1311          if (pattern_filename[0] == 0)            *s, argv[i]);
1312            {          exit(usage(2));
1313            if (i >= argc - 1)          }
1314              {        if (op->type != OP_NODATA || s[1] == 0)
1315              fprintf(stderr, "pcregrep: File name missing after -f\n");          {
1316              exit(usage(2));          option_data = s+1;
             }  
           pattern_filename = argv[++i];  
           }  
1317          break;          break;
1318          }          }
1319        else options = handle_option(*s++, options);        pcre_options = handle_option(*s++, pcre_options);
1320          }
1321        }
1322    
1323      /* At this point we should have op pointing to a matched option. If the type
1324      is OP_NODATA, it means that there is no data, and the option might set
1325      something in the PCRE options. */
1326    
1327      if (op->type == OP_NODATA)
1328        {
1329        pcre_options = handle_option(op->one_char, pcre_options);
1330        continue;
1331        }
1332    
1333      /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1334      either has a value or defaults to something. It cannot have data in a
1335      separate item. At the moment, the only such options are "colo(u)r" and
1336      Jeffrey Friedl's special debugging option. */
1337    
1338      if (*option_data == 0 &&
1339          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1340        {
1341        switch (op->one_char)
1342          {
1343          case N_COLOUR:
1344          colour_option = (char *)"auto";
1345          break;
1346    #ifdef JFRIEDL_DEBUG
1347          case 'S':
1348          S_arg = 0;
1349          break;
1350    #endif
1351        }        }
1352        continue;
1353        }
1354    
1355      /* Otherwise, find the data string for the option. */
1356    
1357      if (*option_data == 0)
1358        {
1359        if (i >= argc - 1 || longopwasequals)
1360          {
1361          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1362          exit(usage(2));
1363          }
1364        option_data = argv[++i];
1365        }
1366    
1367      /* If the option type is OP_PATLIST, it's the -e option, which can be called
1368      multiple times to create a list of patterns. */
1369    
1370      if (op->type == OP_PATLIST)
1371        {
1372        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1373          {
1374          fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1375            MAX_PATTERN_COUNT);
1376          return 2;
1377          }
1378        patterns[cmd_pattern_count++] = option_data;
1379        }
1380    
1381      /* Otherwise, deal with single string or numeric data values. */
1382    
1383      else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1384        {
1385        *((char **)op->dataptr) = option_data;
1386        }
1387      else
1388        {
1389        char *endptr;
1390        int n = strtoul(option_data, &endptr, 10);
1391        if (*endptr != 0)
1392          {
1393          if (longop)
1394            {
1395            char *equals = strchr(op->long_name, '=');
1396            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1397              equals - op->long_name;
1398            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1399              option_data, nlen, op->long_name);
1400            }
1401          else
1402            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1403              option_data, op->one_char);
1404          exit(usage(2));
1405          }
1406        *((int *)op->dataptr) = n;
1407        }
1408      }
1409    
1410    /* Options have been decoded. If -C was used, its value is used as a default
1411    for -A and -B. */
1412    
1413    if (both_context > 0)
1414      {
1415      if (after_context == 0) after_context = both_context;
1416      if (before_context == 0) before_context = both_context;
1417      }
1418    
1419    /* If a locale has not been provided as an option, see if the LC_CTYPE or
1420    LC_ALL environment variable is set, and if so, use it. */
1421    
1422    if (locale == NULL)
1423      {
1424      locale = getenv("LC_ALL");
1425      locale_from = "LCC_ALL";
1426      }
1427    
1428    if (locale == NULL)
1429      {
1430      locale = getenv("LC_CTYPE");
1431      locale_from = "LC_CTYPE";
1432      }
1433    
1434    /* If a locale has been provided, set it, and generate the tables the PCRE
1435    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1436    
1437    if (locale != NULL)
1438      {
1439      if (setlocale(LC_CTYPE, locale) == NULL)
1440        {
1441        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1442          locale, locale_from);
1443        return 2;
1444        }
1445      pcretables = pcre_maketables();
1446      }
1447    
1448    /* Sort out colouring */
1449    
1450    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1451      {
1452      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1453      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1454      else
1455        {
1456        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1457          colour_option);
1458        return 2;
1459        }
1460      if (do_colour)
1461        {
1462        char *cs = getenv("PCREGREP_COLOUR");
1463        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1464        if (cs != NULL) colour_string = cs;
1465        }
1466      }
1467    
1468    /* Interpret the text values for -d and -D */
1469    
1470    if (dee_option != NULL)
1471      {
1472      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1473      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1474      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1475      else
1476        {
1477        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1478        return 2;
1479      }      }
1480    }    }
1481    
1482  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  if (DEE_option != NULL)
1483  hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));    {
1484      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1485      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1486      else
1487        {
1488        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1489        return 2;
1490        }
1491      }
1492    
1493    /* Check the value for Jeff Friedl's debugging option. */
1494    
1495    #ifdef JFRIEDL_DEBUG
1496    if (S_arg > 9)
1497      {
1498      fprintf(stderr, "pcregrep: bad value for -S option\n");
1499      return 2;
1500      }
1501    #endif
1502    
1503    /* Get memory to store the pattern and hints lists. */
1504    
1505    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1506    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1507    
1508  if (pattern_list == NULL || hints_list == NULL)  if (pattern_list == NULL || hints_list == NULL)
1509    {    {
# Line 452  if (pattern_list == NULL || hints_list = Line 1511  if (pattern_list == NULL || hints_list =
1511    return 2;    return 2;
1512    }    }
1513    
1514  /* Compile the regular expression(s). */  /* If no patterns were provided by -e, and there is no file provided by -f,
1515    the first argument is the one and only pattern, and it must exist. */
1516    
1517    if (cmd_pattern_count == 0 && pattern_filename == NULL)
1518      {
1519      if (i >= argc) return usage(2);
1520      patterns[cmd_pattern_count++] = argv[i++];
1521      }
1522    
1523    /* Compile the patterns that were provided on the command line, either by
1524    multiple uses of -e or as a single unkeyed pattern. */
1525    
1526    for (j = 0; j < cmd_pattern_count; j++)
1527      {
1528      if (!compile_pattern(patterns[j], pcre_options, NULL,
1529           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1530        return 2;
1531      }
1532    
1533    /* Compile the regular expressions that are provided in a file. */
1534    
1535  if (pattern_filename != NULL)  if (pattern_filename != NULL)
1536    {    {
1537    FILE *f = fopen(pattern_filename, "r");    int linenumber = 0;
1538    char buffer[BUFSIZ];    FILE *f;
1539    if (f == NULL)    char *filename;
1540      char buffer[MBUFTHIRD];
1541    
1542      if (strcmp(pattern_filename, "-") == 0)
1543      {      {
1544      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      f = stdin;
1545        strerror(errno));      filename = stdin_name;
     return 2;  
1546      }      }
1547    while (fgets(buffer, sizeof(buffer), f) != NULL)    else
1548      {      {
1549      char *s = buffer + (int)strlen(buffer);      f = fopen(pattern_filename, "r");
1550      if (pattern_count >= MAX_PATTERN_COUNT)      if (f == NULL)
1551        {        {
1552        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1553          MAX_PATTERN_COUNT);          strerror(errno));
1554        return 2;        return 2;
1555        }        }
1556        filename = pattern_filename;
1557        }
1558    
1559      while (fgets(buffer, MBUFTHIRD, f) != NULL)
1560        {
1561        char *s = buffer + (int)strlen(buffer);
1562      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
     if (s == buffer) continue;  
1563      *s = 0;      *s = 0;
1564      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,      linenumber++;
1565        &errptr, NULL);      if (buffer[0] == 0) continue;   /* Skip blank lines */
1566      if (pattern_list[pattern_count++] == NULL)      if (!compile_pattern(buffer, pcre_options, filename, linenumber))
       {  
       fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",  
         pattern_count, errptr, error);  
1567        return 2;        return 2;
       }  
1568      }      }
   fclose(f);  
   }  
1569    
1570  /* If no file name, a single regex must be given inline */    if (f != stdin) fclose(f);
   
 else  
   {  
   if (i >= argc) return usage(0);  
   pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);  
   if (pattern_list[0] == NULL)  
     {  
     fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,  
       error);  
     return 2;  
     }  
   pattern_count++;  
1571    }    }
1572    
1573  /* Study the regular expressions, as we will be running them may times */  /* Study the regular expressions, as we will be running them many times */
1574    
1575  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
1576    {    {
# Line 517  for (j = 0; j < pattern_count; j++) Line 1584  for (j = 0; j < pattern_count; j++)
1584      }      }
1585    }    }
1586    
1587  /* If there are no further arguments, do the business on stdin and exit */  /* If there are include or exclude patterns, compile them. */
1588    
1589    if (exclude_pattern != NULL)
1590      {
1591      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
1592        pcretables);
1593      if (exclude_compiled == NULL)
1594        {
1595        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1596          errptr, error);
1597        return 2;
1598        }
1599      }
1600    
1601    if (include_pattern != NULL)
1602      {
1603      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
1604        pcretables);
1605      if (include_compiled == NULL)
1606        {
1607        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1608          errptr, error);
1609        return 2;
1610        }
1611      }
1612    
1613    /* If there are no further arguments, do the business on stdin and exit. */
1614    
1615  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc)
1616      return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
1617    
1618  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
1619  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
1620  the file name if the argument is not a directory. */  the file name if the argument is not a directory and filenames are not
1621    otherwise forced. */
1622    
1623  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
 if (filenames_only) filenames = TRUE;  
1624    
1625  for (; i < argc; i++)  for (; i < argc; i++)
1626    {    {
1627    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
1628    if (frc == 0 && rc == 1) rc = 0;      only_one_at_top);
1629      if (frc > 1) rc = frc;
1630        else if (frc == 0 && rc == 1) rc = 0;
1631    }    }
1632    
1633  return rc;  return rc;
1634  }  }
1635    
1636  /* End */  /* End of pcregrep */

Legend:
Removed from v.53  
changed lines
  Added in v.87

  ViewVC Help
Powered by ViewVC 1.1.5