/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 654 by ph10, Tue Aug 2 11:00:40 2011 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2011 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
# Line 45  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
 #include "config.h"  
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 55  POSSIBILITY OF SUCH DAMAGE. Line 70  POSSIBILITY OF SUCH DAMAGE.
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "4.0 07-Jun-2005"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define PATBUFSIZE BUFSIZ
78  #else  #else
79  #define MBUFTHIRD 8192  #define PATBUFSIZE 8192
80  #endif  #endif
81    
82    /* Values for the "filenames" variable, which specifies options for file name
83    output. The order is important; it is assumed that a file name is wanted for
84    all values greater than FN_DEFAULT. */
85    
86    enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92    /* Actions for the -d and -D options */
93    
94    enum { dee_READ, dee_SKIP, dee_RECURSE };
95    enum { DEE_READ, DEE_SKIP };
96    
97    /* Actions for special processing options (flag bits) */
98    
99    #define PO_WORD_MATCH     0x0001
100    #define PO_LINE_MATCH     0x0002
101    #define PO_FIXED_STRINGS  0x0004
102    
103    /* Line ending types */
104    
105    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
/* The fwrite() result is consumed by an empty "if" purely to silence the
unused-result warning. The whole thing is wrapped in do { } while (0) so that
the macro expands to exactly one statement: "if (x) FWRITE(...); else ..."
then parses as written instead of the "else" colliding with the macro's own
"if". Every use must be followed by a semicolon. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114    
115    
116    
117  /*************************************************  /*************************************************
118  *               Global variables                 *  *               Global variables                 *
119  *************************************************/  *************************************************/
120    
121    /* Jeffrey Friedl has some debugging requirements that are not part of the
122    regular code. */
123    
124    #ifdef JFRIEDL_DEBUG
125    static int S_arg = -1;
126    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128    static const char *jfriedl_prefix = "";
129    static const char *jfriedl_postfix = "";
130    #endif
131    
132    static int  endlinetype;
133    
134    static char *colour_string = (char *)"1;31";
135    static char *colour_option = NULL;
136    static char *dee_option = NULL;
137    static char *DEE_option = NULL;
138    static char *main_buffer = NULL;
139    static char *newline = NULL;
140  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
141  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
142    static char *locale = NULL;
143    
144    static const unsigned char *pcretables = NULL;
145    
146  static int  pattern_count = 0;  static int  pattern_count = 0;
147  static pcre **pattern_list;  static pcre **pattern_list = NULL;
148  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
149    
150  static char *include_pattern = NULL;  static char *include_pattern = NULL;
151  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
152    static char *include_dir_pattern = NULL;
153    static char *exclude_dir_pattern = NULL;
154    
155  static pcre *include_compiled = NULL;  static pcre *include_compiled = NULL;
156  static pcre *exclude_compiled = NULL;  static pcre *exclude_compiled = NULL;
157    static pcre *include_dir_compiled = NULL;
158    static pcre *exclude_dir_compiled = NULL;
159    
160  static int after_context = 0;  static int after_context = 0;
161  static int before_context = 0;  static int before_context = 0;
162  static int both_context = 0;  static int both_context = 0;
163    static int bufthird = PCREGREP_BUFSIZE;
164    static int bufsize = 3*PCREGREP_BUFSIZE;
165    static int dee_action = dee_READ;
166    static int DEE_action = DEE_READ;
167    static int error_count = 0;
168    static int filenames = FN_DEFAULT;
169    static int only_matching = -1;
170    static int process_options = 0;
171    
172    static unsigned long int match_limit = 0;
173    static unsigned long int match_limit_recursion = 0;
174    
175  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
176  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
177  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
 static BOOL filenames_nomatch_only = FALSE;  
178  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
179  static BOOL invert = FALSE;  static BOOL invert = FALSE;
180    static BOOL line_buffered = FALSE;
181    static BOOL line_offsets = FALSE;
182  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
183  static BOOL number = FALSE;  static BOOL number = FALSE;
184    static BOOL omit_zero_count = FALSE;
185    static BOOL resource_error = FALSE;
186  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
 static BOOL recurse = FALSE;  
187  static BOOL silent = FALSE;  static BOOL silent = FALSE;
188  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
 static BOOL word_match = FALSE;  
189    
190  /* Structure for options and list of them */  /* Structure for options and list of them */
191    
192  enum { OP_NODATA, OP_STRING, OP_NUMBER };  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
193           OP_OP_NUMBER, OP_PATLIST };
194    
195  typedef struct option_item {  typedef struct option_item {
196    int type;    int type;
# Line 112  typedef struct option_item { Line 200  typedef struct option_item {
200    const char *help_text;    const char *help_text;
201  } option_item;  } option_item;
202    
203    /* Options without a single-letter equivalent get a negative value. This can be
204    used to identify them. */
205    
206    #define N_COLOUR       (-1)
207    #define N_EXCLUDE      (-2)
208    #define N_EXCLUDE_DIR  (-3)
209    #define N_HELP         (-4)
210    #define N_INCLUDE      (-5)
211    #define N_INCLUDE_DIR  (-6)
212    #define N_LABEL        (-7)
213    #define N_LOCALE       (-8)
214    #define N_NULL         (-9)
215    #define N_LOFFSETS     (-10)
216    #define N_FOFFSETS     (-11)
217    #define N_LBUFFER      (-12)
218    #define N_M_LIMIT      (-13)
219    #define N_M_LIMIT_REC  (-14)
220    #define N_BUFSIZE      (-15)
221    
222  static option_item optionlist[] = {  static option_item optionlist[] = {
223    { OP_NODATA, -1,  NULL,              "",              "  terminate options" },    { OP_NODATA,     N_NULL,   NULL,              "",              "  terminate options" },
224    { OP_NODATA, -1,  NULL,              "help",          "display this help and exit" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
225    { OP_NUMBER, 'A', &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
226    { OP_NUMBER, 'B', &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
227    { OP_NUMBER, 'C', &both_context,     "context=number", "set number of context lines, before & after" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
228    { OP_NODATA, 'c', NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
229    { OP_STRING, 'f', &pattern_filename, "file=path",     "read patterns from file" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
230    { OP_NODATA, 'h', NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
231    { OP_NODATA, 'i', NULL,              "ignore-case",   "ignore case distinctions" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
232    { OP_NODATA, 'l', NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
233    { OP_NODATA, 'L', NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
234    { OP_STRING, -1,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_PATLIST,    'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
235    { OP_NODATA, 'M', NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
236    { OP_NODATA, 'n', NULL,              "line-number",   "print line number with output lines" },    { OP_STRING,     'f',      &pattern_filename, "file=path",     "read patterns from file" },
237    { OP_NODATA, 'q', NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
238    { OP_NODATA, 'r', NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
239    { OP_STRING, -1,  &exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
240    { OP_STRING, -1,  &include_pattern,  "include=pattern","include matching files when recursing" },    { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
241    { OP_NODATA, 's', NULL,              "no-messages",   "suppress error messages" },    { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
242    { OP_NODATA, 'u', NULL,              "utf-8",         "use UTF-8 mode" },    { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
243    { OP_NODATA, 'V', NULL,              "version",       "print version information and exit" },    { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
244    { OP_NODATA, 'v', NULL,              "invert-match",  "select non-matching lines" },    { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
245    { OP_NODATA, 'w', NULL,              "word-regex(p)", "force PATTERN to match only as a word"  },    { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
246    { OP_NODATA, 'x', NULL,              "line-regex(p)", "force PATTERN to match only whole lines" },    { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
247    { OP_NODATA, 0,   NULL,               NULL,            NULL }    { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
248      { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
249      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
250      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
251      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
252      { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
253      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
254      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
255      { OP_STRING,     N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
256      { OP_STRING,     N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
257      { OP_STRING,     N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
258      { OP_STRING,     N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
259    
260      /* These two were accidentally implemented with underscores instead of
261      hyphens in the option names. As this was not discovered for several releases,
262      the incorrect versions are left in the table for compatibility. However, the
263      --help function misses out any option that has an underscore in its name. */
264    
265      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
266      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
267    
268    #ifdef JFRIEDL_DEBUG
269      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
270    #endif
271      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
272      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
273      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
274      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
275      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
276      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
277      { OP_NODATA,    0,        NULL,               NULL,            NULL }
278  };  };
279    
280    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
281    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
282    that the combination of -w and -x has the same effect as -x on its own, so we
283    can treat them as the same. */
284    
285    static const char *prefix[] = {
286      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
287    
288    static const char *suffix[] = {
289      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
290    
291    /* UTF-8 tables - used only when the newline setting is "any". */
292    
/* Both tables are private to this file, hence static.

utf8_table3 is indexed by the number of additional bytes in a UTF-8
character; it gives the mask that extracts the data bits from the first
byte. */

static const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

/* utf8_table4 is indexed by the bottom 6 bits of a first byte that is 0xc0
or greater; it gives the number of additional bytes in the character. */

static const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
300    
301    
302    
303    /*************************************************
304    *         Exit from the program                  *
305    *************************************************/
306    
307    /* If there has been a resource error, give a suitable message.
308    
309    Argument:  the return code
310    Returns:   does not return
311    */
312    
313    static void
314    pcregrep_exit(int rc)
315    {
316    if (resource_error)
317      {
318      fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
319        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
320      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
321      }
322    
323    exit(rc);
324    }
325    
326    
327  /*************************************************  /*************************************************
328  *       Functions for directory scanning         *  *            OS-specific functions               *
329  *************************************************/  *************************************************/
330    
331  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
332  although at present the only ones are for Unix, Win32, and for "no directory  although at present the only ones are for Unix, Win32, and for "no support". */
 recursion support". */  
333    
334    
335  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
336    
337  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
338  #include <sys/types.h>  #include <sys/types.h>
339  #include <sys/stat.h>  #include <sys/stat.h>
340  #include <dirent.h>  #include <dirent.h>
# Line 184  for (;;) Line 366  for (;;)
366    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
367      return dent->d_name;      return dent->d_name;
368    }    }
369  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
370  }  }
371    
372  static void  static void
# Line 194  closedir(dir); Line 376  closedir(dir);
376  }  }
377    
378    
379    /************* Test for regular file in Unix **********/
380    
/* Test whether a path names a regular file.

Argument:  the path to test
Returns:   1 if stat() fails or the path is a regular file; 0 otherwise
*/

static int
isregfile(char *filename)
{
struct stat statbuf;

/* A path that cannot be examined is reported as regular, in the expectation
that the subsequent attempt to open it as a file will fail. */

if (stat(filename, &statbuf) != 0) return 1;

/* S_ISREG() is the base POSIX file type test; S_IFMT and S_IFREG are only
guaranteed on XSI systems. The result is normalized to 0 or 1. */

return S_ISREG(statbuf.st_mode)? 1 : 0;
}
389    
390    
391    /************* Test for a terminal in Unix **********/
392    
393    static BOOL
394    is_stdout_tty(void)
395    {
396    return isatty(fileno(stdout));
397    }
398    
399    static BOOL
400    is_file_tty(FILE *f)
401    {
402    return isatty(fileno(f));
403    }
404    
405    
406  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
407    
408  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
409  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
410  when it did not exist. */  when it did not exist. David Byron added a patch that moved the #include of
411    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
412    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
413    undefined when it is indeed undefined. */
414    
415  #elif HAVE_WIN32API  #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
416    
417  #ifndef STRICT  #ifndef STRICT
418  # define STRICT  # define STRICT
# Line 209  when it did not exist. */ Line 420  when it did not exist. */
420  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
421  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
422  #endif  #endif
423    
424    #include <windows.h>
425    
426  #ifndef INVALID_FILE_ATTRIBUTES  #ifndef INVALID_FILE_ATTRIBUTES
427  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
428  #endif  #endif
429    
 #include <windows.h>  
   
430  typedef struct directory_type  typedef struct directory_type
431  {  {
432  HANDLE handle;  HANDLE handle;
# Line 244  dir = (directory_type *) malloc(sizeof(* Line 456  dir = (directory_type *) malloc(sizeof(*
456  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
457    {    {
458    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
459    exit(2);    pcregrep_exit(2);
460    }    }
461  memcpy(pattern, filename, len);  memcpy(pattern, filename, len);
462  memcpy(&(pattern[len]), "\\*", 3);  memcpy(&(pattern[len]), "\\*", 3);
# Line 292  free(dir); Line 504  free(dir);
504  }  }
505    
506    
507    /************* Test for regular file in Win32 **********/
508    
509    /* I don't know how to do this, or if it can be done; assume all paths are
510    regular if they are not directories. */
511    
int isregfile(char *filename)
{
/* Anything that isdirectory() does not claim is reported as regular. */
if (isdirectory(filename)) return 0;
return 1;
}
516    
517    
518    /************* Test for a terminal in Win32 **********/
519    
520    /* I don't know how to do this; assume never */
521    
522    static BOOL
523    is_stdout_tty(void)
524    {
525    return FALSE;
526    }
527    
528    static BOOL
529    is_file_tty(FILE *f)
530    {
531    return FALSE;
532    }
533    
534    
535  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
536    
537  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
# Line 300  free(dir); Line 540  free(dir);
540    
541  typedef void directory_type;  typedef void directory_type;
542    
543  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
544  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
545  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
546  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
547    
548    
549    /************* Test for regular when we can't do it **********/
550    
551    /* Assume all files are regular. */
552    
int
isregfile(char *filename)
{
return 1;   /* No way of testing: every path is reported as regular */
}
554    
555    
556    /************* Test for a terminal when we can't do it **********/
557    
558    static BOOL
559    is_stdout_tty(void)
560    {
561    return FALSE;
562    }
563    
564    static BOOL
565    is_file_tty(FILE *f)
566    {
567    return FALSE;
568    }
569    
570  #endif  #endif
571    
572    
573    
574  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
575  /*************************************************  /*************************************************
576  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
577  *************************************************/  *************************************************/
# Line 332  return sys_errlist[n]; Line 594  return sys_errlist[n];
594    
595    
596  /*************************************************  /*************************************************
597    *            Read one line of input              *
598    *************************************************/
599    
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it gives no way of telling how
many characters it has read when there are binary zeros embedded in the data.

Characters are copied up to and including the first '\n', or until length
characters have been stored, or until end of file, whichever comes first. No
terminating zero is added. If length is zero or negative, nothing is read from
the file and nothing is stored.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
int yield = 0;

/* The space check comes before the read so that a character is never taken
from the file unless there is somewhere to put it. */

while (yield < length)
  {
  int c = fgetc(f);
  if (c == EOF) break;
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }

return yield;
}
627    
628    
629    
630    /*************************************************
631    *             Find end of line                   *
632    *************************************************/
633    
634    /* The length of the endline sequence that is found is set via lenptr. This may
635    be zero at the very end of the file if there is no line-ending sequence there.
636    
637    Arguments:
638      p         current position in line
639      endptr    end of available data
640      lenptr    where to put the length of the eol sequence
641    
642    Returns:    pointer after the last byte of the line,
643                including the newline byte(s)
644    */
645    
646    static char *
647    end_of_line(char *p, char *endptr, int *lenptr)
648    {
649    switch(endlinetype)
650      {
651      default:      /* Just in case */
652      case EL_LF:
653      while (p < endptr && *p != '\n') p++;
654      if (p < endptr)
655        {
656        *lenptr = 1;
657        return p + 1;
658        }
659      *lenptr = 0;
660      return endptr;
661    
662      case EL_CR:
663      while (p < endptr && *p != '\r') p++;
664      if (p < endptr)
665        {
666        *lenptr = 1;
667        return p + 1;
668        }
669      *lenptr = 0;
670      return endptr;
671    
672      case EL_CRLF:
673      for (;;)
674        {
675        while (p < endptr && *p != '\r') p++;
676        if (++p >= endptr)
677          {
678          *lenptr = 0;
679          return endptr;
680          }
681        if (*p == '\n')
682          {
683          *lenptr = 2;
684          return p + 1;
685          }
686        }
687      break;
688    
689      case EL_ANYCRLF:
690      while (p < endptr)
691        {
692        int extra = 0;
693        register int c = *((unsigned char *)p);
694    
695        if (utf8 && c >= 0xc0)
696          {
697          int gcii, gcss;
698          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
699          gcss = 6*extra;
700          c = (c & utf8_table3[extra]) << gcss;
701          for (gcii = 1; gcii <= extra; gcii++)
702            {
703            gcss -= 6;
704            c |= (p[gcii] & 0x3f) << gcss;
705            }
706          }
707    
708        p += 1 + extra;
709    
710        switch (c)
711          {
712          case 0x0a:    /* LF */
713          *lenptr = 1;
714          return p;
715    
716          case 0x0d:    /* CR */
717          if (p < endptr && *p == 0x0a)
718            {
719            *lenptr = 2;
720            p++;
721            }
722          else *lenptr = 1;
723          return p;
724    
725          default:
726          break;
727          }
728        }   /* End of loop for ANYCRLF case */
729    
730      *lenptr = 0;  /* Must have hit the end */
731      return endptr;
732    
733      case EL_ANY:
734      while (p < endptr)
735        {
736        int extra = 0;
737        register int c = *((unsigned char *)p);
738    
739        if (utf8 && c >= 0xc0)
740          {
741          int gcii, gcss;
742          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
743          gcss = 6*extra;
744          c = (c & utf8_table3[extra]) << gcss;
745          for (gcii = 1; gcii <= extra; gcii++)
746            {
747            gcss -= 6;
748            c |= (p[gcii] & 0x3f) << gcss;
749            }
750          }
751    
752        p += 1 + extra;
753    
754        switch (c)
755          {
756          case 0x0a:    /* LF */
757          case 0x0b:    /* VT */
758          case 0x0c:    /* FF */
759          *lenptr = 1;
760          return p;
761    
762          case 0x0d:    /* CR */
763          if (p < endptr && *p == 0x0a)
764            {
765            *lenptr = 2;
766            p++;
767            }
768          else *lenptr = 1;
769          return p;
770    
771          case 0x85:    /* NEL */
772          *lenptr = utf8? 2 : 1;
773          return p;
774    
775          case 0x2028:  /* LS */
776          case 0x2029:  /* PS */
777          *lenptr = 3;
778          return p;
779    
780          default:
781          break;
782          }
783        }   /* End of loop for ANY case */
784    
785      *lenptr = 0;  /* Must have hit the end */
786      return endptr;
787      }     /* End of overall switch */
788    }
789    
790    
791    
792    /*************************************************
793    *         Find start of previous line            *
794    *************************************************/
795    
796    /* This is called when looking back for before lines to print.
797    
798    Arguments:
799      p         start of the subsequent line
800      startptr  start of available data
801    
802    Returns:    pointer to the start of the previous line
803    */
804    
805    static char *
806    previous_line(char *p, char *startptr)
807    {
808    switch(endlinetype)
809      {
810      default:      /* Just in case */
811      case EL_LF:
812      p--;
813      while (p > startptr && p[-1] != '\n') p--;
814      return p;
815    
816      case EL_CR:
817      p--;
818      while (p > startptr && p[-1] != '\n') p--;
819      return p;
820    
821      case EL_CRLF:
822      for (;;)
823        {
824        p -= 2;
825        while (p > startptr && p[-1] != '\n') p--;
826        if (p <= startptr + 1 || p[-2] == '\r') return p;
827        }
828      return p;   /* But control should never get here */
829    
830      case EL_ANY:
831      case EL_ANYCRLF:
832      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
833      if (utf8) while ((*p & 0xc0) == 0x80) p--;
834    
835      while (p > startptr)
836        {
837        register int c;
838        char *pp = p - 1;
839    
840        if (utf8)
841          {
842          int extra = 0;
843          while ((*pp & 0xc0) == 0x80) pp--;
844          c = *((unsigned char *)pp);
845          if (c >= 0xc0)
846            {
847            int gcii, gcss;
848            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
849            gcss = 6*extra;
850            c = (c & utf8_table3[extra]) << gcss;
851            for (gcii = 1; gcii <= extra; gcii++)
852              {
853              gcss -= 6;
854              c |= (pp[gcii] & 0x3f) << gcss;
855              }
856            }
857          }
858        else c = *((unsigned char *)pp);
859    
860        if (endlinetype == EL_ANYCRLF) switch (c)
861          {
862          case 0x0a:    /* LF */
863          case 0x0d:    /* CR */
864          return p;
865    
866          default:
867          break;
868          }
869    
870        else switch (c)
871          {
872          case 0x0a:    /* LF */
873          case 0x0b:    /* VT */
874          case 0x0c:    /* FF */
875          case 0x0d:    /* CR */
876          case 0x85:    /* NEL */
877          case 0x2028:  /* LS */
878          case 0x2029:  /* PS */
879          return p;
880    
881          default:
882          break;
883          }
884    
885        p = pp;  /* Back one character */
886        }        /* End of loop for ANY case */
887    
888      return startptr;  /* Hit start of data */
889      }     /* End of overall switch */
890    }
891    
892    
893    
894    
895    
896    /*************************************************
897  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
898  *************************************************/  *************************************************/
899    
900  /* This is called if we are about to lose said lines because of buffer filling,  /* This is called if we are about to lose said lines because of buffer filling,
901  and at the end of the file.  and at the end of the file. The data in the line is written using fwrite() so
902    that a binary zero does not terminate it.
903    
904  Arguments:  Arguments:
905    lastmatchnumber   the number of the last matching line, plus one    lastmatchnumber   the number of the last matching line, plus one
# Line 355  if (after_context > 0 && lastmatchnumber Line 918  if (after_context > 0 && lastmatchnumber
918    int count = 0;    int count = 0;
919    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
920      {      {
921        int ellength;
922      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
923      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
924      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
925      while (*pp != '\n') pp++;      pp = end_of_line(pp, endptr, &ellength);
926      fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
927      lastmatchrestart = pp + 1;      lastmatchrestart = pp;
928      }      }
929    hyphenpending = TRUE;    hyphenpending = TRUE;
930    }    }
# Line 369  if (after_context > 0 && lastmatchnumber Line 933  if (after_context > 0 && lastmatchnumber
933    
934    
935  /*************************************************  /*************************************************
936    *   Apply patterns to subject till one matches   *
937    *************************************************/
938    
939    /* This function is called to run through all patterns, looking for a match. It
940    is used multiple times for the same subject when colouring is enabled, in order
941    to find all possible matches.
942    
943    Arguments:
944      matchptr     the start of the subject
945      length       the length of the subject to match
946      startoffset  where to start matching
947      offsets      the offets vector to fill in
948      mrc          address of where to put the result of pcre_exec()
949    
950    Returns:      TRUE if there was a match
951                  FALSE if there was no match
952                  invert if there was a non-fatal error
953    */
954    
955    static BOOL
956    match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
957      int *mrc)
958    {
959    int i;
960    size_t slen = length;
961    const char *msg = "this text:\n\n";
962    if (slen > 200)
963      {
964      slen = 200;
965      msg = "text that starts:\n\n";
966      }
967    for (i = 0; i < pattern_count; i++)
968      {
969      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
970        startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
971      if (*mrc >= 0) return TRUE;
972      if (*mrc == PCRE_ERROR_NOMATCH) continue;
973      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
974      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
975      fprintf(stderr, "%s", msg);
976      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
977      fprintf(stderr, "\n\n");
978      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
979        resource_error = TRUE;
980      if (error_count++ > 20)
981        {
982        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
983        pcregrep_exit(2);
984        }
985      return invert;    /* No more matching; don't show the line again */
986      }
987    
988    return FALSE;  /* No match, no errors */
989    }
990    
991    
992    
993    /*************************************************
994  *            Grep an individual file             *  *            Grep an individual file             *
995  *************************************************/  *************************************************/
996    
997  /* This is called from grep_or_recurse() below. It uses a buffer that is three  /* This is called from grep_or_recurse() below. It uses a buffer that is three
998  times the value of MBUFTHIRD. The matching point is never allowed to stray into  times the value of bufthird. The matching point is never allowed to stray into
999  the top third of the buffer, thus keeping more of the file available for  the top third of the buffer, thus keeping more of the file available for
1000  context printing or for multiline scanning. For large files, the pointer will  context printing or for multiline scanning. For large files, the pointer will
1001  be in the middle third most of the time, so the bottom third is available for  be in the middle third most of the time, so the bottom third is available for
1002  "before" context printing.  "before" context printing.
1003    
1004  Arguments:  Arguments:
1005    in           the fopened FILE stream    handle       the fopened FILE stream for a normal file
1006                   the gzFile pointer when reading is via libz
1007                   the BZFILE pointer when reading is via libbz2
1008      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1009      filename     the file name or NULL (for errors)
1010    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
1011                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
1012                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
1013    
1014  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
1015                 1 otherwise (no matches)                 1 otherwise (no matches)
1016                   2 if an overlong line is encountered
1017                   3 if there is a read error on a .bz2 file
1018  */  */
1019    
1020  static int  static int
1021  pcregrep(FILE *in, char *printname)  pcregrep(void *handle, int frtype, char *filename, char *printname)
1022  {  {
1023  int rc = 1;  int rc = 1;
1024  int linenumber = 1;  int linenumber = 1;
1025  int lastmatchnumber = 0;  int lastmatchnumber = 0;
1026  int count = 0;  int count = 0;
1027  int offsets[99];  int filepos = 0;
1028    int offsets[OFFSET_SIZE];
1029  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
1030  char buffer[3*MBUFTHIRD];  char *ptr = main_buffer;
 char *ptr = buffer;  
1031  char *endptr;  char *endptr;
1032  size_t bufflength;  size_t bufflength;
1033  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
1034    BOOL input_line_buffered = line_buffered;
1035    FILE *in = NULL;                    /* Ensure initialized */
1036    
1037    #ifdef SUPPORT_LIBZ
1038    gzFile ingz = NULL;
1039    #endif
1040    
1041    #ifdef SUPPORT_LIBBZ2
1042    BZFILE *inbz2 = NULL;
1043    #endif
1044    
1045    
1046    /* Do the first read into the start of the buffer and set up the pointer to end
1047    of what we have. In the case of libz, a non-zipped .gz file will be read as a
1048    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1049    fail. */
1050    
1051  /* Do the first read into the start of the buffer and set up the pointer to  #ifdef SUPPORT_LIBZ
1052  end of what we have. */  if (frtype == FR_LIBZ)
1053      {
1054      ingz = (gzFile)handle;
1055      bufflength = gzread (ingz, main_buffer, bufsize);
1056      }
1057    else
1058    #endif
1059    
1060    #ifdef SUPPORT_LIBBZ2
1061    if (frtype == FR_LIBBZ2)
1062      {
1063      inbz2 = (BZFILE *)handle;
1064      bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1065      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1066      }                                    /* without the cast it is unsigned. */
1067    else
1068    #endif
1069    
1070  bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);    {
1071  endptr = buffer + bufflength;    in = (FILE *)handle;
1072      if (is_file_tty(in)) input_line_buffered = TRUE;
1073      bufflength = input_line_buffered?
1074        read_one_line(main_buffer, bufsize, in) :
1075        fread(main_buffer, 1, bufsize, in);
1076      }
1077    
1078    endptr = main_buffer + bufflength;
1079    
1080  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
1081  files, endptr will be at the end of the buffer when we are in the middle of the  files, endptr will be at the end of the buffer when we are in the middle of the
# Line 417  way, the buffer is shifted left and re-f Line 1084  way, the buffer is shifted left and re-f
1084    
1085  while (ptr < endptr)  while (ptr < endptr)
1086    {    {
1087    int i;    int endlinelength;
1088    BOOL match = FALSE;    int mrc = 0;
1089      int startoffset = 0;
1090      BOOL match;
1091      char *matchptr = ptr;
1092    char *t = ptr;    char *t = ptr;
1093    size_t length, linelength;    size_t length, linelength;
1094    
1095    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
1096    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
1097    length remainder of the data in the buffer. Otherwise, it is the length of    length remainder of the data in the buffer. Otherwise, it is the length of
1098    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
1099    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1100    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
1101      first line. */
1102    linelength = 0;  
1103    while (t < endptr && *t++ != '\n') linelength++;    t = end_of_line(t, endptr, &endlinelength);
1104    length = multiline? endptr - ptr : linelength;    linelength = t - ptr - endlinelength;
1105      length = multiline? (size_t)(endptr - ptr) : linelength;
1106    /* Run through all the patterns until one matches. Note that we don't include  
1107    the final newline in the subject string. */    /* Check to see if the line we are looking at extends right to the very end
1108      of the buffer without a line terminator. This means the line is too long to
1109    for (i = 0; !match && i < pattern_count; i++)    handle. */
1110      {  
1111      match = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,    if (endlinelength == 0 && t == main_buffer + bufsize)
1112        offsets, 99) >= 0;      {
1113        fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1114                        "pcregrep: check the --buffer-size option\n",
1115                        linenumber,
1116                        (filename == NULL)? "" : " of file ",
1117                        (filename == NULL)? "" : filename);
1118        return 2;
1119      }      }
1120    
1121    /* If it's a match or a not-match (as required), print what's wanted. */    /* Extra processing for Jeffrey Friedl's debugging. */
1122    
1123    #ifdef JFRIEDL_DEBUG
1124      if (jfriedl_XT || jfriedl_XR)
1125      {
1126          #include <sys/time.h>
1127          #include <time.h>
1128          struct timeval start_time, end_time;
1129          struct timezone dummy;
1130          int i;
1131    
1132          if (jfriedl_XT)
1133          {
1134              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1135              const char *orig = ptr;
1136              ptr = malloc(newlen + 1);
1137              if (!ptr) {
1138                      printf("out of memory");
1139                      pcregrep_exit(2);
1140              }
1141              endptr = ptr;
1142              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1143              for (i = 0; i < jfriedl_XT; i++) {
1144                      strncpy(endptr, orig,  length);
1145                      endptr += length;
1146              }
1147              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1148              length = newlen;
1149          }
1150    
1151          if (gettimeofday(&start_time, &dummy) != 0)
1152                  perror("bad gettimeofday");
1153    
1154    
1155          for (i = 0; i < jfriedl_XR; i++)
1156              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1157                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1158    
1159          if (gettimeofday(&end_time, &dummy) != 0)
1160                  perror("bad gettimeofday");
1161    
1162          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1163                          -
1164                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1165    
1166          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1167          return 0;
1168      }
1169    #endif
1170    
1171      /* We come back here after a match when the -o option (only_matching) is set,
1172      in order to find any further matches in the same line. */
1173    
1174      ONLY_MATCHING_RESTART:
1175    
1176      /* Run through all the patterns until one matches or there is an error other
1177      than NOMATCH. This code is in a subroutine so that it can be re-used for
1178      finding subsequent matches when colouring matched lines. */
1179    
1180      match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1181    
1182      /* If it's a match or a not-match (as required), do what's wanted. */
1183    
1184    if (match != invert)    if (match != invert)
1185      {      {
1186      BOOL hyphenprinted = FALSE;      BOOL hyphenprinted = FALSE;
1187    
1188      if (filenames_nomatch_only) return 1;      /* We've failed if we want a file that doesn't have any matches. */
1189    
1190        if (filenames == FN_NOMATCH_ONLY) return 1;
1191    
1192        /* Just count if just counting is wanted. */
1193    
1194      if (count_only) count++;      if (count_only) count++;
1195    
1196      else if (filenames_only)      /* If all we want is a file name, there is no need to scan any more lines
1197        in the file. */
1198    
1199        else if (filenames == FN_MATCH_ONLY)
1200        {        {
1201        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1202        return 0;        return 0;
1203        }        }
1204    
1205        /* Likewise, if all we want is a yes/no answer. */
1206    
1207      else if (quiet) return 0;      else if (quiet) return 0;
1208    
1209        /* The --only-matching option prints just the substring that matched, or a
1210        captured portion of it, as long as this string is not empty, and the
1211        --file-offsets and --line-offsets options output offsets for the matching
1212        substring (they both force --only-matching = 0). None of these options
1213        prints any context. Afterwards, adjust the start and then jump back to look
1214        for further matches in the same line. If we are in invert mode, however,
1215        nothing is printed and we do not restart - this could still be useful
1216        because the return code is set. */
1217    
1218        else if (only_matching >= 0)
1219          {
1220          if (!invert)
1221            {
1222            if (printname != NULL) fprintf(stdout, "%s:", printname);
1223            if (number) fprintf(stdout, "%d:", linenumber);
1224            if (line_offsets)
1225              fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1226                offsets[1] - offsets[0]);
1227            else if (file_offsets)
1228              fprintf(stdout, "%d,%d\n",
1229                (int)(filepos + matchptr + offsets[0] - ptr),
1230                offsets[1] - offsets[0]);
1231            else if (only_matching < mrc)
1232              {
1233              int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1234              if (plen > 0)
1235                {
1236                if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1237                FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1238                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1239                fprintf(stdout, "\n");
1240                }
1241              }
1242            else if (printname != NULL || number) fprintf(stdout, "\n");
1243            match = FALSE;
1244            if (line_buffered) fflush(stdout);
1245            rc = 0;                      /* Had some success */
1246            startoffset = offsets[1];    /* Restart after the match */
1247            goto ONLY_MATCHING_RESTART;
1248            }
1249          }
1250    
1251        /* This is the default case when none of the above options is set. We print
1252        the matching lines(s), possibly preceded and/or followed by other lines of
1253        context. */
1254    
1255      else      else
1256        {        {
1257        /* See if there is a requirement to print some "after" lines from a        /* See if there is a requirement to print some "after" lines from a
# Line 467  while (ptr < endptr) Line 1259  while (ptr < endptr)
1259    
1260        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
1261          {          {
1262            int ellength;
1263          int linecount = 0;          int linecount = 0;
1264          char *p = lastmatchrestart;          char *p = lastmatchrestart;
1265    
1266          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
1267            {            {
1268            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
1269            linecount++;            linecount++;
1270            }            }
1271    
1272          /* It is important to advance lastmatchrestart during this printing so          /* It is important to advance lastmatchrestart during this printing so
1273          that it interacts correctly with any "before" printing below. */          that it interacts correctly with any "before" printing below. Print
1274            each line's data using fwrite() in case there are binary zeroes. */
1275    
1276          while (lastmatchrestart < p)          while (lastmatchrestart < p)
1277            {            {
1278            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
1279            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1280            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1281            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1282            fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1283            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
1284            }            }
1285          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
1286          }          }
# Line 509  while (ptr < endptr) Line 1302  while (ptr < endptr)
1302          int linecount = 0;          int linecount = 0;
1303          char *p = ptr;          char *p = ptr;
1304    
1305          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1306                 linecount++ < before_context)                 linecount < before_context)
1307            {            {
1308            p--;            linecount++;
1309            while (p > buffer && p[-1] != '\n') p--;            p = previous_line(p, main_buffer);
1310            }            }
1311    
1312          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 521  while (ptr < endptr) Line 1314  while (ptr < endptr)
1314    
1315          while (p < ptr)          while (p < ptr)
1316            {            {
1317              int ellength;
1318            char *pp = p;            char *pp = p;
1319            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1320            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1321            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1322            fprintf(stdout, "%.*s", pp - p + 1, p);            FWRITE(p, 1, pp - p, stdout);
1323            p = pp + 1;            p = pp;
1324            }            }
1325          }          }
1326    
1327        /* Now print the matching line(s); ensure we set hyphenpending at the end        /* Now print the matching line(s); ensure we set hyphenpending at the end
1328        of the file. */        of the file if any context lines are being output. */
1329    
1330          if (after_context > 0 || before_context > 0)
1331            endhyphenpending = TRUE;
1332    
       endhyphenpending = TRUE;  
1333        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (printname != NULL) fprintf(stdout, "%s:", printname);
1334        if (number) fprintf(stdout, "%d:", linenumber);        if (number) fprintf(stdout, "%d:", linenumber);
1335    
1336        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1337        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1338        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1339        start of the match will always be before the first \n character. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1340          the match will always be before the first newline sequence. */
1341    
1342        if (multiline)        if (multiline & !invert)
1343          {          {
1344          char *endmatch = ptr + offsets[1];          char *endmatch = ptr + offsets[1];
1345          t = ptr;          t = ptr;
1346          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          while (t < endmatch)
1347          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1348          linelength = endmatch - ptr;            t = end_of_line(t, endptr, &endlinelength);
1349              if (t < endmatch) linenumber++; else break;
1350              }
1351            linelength = t - ptr - endlinelength;
1352            }
1353    
1354          /*** NOTE: Use only fwrite() to output the data line, so that binary
1355          zeroes are treated as just another data character. */
1356    
1357          /* This extra option, for Jeffrey Friedl's debugging requirements,
1358          replaces the matched string, or a specific captured string if it exists,
1359          with X. When this happens, colouring is ignored. */
1360    
1361    #ifdef JFRIEDL_DEBUG
1362          if (S_arg >= 0 && S_arg < mrc)
1363            {
1364            int first = S_arg * 2;
1365            int last  = first + 1;
1366            FWRITE(ptr, 1, offsets[first], stdout);
1367            fprintf(stdout, "X");
1368            FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1369            }
1370          else
1371    #endif
1372    
1373          /* We have to split the line(s) up if colouring, and search for further
1374          matches, but not of course if the line is a non-match. */
1375    
1376          if (do_colour && !invert)
1377            {
1378            int plength;
1379            FWRITE(ptr, 1, offsets[0], stdout);
1380            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1381            FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1382            fprintf(stdout, "%c[00m", 0x1b);
1383            for (;;)
1384              {
1385              startoffset = offsets[1];
1386              if (startoffset >= linelength + endlinelength ||
1387                  !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1388                break;
1389              FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1390              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1391              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1392              fprintf(stdout, "%c[00m", 0x1b);
1393              }
1394    
1395            /* In multiline mode, we may have already printed the complete line
1396            and its line-ending characters (if they matched the pattern), so there
1397            may be no more to print. */
1398    
1399            plength = (linelength + endlinelength) - startoffset;
1400            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1401          }          }
1402    
1403        fprintf(stdout, "%.*s\n", linelength, ptr);        /* Not colouring; no need to search for further matches */
1404    
1405          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1406        }        }
1407    
1408        /* End of doing what has to be done for a match. If --line-buffered was
1409        given, flush the output. */
1410    
1411        if (line_buffered) fflush(stdout);
1412      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1413    
1414      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1415      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1416    
1417      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1418      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1419      }      }
1420    
1421    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1422      anything to be printed), we have to move on to the end of the match before
1423      proceeding. */
1424    
1425      if (multiline && invert && match)
1426        {
1427        int ellength;
1428        char *endmatch = ptr + offsets[1];
1429        t = ptr;
1430        while (t < endmatch)
1431          {
1432          t = end_of_line(t, endptr, &ellength);
1433          if (t <= endmatch) linenumber++; else break;
1434          }
1435        endmatch = end_of_line(endmatch, endptr, &ellength);
1436        linelength = endmatch - ptr - ellength;
1437        }
1438    
1439    ptr += linelength + 1;    /* Advance to after the newline and increment the line number. The file
1440      offset to the current line is maintained in filepos. */
1441    
1442      ptr += linelength + endlinelength;
1443      filepos += (int)(linelength + endlinelength);
1444    linenumber++;    linenumber++;
1445    
1446      /* If input is line buffered, and the buffer is not yet full, read another
1447      line and add it into the buffer. */
1448    
1449      if (input_line_buffered && bufflength < bufsize)
1450        {
1451        int add = read_one_line(ptr, bufsize - (ptr - main_buffer), in);
1452        bufflength += add;
1453        endptr += add;
1454        }
1455    
1456    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1457    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1458    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
1459    about to be lost, print them. */    about to be lost, print them. */
1460    
1461    if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)    if (bufflength >= bufsize && ptr > main_buffer + 2*bufthird)
1462      {      {
1463      if (after_context > 0 &&      if (after_context > 0 &&
1464          lastmatchnumber > 0 &&          lastmatchnumber > 0 &&
1465          lastmatchrestart < buffer + MBUFTHIRD)          lastmatchrestart < main_buffer + bufthird)
1466        {        {
1467        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1468        lastmatchnumber = 0;        lastmatchnumber = 0;
# Line 585  while (ptr < endptr) Line 1470  while (ptr < endptr)
1470    
1471      /* Now do the shuffle */      /* Now do the shuffle */
1472    
1473      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1474      ptr -= MBUFTHIRD;      ptr -= bufthird;
1475      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);  
1476      endptr = buffer + bufflength;  #ifdef SUPPORT_LIBZ
1477        if (frtype == FR_LIBZ)
1478          bufflength = 2*bufthird +
1479            gzread (ingz, main_buffer + 2*bufthird, bufthird);
1480        else
1481    #endif
1482    
1483    #ifdef SUPPORT_LIBBZ2
1484        if (frtype == FR_LIBBZ2)
1485          bufflength = 2*bufthird +
1486            BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1487        else
1488    #endif
1489    
1490        bufflength = 2*bufthird +
1491          (input_line_buffered?
1492           read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1493           fread(main_buffer + 2*bufthird, 1, bufthird, in));
1494        endptr = main_buffer + bufflength;
1495    
1496      /* Adjust any last match point */      /* Adjust any last match point */
1497    
1498      if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;      if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1499      }      }
1500    }     /* Loop through the whole file */    }     /* Loop through the whole file */
1501    
1502  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
1503  hyphenpending if it prints something. */  hyphenpending if it prints something. */
1504    
1505  do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);  if (only_matching < 0 && !count_only)
1506  hyphenpending |= endhyphenpending;    {
1507      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1508      hyphenpending |= endhyphenpending;
1509      }
1510    
1511  /* Print the file name if we are looking for those without matches and there  /* Print the file name if we are looking for those without matches and there
1512  were none. If we found a match, we won't have got this far. */  were none. If we found a match, we won't have got this far. */
1513    
1514  if (filenames_nomatch_only)  if (filenames == FN_NOMATCH_ONLY)
1515    {    {
1516    fprintf(stdout, "%s\n", printname);    fprintf(stdout, "%s\n", printname);
1517    return 0;    return 0;
# Line 615  if (filenames_nomatch_only) Line 1521  if (filenames_nomatch_only)
1521    
1522  if (count_only)  if (count_only)
1523    {    {
1524    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1525    fprintf(stdout, "%d\n", count);      {
1526        if (printname != NULL && filenames != FN_NONE)
1527          fprintf(stdout, "%s:", printname);
1528        fprintf(stdout, "%d\n", count);
1529        }
1530    }    }
1531    
1532  return rc;  return rc;
# Line 633  recursing; if it's a file, grep it. Line 1543  recursing; if it's a file, grep it.
1543    
1544  Arguments:  Arguments:
1545    pathname          the path to investigate    pathname          the path to investigate
1546    dir_recurse       TRUE if recursing is wanted (-r)    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
   show_filenames    TRUE if file names are wanted for multiple files, except  
                       for the only file at top level when not filenames_only  
1547    only_one_at_top   TRUE if the path is the only one at toplevel    only_one_at_top   TRUE if the path is the only one at toplevel
1548    
1549  Returns:   0 if there was at least one match  Returns:   0 if there was at least one match
# Line 646  However, file opening failures are suppr Line 1554  However, file opening failures are suppr
1554  */  */
1555    
1556  static int  static int
1557  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL show_filenames,  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
   BOOL only_one_at_top)  
1558  {  {
1559  int rc = 1;  int rc = 1;
1560  int sep;  int sep;
1561  FILE *in;  int frtype;
1562  char *printname;  int pathlen;
1563    void *handle;
1564    FILE *in = NULL;           /* Ensure initialized */
1565    
1566    #ifdef SUPPORT_LIBZ
1567    gzFile ingz = NULL;
1568    #endif
1569    
1570    #ifdef SUPPORT_LIBBZ2
1571    BZFILE *inbz2 = NULL;
1572    #endif
1573    
1574  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1575    
1576  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1577    {    {
1578    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN, stdin_name,
1579      (filenames_only || filenames_nomatch_only ||      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
     (show_filenames && !only_one_at_top))?  
1580        stdin_name : NULL);        stdin_name : NULL);
1581    }    }
1582    
1583  /* If the file is a directory and we are recursing, scan each file within it,  /* If the file is a directory, skip if skipping or if we are recursing, scan
1584  subject to any include or exclude patterns that were set. The scanning code is  each file and directory within it, subject to any include or exclude patterns
1585  localized so it can be made system-specific. */  that were set. The scanning code is localized so it can be made
1586    system-specific. */
1587    
1588  if ((sep = isdirectory(pathname)) != 0 && dir_recurse)  if ((sep = isdirectory(pathname)) != 0)
1589    {    {
1590    char buffer[1024];    if (dee_action == dee_SKIP) return 1;
1591    char *nextfile;    if (dee_action == dee_RECURSE)
1592    directory_type *dir = opendirectory(pathname);      {
1593        char buffer[1024];
1594        char *nextfile;
1595        directory_type *dir = opendirectory(pathname);
1596    
1597        if (dir == NULL)
1598          {
1599          if (!silent)
1600            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1601              strerror(errno));
1602          return 2;
1603          }
1604    
1605        while ((nextfile = readdirectory(dir)) != NULL)
1606          {
1607          int frc, nflen;
1608          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1609          nflen = (int)(strlen(nextfile));
1610    
1611          if (isdirectory(buffer))
1612            {
1613            if (exclude_dir_compiled != NULL &&
1614                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1615              continue;
1616    
1617            if (include_dir_compiled != NULL &&
1618                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1619              continue;
1620            }
1621          else
1622            {
1623            if (exclude_compiled != NULL &&
1624                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1625              continue;
1626    
1627            if (include_compiled != NULL &&
1628                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1629              continue;
1630            }
1631    
1632          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1633          if (frc > 1) rc = frc;
1634           else if (frc == 0 && rc == 1) rc = 0;
1635          }
1636    
1637        closedirectory(dir);
1638        return rc;
1639        }
1640      }
1641    
1642    /* If the file is not a directory and not a regular file, skip it if that's
1643    been requested. */
1644    
1645    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1646    
1647    /* Control reaches here if we have a regular file, or if we have a directory
1648    and recursion or skipping was not requested, or if we have anything else and
1649    skipping was not requested. The scan proceeds. If this is the first and only
1650    argument at top level, we don't show the file name, unless we are only showing
1651    the file name, or the filename was forced (-H). */
1652    
1653    if (dir == NULL)  pathlen = (int)(strlen(pathname));
1654    
1655    /* Open using zlib if it is supported and the file name ends with .gz. */
1656    
1657    #ifdef SUPPORT_LIBZ
1658    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1659      {
1660      ingz = gzopen(pathname, "rb");
1661      if (ingz == NULL)
1662      {      {
1663      if (!silent)      if (!silent)
1664        fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1665          strerror(errno));          strerror(errno));
1666      return 2;      return 2;
1667      }      }
1668      handle = (void *)ingz;
1669      frtype = FR_LIBZ;
1670      }
1671    else
1672    #endif
1673    
1674    while ((nextfile = readdirectory(dir)) != NULL)  /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
     {  
     int frc, blen;  
     sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);  
     blen = strlen(buffer);  
   
     if (exclude_compiled != NULL &&  
         pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)  
       continue;  
1675    
1676      if (include_compiled != NULL &&  #ifdef SUPPORT_LIBBZ2
1677          pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1678        continue;    {
1679      inbz2 = BZ2_bzopen(pathname, "rb");
1680      handle = (void *)inbz2;
1681      frtype = FR_LIBBZ2;
1682      }
1683    else
1684    #endif
1685    
1686      frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);  /* Otherwise use plain fopen(). The label is so that we can come back here if
1687      if (frc > 1) rc = frc;  an attempt to read a .bz2 file indicates that it really is a plain file. */
      else if (frc == 0 && rc == 1) rc = 0;  
     }  
1688    
1689    closedirectory(dir);  #ifdef SUPPORT_LIBBZ2
1690    return rc;  PLAIN_FILE:
1691    #endif
1692      {
1693      in = fopen(pathname, "rb");
1694      handle = (void *)in;
1695      frtype = FR_PLAIN;
1696    }    }
1697    
1698  /* If the file is not a directory, or we are not recursing, scan it. If this is  /* All the opening methods set errno when they fail. */
 the first and only argument at top level, we don't show the file name (unless  
 we are only showing the file name). Otherwise, control is via the  
 show_filenames variable. */  
1699    
1700  in = fopen(pathname, "r");  if (handle == NULL)
 if (in == NULL)  
1701    {    {
1702    if (!silent)    if (!silent)
1703      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 719  if (in == NULL) Line 1705  if (in == NULL)
1705    return 2;    return 2;
1706    }    }
1707    
1708  printname =  (filenames_only || filenames_nomatch_only ||  /* Now grep the file */
1709    (show_filenames && !only_one_at_top))? pathname : NULL;  
1710    rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1711      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1712    
1713    /* Close in an appropriate manner. */
1714    
1715    #ifdef SUPPORT_LIBZ
1716    if (frtype == FR_LIBZ)
1717      gzclose(ingz);
1718    else
1719    #endif
1720    
1721    /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1722    read failed. If the error indicates that the file isn't in fact bzipped, try
1723    again as a normal file. */
1724    
1725    #ifdef SUPPORT_LIBBZ2
1726    if (frtype == FR_LIBBZ2)
1727      {
1728      if (rc == 3)
1729        {
1730        int errnum;
1731        const char *err = BZ2_bzerror(inbz2, &errnum);
1732        if (errnum == BZ_DATA_ERROR_MAGIC)
1733          {
1734          BZ2_bzclose(inbz2);
1735          goto PLAIN_FILE;
1736          }
1737        else if (!silent)
1738          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1739            pathname, err);
1740        rc = 2;    /* The normal "something went wrong" code */
1741        }
1742      BZ2_bzclose(inbz2);
1743      }
1744    else
1745    #endif
1746    
1747  rc = pcregrep(in, printname);  /* Normal file close */
1748    
1749  fclose(in);  fclose(in);
1750    
1751    /* Pass back the yield from pcregrep(). */
1752    
1753  return rc;  return rc;
1754  }  }
1755    
# Line 738  return rc; Line 1763  return rc;
1763  static int  static int
1764  usage(int rc)  usage(int rc)
1765  {  {
1766  fprintf(stderr, "Usage: pcregrep [-LMVcfhilnqrsvwx] [long-options] [pattern] [file1 file2 ...]\n");  option_item *op;
1767  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Usage: pcregrep [-");
1768    for (op = optionlist; op->one_char != 0; op++)
1769      {
1770      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1771      }
1772    fprintf(stderr, "] [long options] [pattern] [files]\n");
1773    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1774      "options.\n");
1775  return rc;  return rc;
1776  }  }
1777    
# Line 757  option_item *op; Line 1789  option_item *op;
1789    
1790  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1791  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1792  printf("PATTERN must be present if -f is not used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1793  printf("\"-\" can be used as a file name to mean STDIN.\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
1794    
1795    #ifdef SUPPORT_LIBZ
1796    printf("Files whose names end in .gz are read using zlib.\n");
1797    #endif
1798    
1799    #ifdef SUPPORT_LIBBZ2
1800    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1801    #endif
1802    
1803    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1804    printf("Other files and the standard input are read as plain files.\n\n");
1805    #else
1806    printf("All files are read as plain files, without any interpretation.\n\n");
1807    #endif
1808    
1809    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1810  printf("Options:\n");  printf("Options:\n");
1811    
1812  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
1813    {    {
1814    int n;    int n;
1815    char s[4];    char s[4];
1816    
1817      /* Two options were accidentally implemented and documented with underscores
1818      instead of hyphens in their names, something that was not noticed for quite a
1819      few releases. When fixing this, I left the underscored versions in the list
1820      in case people were using them. However, we don't want to display them in the
1821      help data. There are no other options that contain underscores, and we do not
1822      expect ever to implement such options. Therefore, just omit any option that
1823      contains an underscore. */
1824    
1825      if (strchr(op->long_name, '_') != NULL) continue;
1826    
1827    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1828    printf("  %s --%s%n", s, op->long_name, &n);    n = 31 - printf("  %s --%s", s, op->long_name);
   n = 30 - n;  
1829    if (n < 1) n = 1;    if (n < 1) n = 1;
1830    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                     ", op->help_text);
1831    }    }
1832    
1833  printf("\nWhen reading patterns from a file instead of using a command line option,\n");  printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
1834    printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
1835    printf("When reading patterns from a file instead of using a command line option,\n");
1836  printf("trailing white space is removed and blank lines are ignored.\n");  printf("trailing white space is removed and blank lines are ignored.\n");
1837  printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
1838      MAX_PATTERN_COUNT, PATBUFSIZE);
1839    
1840  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1841  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
# Line 794  handle_option(int letter, int options) Line 1853  handle_option(int letter, int options)
1853  {  {
1854  switch(letter)  switch(letter)
1855    {    {
1856    case -1:  help(); exit(0);    case N_FOFFSETS: file_offsets = TRUE; break;
1857      case N_HELP: help(); pcregrep_exit(0);
1858      case N_LOFFSETS: line_offsets = number = TRUE; break;
1859      case N_LBUFFER: line_buffered = TRUE; break;
1860    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1861    case 'h': filenames = FALSE; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1862      case 'H': filenames = FN_FORCE; break;
1863      case 'h': filenames = FN_NONE; break;
1864    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1865    case 'l': filenames_only = TRUE; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1866    case 'L': filenames_nomatch_only = TRUE; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1867    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1868    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1869      case 'o': only_matching = 0; break;
1870    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1871    case 'r': recurse = TRUE; break;    case 'r': dee_action = dee_RECURSE; break;
1872    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1873    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1874    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1875    case 'w': word_match = TRUE; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1876    case 'x': whole_lines = TRUE; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1877    
1878    case 'V':    case 'V':
1879    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
1880    fprintf(stderr, "PCRE version %s\n", pcre_version());    pcregrep_exit(0);
   exit(0);  
1881    break;    break;
1882    
1883    default:    default:
1884    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1885    exit(usage(2));    pcregrep_exit(usage(2));
1886    }    }
1887    
1888  return options;  return options;
# Line 828  return options; Line 1892  return options;
1892    
1893    
1894  /*************************************************  /*************************************************
1895    *          Construct printed ordinal             *
1896    *************************************************/
1897    
1898    /* This turns a number into "1st", "3rd", etc. */
1899    
1900    static char *
1901    ordin(int n)
1902    {
1903    static char buffer[8];
1904    char *p = buffer;
1905    sprintf(p, "%d", n);
1906    while (*p != 0) p++;
1907    switch (n%10)
1908      {
1909      case 1: strcpy(p, "st"); break;
1910      case 2: strcpy(p, "nd"); break;
1911      case 3: strcpy(p, "rd"); break;
1912      default: strcpy(p, "th"); break;
1913      }
1914    return buffer;
1915    }
1916    
1917    
1918    
1919    /*************************************************
1920    *          Compile a single pattern              *
1921    *************************************************/
1922    
1923    /* When the -F option has been used, this is called for each substring.
1924    Otherwise it's called for each supplied pattern.
1925    
1926    Arguments:
1927      pattern        the pattern string
1928      options        the PCRE options
1929      filename       the file name, or NULL for a command-line pattern
1930      count          0 if this is the only command line pattern, or
1931                     number of the command line pattern, or
1932                     linenumber for a pattern from a file
1933    
1934    Returns:         TRUE on success, FALSE after an error
1935    */
1936    
1937    static BOOL
1938    compile_single_pattern(char *pattern, int options, char *filename, int count)
1939    {
1940    char buffer[PATBUFSIZE];
1941    const char *error;
1942    int errptr;
1943    
1944    if (pattern_count >= MAX_PATTERN_COUNT)
1945      {
1946      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1947        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1948      return FALSE;
1949      }
1950    
1951    sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
1952      suffix[process_options]);
1953    pattern_list[pattern_count] =
1954      pcre_compile(buffer, options, &error, &errptr, pcretables);
1955    if (pattern_list[pattern_count] != NULL)
1956      {
1957      pattern_count++;
1958      return TRUE;
1959      }
1960    
1961    /* Handle compile errors */
1962    
1963    errptr -= (int)strlen(prefix[process_options]);
1964    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1965    
1966    if (filename == NULL)
1967      {
1968      if (count == 0)
1969        fprintf(stderr, "pcregrep: Error in command-line regex "
1970          "at offset %d: %s\n", errptr, error);
1971      else
1972        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1973          "at offset %d: %s\n", ordin(count), errptr, error);
1974      }
1975    else
1976      {
1977      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1978        "at offset %d: %s\n", count, filename, errptr, error);
1979      }
1980    
1981    return FALSE;
1982    }
1983    
1984    
1985    
1986    /*************************************************
1987    *           Compile one supplied pattern         *
1988    *************************************************/
1989    
1990    /* When the -F option has been used, each string may be a list of strings,
1991    separated by line breaks. They will be matched literally.
1992    
1993    Arguments:
1994      pattern        the pattern string
1995      options        the PCRE options
1996      filename       the file name, or NULL for a command-line pattern
1997      count          0 if this is the only command line pattern, or
1998                     number of the command line pattern, or
1999                     linenumber for a pattern from a file
2000    
2001    Returns:         TRUE on success, FALSE after an error
2002    */
2003    
2004    static BOOL
2005    compile_pattern(char *pattern, int options, char *filename, int count)
2006    {
2007    if ((process_options & PO_FIXED_STRINGS) != 0)
2008      {
2009      char *eop = pattern + strlen(pattern);
2010      char buffer[PATBUFSIZE];
2011      for(;;)
2012        {
2013        int ellength;
2014        char *p = end_of_line(pattern, eop, &ellength);
2015        if (ellength == 0)
2016          return compile_single_pattern(pattern, options, filename, count);
2017        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
2018        pattern = p;
2019        if (!compile_single_pattern(buffer, options, filename, count))
2020          return FALSE;
2021        }
2022      }
2023    else return compile_single_pattern(pattern, options, filename, count);
2024    }
2025    
2026    
2027    
2028    /*************************************************
2029  *                Main program                    *  *                Main program                    *
2030  *************************************************/  *************************************************/
2031    
# Line 838  main(int argc, char **argv) Line 2036  main(int argc, char **argv)
2036  {  {
2037  int i, j;  int i, j;
2038  int rc = 1;  int rc = 1;
2039  int options = 0;  int pcre_options = 0;
2040    int cmd_pattern_count = 0;
2041    int hint_count = 0;
2042  int errptr;  int errptr;
 const char *error;  
2043  BOOL only_one_at_top;  BOOL only_one_at_top;
2044    char *patterns[MAX_PATTERN_COUNT];
2045    const char *locale_from = "--locale";
2046    const char *error;
2047    
2048    /* Set the default line ending value from the default in the PCRE library;
2049    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2050    Note that the return values from pcre_config(), though derived from the ASCII
2051    codes, are the same in EBCDIC environments, so we must use the actual values
2052    rather than escapes such as '\r'. */
2053    
2054    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2055    switch(i)
2056      {
2057      default:               newline = (char *)"lf"; break;
2058      case 13:               newline = (char *)"cr"; break;
2059      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2060      case -1:               newline = (char *)"any"; break;
2061      case -2:               newline = (char *)"anycrlf"; break;
2062      }
2063    
2064  /* Process the options */  /* Process the options */
2065    
# Line 855  for (i = 1; i < argc; i++) Line 2073  for (i = 1; i < argc; i++)
2073    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
2074    
2075    /* If we hit an argument that is just "-", it may be a reference to STDIN,    /* If we hit an argument that is just "-", it may be a reference to STDIN,
2076    but only if we have previously had -f to define the patterns. */    but only if we have previously had -e or -f to define the patterns. */
2077    
2078    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2079      {      {
2080      if (pattern_filename != NULL) break;      if (pattern_filename != NULL || pattern_count > 0) break;
2081        else exit(usage(2));        else pcregrep_exit(usage(2));
2082      }      }
2083    
2084    /* Handle a long name option, or -- to terminate the options */    /* Handle a long name option, or -- to terminate the options */
# Line 881  for (i = 1; i < argc; i++) Line 2099  for (i = 1; i < argc; i++)
2099      /* Some long options have data that follows after =, for example file=name.      /* Some long options have data that follows after =, for example file=name.
2100      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2101      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2102      with Jeff Friedl in preferring "regex" without the "p". These options are      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2103      entered in the table as "regex(p)". No option is in both these categories,      These options are entered in the table as "regex(p)". Options can be in
2104      fortunately. */      both these categories. */
2105    
2106      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2107        {        {
2108        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2109        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2110        if (opbra == NULL)     /* Not a (p) case */  
2111          /* Handle options with only one spelling of the name */
2112    
2113          if (opbra == NULL)     /* Does not contain '(' */
2114          {          {
2115          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2116            {            {
# Line 897  for (i = 1; i < argc; i++) Line 2118  for (i = 1; i < argc; i++)
2118            }            }
2119          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2120            {            {
2121            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2122            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2123                (int)strlen(arg) : (int)(argequals - arg);
2124            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2125              {              {
2126              option_data = arg + arglen;              option_data = arg + arglen;
# Line 911  for (i = 1; i < argc; i++) Line 2133  for (i = 1; i < argc; i++)
2133              }              }
2134            }            }
2135          }          }
2136        else                   /* Special case xxxx(p) */  
2137          /* Handle options with an alternate spelling of the name */
2138    
2139          else
2140          {          {
2141          char buff1[24];          char buff1[24];
2142          char buff2[24];          char buff2[24];
2143          int baselen = opbra - op->long_name;  
2144            int baselen = (int)(opbra - op->long_name);
2145            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2146            int arglen = (argequals == NULL || equals == NULL)?
2147              (int)strlen(arg) : (int)(argequals - arg);
2148    
2149          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2150          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2151            opbra + 1);  
2152          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2153               strncmp(arg, buff2, arglen) == 0)
2154              {
2155              if (equals != NULL && argequals != NULL)
2156                {
2157                option_data = argequals;
2158                if (*option_data == '=')
2159                  {
2160                  option_data++;
2161                  longopwasequals = TRUE;
2162                  }
2163                }
2164            break;            break;
2165              }
2166          }          }
2167        }        }
2168    
2169      if (op->one_char == 0)      if (op->one_char == 0)
2170        {        {
2171        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2172        exit(usage(2));        pcregrep_exit(usage(2));
2173        }        }
2174      }      }
2175    
2176      /* Jeffrey Friedl's debugging harness uses these additional options which
2177      are not in the right form for putting in the option table because they use
2178      only one hyphen, yet are more than one character long. By putting them
2179      separately here, they will not get displayed as part of the help() output,
2180      but I don't think Jeffrey will care about that. */
2181    
2182    #ifdef JFRIEDL_DEBUG
2183      else if (strcmp(argv[i], "-pre") == 0) {
2184              jfriedl_prefix = argv[++i];
2185              continue;
2186      } else if (strcmp(argv[i], "-post") == 0) {
2187              jfriedl_postfix = argv[++i];
2188              continue;
2189      } else if (strcmp(argv[i], "-XT") == 0) {
2190              sscanf(argv[++i], "%d", &jfriedl_XT);
2191              continue;
2192      } else if (strcmp(argv[i], "-XR") == 0) {
2193              sscanf(argv[++i], "%d", &jfriedl_XR);
2194              continue;
2195      }
2196    #endif
2197    
2198    
2199    /* One-char options; many that have no data may be in a single argument; we    /* One-char options; many that have no data may be in a single argument; we
2200    continue till we hit the last one or one that needs data. */    continue till we hit the last one or one that needs data. */
2201    
# Line 941  for (i = 1; i < argc; i++) Line 2206  for (i = 1; i < argc; i++)
2206      while (*s != 0)      while (*s != 0)
2207        {        {
2208        for (op = optionlist; op->one_char != 0; op++)        for (op = optionlist; op->one_char != 0; op++)
2209          { if (*s == op->one_char) break; }          {
2210            if (*s == op->one_char) break;
2211            }
2212        if (op->one_char == 0)        if (op->one_char == 0)
2213          {          {
2214          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2215            *s, argv[i]);            *s, argv[i]);
2216          exit(usage(2));          pcregrep_exit(usage(2));
2217          }          }
2218        if (op->type != OP_NODATA || s[1] == 0)  
2219          /* Check for a single-character option that has data: OP_OP_NUMBER
2220          is used for one that either has a numeric value or defaults, i.e. the
2221          data is optional. If a digit follows, there is data; if not, carry on
2222          with other single-character options in the same string. */
2223    
2224          option_data = s+1;
2225          if (op->type == OP_OP_NUMBER)
2226            {
2227            if (isdigit((unsigned char)s[1])) break;
2228            }
2229          else   /* Check for end or a dataless option */
2230          {          {
2231          option_data = s+1;          if (op->type != OP_NODATA || s[1] == 0) break;
         break;  
2232          }          }
2233        options = handle_option(*s++, options);  
2234          /* Handle a single-character option with no data, then loop for the
2235          next character in the string. */
2236    
2237          pcre_options = handle_option(*s++, pcre_options);
2238        }        }
2239      }      }
2240    
2241    /* At this point we should have op pointing to a matched option */    /* At this point we should have op pointing to a matched option. If the type
2242      is OP_NODATA, it means that there is no data, and the option might set
2243      something in the PCRE options. */
2244    
2245    if (op->type == OP_NODATA)    if (op->type == OP_NODATA)
     options = handle_option(op->one_char, options);  
   else  
2246      {      {
2247      if (*option_data == 0)      pcre_options = handle_option(op->one_char, pcre_options);
2248        continue;
2249        }
2250    
2251      /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2252      either has a value or defaults to something. It cannot have data in a
2253      separate item. At the moment, the only such options are "colo(u)r",
2254      "only-matching", and Jeffrey Friedl's special -S debugging option. */
2255    
2256      if (*option_data == 0 &&
2257          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2258        {
2259        switch (op->one_char)
2260        {        {
2261        if (i >= argc - 1 || longopwasequals)        case N_COLOUR:
2262          {        colour_option = (char *)"auto";
2263          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        break;
2264          exit(usage(2));  
2265          }        case 'o':
2266        option_data = argv[++i];        only_matching = 0;
2267          break;
2268    
2269    #ifdef JFRIEDL_DEBUG
2270          case 'S':
2271          S_arg = 0;
2272          break;
2273    #endif
2274        }        }
2275        continue;
2276        }
2277    
2278      /* Otherwise, find the data string for the option. */
2279    
2280      if (op->type == OP_STRING) *((char **)op->dataptr) = option_data; else    if (*option_data == 0)
2281        {
2282        if (i >= argc - 1 || longopwasequals)
2283        {        {
2284        char *endptr;        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2285        int n = strtoul(option_data, &endptr, 10);        pcregrep_exit(usage(2));
2286        if (*endptr != 0)        }
2287        option_data = argv[++i];
2288        }
2289    
2290      /* If the option type is OP_PATLIST, it's the -e option, which can be called
2291      multiple times to create a list of patterns. */
2292    
2293      if (op->type == OP_PATLIST)
2294        {
2295        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2296          {
2297          fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2298            MAX_PATTERN_COUNT);
2299          return 2;
2300          }
2301        patterns[cmd_pattern_count++] = option_data;
2302        }
2303    
2304      /* Otherwise, deal with single string or numeric data values. */
2305    
2306      else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2307               op->type != OP_OP_NUMBER)
2308        {
2309        *((char **)op->dataptr) = option_data;
2310        }
2311    
2312      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2313      only for unpicking arguments, so just keep it simple. */
2314    
2315      else
2316        {
2317        unsigned long int n = 0;
2318        char *endptr = option_data;
2319        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2320        while (isdigit((unsigned char)(*endptr)))
2321          n = n * 10 + (int)(*endptr++ - '0');
2322        if (toupper(*endptr) == 'K')
2323          {
2324          n *= 1024;
2325          endptr++;
2326          }
2327        else if (toupper(*endptr) == 'M')
2328          {
2329          n *= 1024*1024;
2330          endptr++;
2331          }
2332        if (*endptr != 0)
2333          {
2334          if (longop)
2335          {          {
2336          if (longop)          char *equals = strchr(op->long_name, '=');
2337            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%s\n",          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2338              option_data, op->long_name);            (int)(equals - op->long_name);
2339          else          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2340            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",            option_data, nlen, op->long_name);
             option_data, op->one_char);  
         exit(usage(2));  
2341          }          }
2342        *((int *)op->dataptr) = n;        else
2343            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2344              option_data, op->one_char);
2345          pcregrep_exit(usage(2));
2346        }        }
2347        if (op->type == OP_LONGNUMBER)
2348            *((unsigned long int *)op->dataptr) = n;
2349        else
2350            *((int *)op->dataptr) = n;
2351      }      }
2352    }    }
2353    
# Line 1001  if (both_context > 0) Line 2360  if (both_context > 0)
2360    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2361    }    }
2362    
2363  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2364  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  However, the latter two set only_matching. */
2365    
2366  if (pattern_list == NULL || hints_list == NULL)  if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2367        (file_offsets && line_offsets))
2368    {    {
2369    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2370    return 2;      "and/or --line-offsets\n");
2371      pcregrep_exit(usage(2));
2372    }    }
2373    
2374  /* Compile the regular expression(s). */  if (file_offsets || line_offsets) only_matching = 0;
2375    
2376  if (pattern_filename != NULL)  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2377    LC_ALL environment variable is set, and if so, use it. */
2378    
2379    if (locale == NULL)
2380      {
2381      locale = getenv("LC_ALL");
2382      locale_from = "LCC_ALL";
2383      }
2384    
2385    if (locale == NULL)
2386    {    {
2387    FILE *f = fopen(pattern_filename, "r");    locale = getenv("LC_CTYPE");
2388    char buffer[MBUFTHIRD + 16];    locale_from = "LC_CTYPE";
2389    char *rdstart;    }
2390    int adjust = 0;  
2391    /* If a locale has been provided, set it, and generate the tables that PCRE
2392    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2393    
2394    if (f == NULL)  if (locale != NULL)
2395      {
2396      if (setlocale(LC_CTYPE, locale) == NULL)
2397      {      {
2398      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2399        strerror(errno));        locale, locale_from);
2400      return 2;      return 2;
2401      }      }
2402      pcretables = pcre_maketables();
2403      }
2404    
2405    /* Sort out colouring */
2406    
2407    if (whole_lines)  if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2408      {
2409      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2410      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2411      else
2412        {
2413        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2414          colour_option);
2415        return 2;
2416        }
2417      if (do_colour)
2418      {      {
2419      strcpy(buffer, "^(?:");      char *cs = getenv("PCREGREP_COLOUR");
2420      adjust = 4;      if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2421        if (cs != NULL) colour_string = cs;
2422      }      }
2423    else if (word_match)    }
2424    
2425    /* Interpret the newline type; the default settings are Unix-like. */
2426    
2427    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2428      {
2429      pcre_options |= PCRE_NEWLINE_CR;
2430      endlinetype = EL_CR;
2431      }
2432    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2433      {
2434      pcre_options |= PCRE_NEWLINE_LF;
2435      endlinetype = EL_LF;
2436      }
2437    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2438      {
2439      pcre_options |= PCRE_NEWLINE_CRLF;
2440      endlinetype = EL_CRLF;
2441      }
2442    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2443      {
2444      pcre_options |= PCRE_NEWLINE_ANY;
2445      endlinetype = EL_ANY;
2446      }
2447    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2448      {
2449      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2450      endlinetype = EL_ANYCRLF;
2451      }
2452    else
2453      {
2454      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2455      return 2;
2456      }
2457    
2458    /* Interpret the text values for -d and -D */
2459    
2460    if (dee_option != NULL)
2461      {
2462      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2463      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2464      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2465      else
2466      {      {
2467      strcpy(buffer, "\\b");      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2468      adjust = 2;      return 2;
2469      }      }
2470      }
2471    
2472    rdstart = buffer + adjust;  if (DEE_option != NULL)
2473    while (fgets(rdstart, MBUFTHIRD, f) != NULL)    {
2474      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2475      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2476      else
2477      {      {
2478      char *s = rdstart + (int)strlen(rdstart);      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2479      if (pattern_count >= MAX_PATTERN_COUNT)      return 2;
       {  
       fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",  
         MAX_PATTERN_COUNT);  
       return 2;  
       }  
     while (s > rdstart && isspace((unsigned char)(s[-1]))) s--;  
     if (s == rdstart) continue;  
     if (whole_lines) strcpy(s, ")$");  
       else if (word_match)strcpy(s, "\\b");  
         else *s = 0;  
     pattern_list[pattern_count] = pcre_compile(buffer, options, &error,  
       &errptr, NULL);  
     if (pattern_list[pattern_count++] == NULL)  
       {  
       fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",  
         pattern_count, errptr - adjust, error);  
       return 2;  
       }  
2480      }      }
   fclose(f);  
2481    }    }
2482    
2483  /* If no file name, a single regex must be given inline. */  /* Check the values for Jeffrey Friedl's debugging options. */
2484    
2485  else  #ifdef JFRIEDL_DEBUG
2486    if (S_arg > 9)
2487    {    {
2488    char buffer[MBUFTHIRD + 16];    fprintf(stderr, "pcregrep: bad value for -S option\n");
2489    char *pat;    return 2;
2490    int adjust = 0;    }
2491    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2492      {
2493      if (jfriedl_XT == 0) jfriedl_XT = 1;
2494      if (jfriedl_XR == 0) jfriedl_XR = 1;
2495      }
2496    #endif
2497    
2498    /* Get memory for the main buffer, and to store the pattern and hints lists. */
2499    
2500    bufsize = 3*bufthird;
2501    main_buffer = (char *)malloc(bufsize);
2502    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2503    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2504    
2505    if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
2506      {
2507      fprintf(stderr, "pcregrep: malloc failed\n");
2508      goto EXIT2;
2509      }
2510    
2511    /* If no patterns were provided by -e, and there is no file provided by -f,
2512    the first argument is the one and only pattern, and it must exist. */
2513    
2514    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2515      {
2516    if (i >= argc) return usage(2);    if (i >= argc) return usage(2);
2517      patterns[cmd_pattern_count++] = argv[i++];
2518      }
2519    
2520    /* Compile the patterns that were provided on the command line, either by
2521    multiple uses of -e or as a single unkeyed pattern. */
2522    
2523    if (whole_lines)  for (j = 0; j < cmd_pattern_count; j++)
2524      {
2525      if (!compile_pattern(patterns[j], pcre_options, NULL,
2526           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2527        goto EXIT2;
2528      }
2529    
2530    /* Compile the regular expressions that are provided in a file. */
2531    
2532    if (pattern_filename != NULL)
2533      {
2534      int linenumber = 0;
2535      FILE *f;
2536      char *filename;
2537      char buffer[PATBUFSIZE];
2538    
2539      if (strcmp(pattern_filename, "-") == 0)
2540      {      {
2541      sprintf(buffer, "^(?:%.*s)$", MBUFTHIRD, argv[i++]);      f = stdin;
2542      pat = buffer;      filename = stdin_name;
     adjust = 4;  
2543      }      }
2544    else if (word_match)    else
2545      {      {
2546      sprintf(buffer, "\\b%.*s\\b", MBUFTHIRD, argv[i++]);      f = fopen(pattern_filename, "r");
2547      pat = buffer;      if (f == NULL)
2548      adjust = 2;        {
2549          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2550            strerror(errno));
2551          goto EXIT2;
2552          }
2553        filename = pattern_filename;
2554      }      }
   else pat = argv[i++];  
   
   pattern_list[0] = pcre_compile(pat, options, &error, &errptr, NULL);  
2555    
2556    if (pattern_list[0] == NULL)    while (fgets(buffer, PATBUFSIZE, f) != NULL)
2557      {      {
2558      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n",      char *s = buffer + (int)strlen(buffer);
2559        errptr - adjust, error);      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2560      return 2;      *s = 0;
2561        linenumber++;
2562        if (buffer[0] == 0) continue;   /* Skip blank lines */
2563        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2564          goto EXIT2;
2565      }      }
2566    pattern_count++;  
2567      if (f != stdin) fclose(f);
2568    }    }
2569    
2570  /* Study the regular expressions, as we will be running them many times */  /* Study the regular expressions, as we will be running them many times */
# Line 1109  for (j = 0; j < pattern_count; j++) Line 2577  for (j = 0; j < pattern_count; j++)
2577      char s[16];      char s[16];
2578      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2579      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2580      return 2;      goto EXIT2;
2581        }
2582      hint_count++;
2583      }
2584    
2585    /* If --match-limit or --recursion-limit was set, put the value(s) into the
2586    pcre_extra block for each pattern. */
2587    
2588    if (match_limit > 0 || match_limit_recursion > 0)
2589      {
2590      for (j = 0; j < pattern_count; j++)
2591        {
2592        if (hints_list[j] == NULL)
2593          {
2594          hints_list[j] = malloc(sizeof(pcre_extra));
2595          if (hints_list[j] == NULL)
2596            {
2597            fprintf(stderr, "pcregrep: malloc failed\n");
2598            pcregrep_exit(2);
2599            }
2600          }
2601        if (match_limit > 0)
2602          {
2603          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2604          hints_list[j]->match_limit = match_limit;
2605          }
2606        if (match_limit_recursion > 0)
2607          {
2608          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2609          hints_list[j]->match_limit_recursion = match_limit_recursion;
2610          }
2611      }      }
2612    }    }
2613    
# Line 1117  for (j = 0; j < pattern_count; j++) Line 2615  for (j = 0; j < pattern_count; j++)
2615    
2616  if (exclude_pattern != NULL)  if (exclude_pattern != NULL)
2617    {    {
2618    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, NULL);    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2619        pcretables);
2620    if (exclude_compiled == NULL)    if (exclude_compiled == NULL)
2621      {      {
2622      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2623        errptr, error);        errptr, error);
2624      return 2;      goto EXIT2;
2625      }      }
2626    }    }
2627    
2628  if (include_pattern != NULL)  if (include_pattern != NULL)
2629    {    {
2630    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, NULL);    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2631        pcretables);
2632    if (include_compiled == NULL)    if (include_compiled == NULL)
2633      {      {
2634      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2635        errptr, error);        errptr, error);
2636      return 2;      goto EXIT2;
2637        }
2638      }
2639    
2640    if (exclude_dir_pattern != NULL)
2641      {
2642      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2643        pcretables);
2644      if (exclude_dir_compiled == NULL)
2645        {
2646        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2647          errptr, error);
2648        goto EXIT2;
2649        }
2650      }
2651    
2652    if (include_dir_pattern != NULL)
2653      {
2654      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2655        pcretables);
2656      if (include_dir_compiled == NULL)
2657        {
2658        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2659          errptr, error);
2660        goto EXIT2;
2661      }      }
2662    }    }
2663    
2664  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
2665    
2666  if (i >= argc) return pcregrep(stdin,  if (i >= argc)
2667    (filenames_only || filenames_nomatch_only)? stdin_name : NULL);    {
2668      rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2669        (filenames > FN_DEFAULT)? stdin_name : NULL);
2670      goto EXIT;
2671      }
2672    
2673  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2674  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
2675  the file name if the argument is not a directory and filenames_only is not set.  the file name if the argument is not a directory and filenames are not
2676  */  otherwise forced. */
2677    
2678  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
2679    
2680  for (; i < argc; i++)  for (; i < argc; i++)
2681    {    {
2682    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2683        only_one_at_top);
2684    if (frc > 1) rc = frc;    if (frc > 1) rc = frc;
2685      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2686    }    }
2687    
2688  return rc;  EXIT:
2689    if (main_buffer != NULL) free(main_buffer);
2690    if (pattern_list != NULL)
2691      {
2692      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2693      free(pattern_list);
2694      }
2695    if (hints_list != NULL)
2696      {
2697      for (i = 0; i < hint_count; i++)
2698        {
2699        if (hints_list[i] != NULL) free(hints_list[i]);
2700        }
2701      free(hints_list);
2702      }
2703    pcregrep_exit(rc);
2704    
2705    EXIT2:
2706    rc = 2;
2707    goto EXIT;
2708  }  }
2709    
2710  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.77  
changed lines
  Added in v.654

  ViewVC Help
Powered by ViewVC 1.1.5