/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 779 by ph10, Fri Dec 2 10:39:32 2011 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2011 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
# Line 45  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
 #include "config.h"  
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 55  POSSIBILITY OF SUCH DAMAGE. Line 70  POSSIBILITY OF SUCH DAMAGE.
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "4.0 07-Jun-2005"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define PATBUFSIZE BUFSIZ
78  #else  #else
79  #define MBUFTHIRD 8192  #define PATBUFSIZE 8192
80  #endif  #endif
81    
82    /* Values for the "filenames" variable, which specifies options for file name
83    output. The order is important; it is assumed that a file name is wanted for
84    all values greater than FN_DEFAULT. */
85    
86    enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92    /* Actions for the -d and -D options */
93    
94    enum { dee_READ, dee_SKIP, dee_RECURSE };
95    enum { DEE_READ, DEE_SKIP };
96    
97    /* Actions for special processing options (flag bits) */
98    
99    #define PO_WORD_MATCH     0x0001
100    #define PO_LINE_MATCH     0x0002
101    #define PO_FIXED_STRINGS  0x0004
102    
103    /* Line ending types */
104    
105    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
/* The do { } while (0) wrapper makes the expansion a single statement, so that
"FWRITE(...);" can safely be the body of an if that has an else. The value of
fwrite() is still consumed by the inner if, which is what suppresses the
warning. */

#define FWRITE(a,b,c,d) do { if (fwrite((a),(b),(c),(d))) {} } while (0)
114    
115    
116    
117  /*************************************************  /*************************************************
118  *               Global variables                 *  *               Global variables                 *
119  *************************************************/  *************************************************/
120    
121    /* Jeffrey Friedl has some debugging requirements that are not part of the
122    regular code. */
123    
124    #ifdef JFRIEDL_DEBUG
125    static int S_arg = -1;
126    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128    static const char *jfriedl_prefix = "";
129    static const char *jfriedl_postfix = "";
130    #endif
131    
132    static int  endlinetype;
133    
134    static char *colour_string = (char *)"1;31";
135    static char *colour_option = NULL;
136    static char *dee_option = NULL;
137    static char *DEE_option = NULL;
138    static char *main_buffer = NULL;
139    static char *newline = NULL;
140  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
141  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
142    static char *locale = NULL;
143    
144    static const unsigned char *pcretables = NULL;
145    
146  static int  pattern_count = 0;  static int  pattern_count = 0;
147  static pcre **pattern_list;  static pcre **pattern_list = NULL;
148  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
149    
150  static char *include_pattern = NULL;  static char *include_pattern = NULL;
151  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
152    static char *include_dir_pattern = NULL;
153    static char *exclude_dir_pattern = NULL;
154    
155  static pcre *include_compiled = NULL;  static pcre *include_compiled = NULL;
156  static pcre *exclude_compiled = NULL;  static pcre *exclude_compiled = NULL;
157    static pcre *include_dir_compiled = NULL;
158    static pcre *exclude_dir_compiled = NULL;
159    
160  static int after_context = 0;  static int after_context = 0;
161  static int before_context = 0;  static int before_context = 0;
162  static int both_context = 0;  static int both_context = 0;
163    static int bufthird = PCREGREP_BUFSIZE;
164    static int bufsize = 3*PCREGREP_BUFSIZE;
165    static int dee_action = dee_READ;
166    static int DEE_action = DEE_READ;
167    static int error_count = 0;
168    static int filenames = FN_DEFAULT;
169    static int only_matching = -1;
170    static int process_options = 0;
171    
172    #ifdef SUPPORT_PCREGREP_JIT
173    static int study_options = PCRE_STUDY_JIT_COMPILE;
174    #else
175    static int study_options = 0;
176    #endif
177    
178    static unsigned long int match_limit = 0;
179    static unsigned long int match_limit_recursion = 0;
180    
181  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
182  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
183  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
 static BOOL filenames_nomatch_only = FALSE;  
184  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
185  static BOOL invert = FALSE;  static BOOL invert = FALSE;
186    static BOOL line_buffered = FALSE;
187    static BOOL line_offsets = FALSE;
188  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
189  static BOOL number = FALSE;  static BOOL number = FALSE;
190    static BOOL omit_zero_count = FALSE;
191    static BOOL resource_error = FALSE;
192  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
 static BOOL recurse = FALSE;  
193  static BOOL silent = FALSE;  static BOOL silent = FALSE;
194  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
 static BOOL word_match = FALSE;  
195    
196  /* Structure for options and list of them */  /* Structure for options and list of them */
197    
198  enum { OP_NODATA, OP_STRING, OP_NUMBER };  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
199           OP_OP_NUMBER, OP_PATLIST };
200    
201  typedef struct option_item {  typedef struct option_item {
202    int type;    int type;
# Line 112  typedef struct option_item { Line 206  typedef struct option_item {
206    const char *help_text;    const char *help_text;
207  } option_item;  } option_item;
208    
209    /* Options without a single-letter equivalent get a negative value. This can be
210    used to identify them. */
211    
212    #define N_COLOUR       (-1)
213    #define N_EXCLUDE      (-2)
214    #define N_EXCLUDE_DIR  (-3)
215    #define N_HELP         (-4)
216    #define N_INCLUDE      (-5)
217    #define N_INCLUDE_DIR  (-6)
218    #define N_LABEL        (-7)
219    #define N_LOCALE       (-8)
220    #define N_NULL         (-9)
221    #define N_LOFFSETS     (-10)
222    #define N_FOFFSETS     (-11)
223    #define N_LBUFFER      (-12)
224    #define N_M_LIMIT      (-13)
225    #define N_M_LIMIT_REC  (-14)
226    #define N_BUFSIZE      (-15)
227    #define N_NOJIT        (-16)
228    
229  static option_item optionlist[] = {  static option_item optionlist[] = {
230    { OP_NODATA, -1,  NULL,              "",              "  terminate options" },    { OP_NODATA,     N_NULL,   NULL,              "",              "  terminate options" },
231    { OP_NODATA, -1,  NULL,              "help",          "display this help and exit" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
232    { OP_NUMBER, 'A', &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
233    { OP_NUMBER, 'B', &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
234    { OP_NUMBER, 'C', &both_context,     "context=number", "set number of context lines, before & after" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
235    { OP_NODATA, 'c', NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
236    { OP_STRING, 'f', &pattern_filename, "file=path",     "read patterns from file" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
237    { OP_NODATA, 'h', NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
238    { OP_NODATA, 'i', NULL,              "ignore-case",   "ignore case distinctions" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
239    { OP_NODATA, 'l', NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
240    { OP_NODATA, 'L', NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
241    { OP_STRING, -1,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_PATLIST,    'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
242    { OP_NODATA, 'M', NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
243    { OP_NODATA, 'n', NULL,              "line-number",   "print line number with output lines" },    { OP_STRING,     'f',      &pattern_filename, "file=path",     "read patterns from file" },
244    { OP_NODATA, 'q', NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
245    { OP_NODATA, 'r', NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
246    { OP_STRING, -1,  &exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
247    { OP_STRING, -1,  &include_pattern,  "include=pattern","include matching files when recursing" },    { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
248    { OP_NODATA, 's', NULL,              "no-messages",   "suppress error messages" },  #ifdef SUPPORT_PCREGREP_JIT
249    { OP_NODATA, 'u', NULL,              "utf-8",         "use UTF-8 mode" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
250    { OP_NODATA, 'V', NULL,              "version",       "print version information and exit" },  #else
251    { OP_NODATA, 'v', NULL,              "invert-match",  "select non-matching lines" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
252    { OP_NODATA, 'w', NULL,              "word-regex(p)", "force PATTERN to match only as a word"  },  #endif
253    { OP_NODATA, 'x', NULL,              "line-regex(p)", "force PATTERN to match only whole lines" },    { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
254    { OP_NODATA, 0,   NULL,               NULL,            NULL }    { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
255      { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
256      { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
257      { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
258      { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
259      { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
260      { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
261      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
262      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
263      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
264      { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
265      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
266      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
267      { OP_STRING,     N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
268      { OP_STRING,     N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
269      { OP_STRING,     N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
270      { OP_STRING,     N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
271    
272      /* These two were accidentally implemented with underscores instead of
273      hyphens in the option names. As this was not discovered for several releases,
274      the incorrect versions are left in the table for compatibility. However, the
275      --help function misses out any option that has an underscore in its name. */
276    
277      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
278      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
279    
280    #ifdef JFRIEDL_DEBUG
281      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
282    #endif
283      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
284      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
285      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
286      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
287      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
288      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
289      { OP_NODATA,    0,        NULL,               NULL,            NULL }
290  };  };
291    
292    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
293    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
294    that the combination of -w and -x has the same effect as -x on its own, so we
295    can treat them as the same. */
296    
297    static const char *prefix[] = {
298      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
299    
300    static const char *suffix[] = {
301      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
302    
303    /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
304    
305    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
306    
307    const char utf8_table4[] = {
308      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
309      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
310      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
311      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
312    
313    
314    
315    /*************************************************
316    *         Exit from the program                  *
317    *************************************************/
318    
319    /* If there has been a resource error, give a suitable message.
320    
321    Argument:  the return code
322    Returns:   does not return
323    */
324    
325    static void
326    pcregrep_exit(int rc)
327    {
328    if (resource_error)
329      {
330      fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
331        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
332        PCRE_ERROR_JIT_STACKLIMIT);
333      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
334      }
335    
336    exit(rc);
337    }
338    
339    
340  /*************************************************  /*************************************************
341  *       Functions for directory scanning         *  *            OS-specific functions               *
342  *************************************************/  *************************************************/
343    
344  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
345  although at present the only ones are for Unix, Win32, and for "no directory  although at present the only ones are for Unix, Win32, and for "no support". */
 recursion support". */  
346    
347    
348  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
349    
350  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
351  #include <sys/types.h>  #include <sys/types.h>
352  #include <sys/stat.h>  #include <sys/stat.h>
353  #include <dirent.h>  #include <dirent.h>
# Line 184  for (;;) Line 379  for (;;)
379    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
380      return dent->d_name;      return dent->d_name;
381    }    }
382  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
383  }  }
384    
385  static void  static void
# Line 194  closedir(dir); Line 389  closedir(dir);
389  }  }
390    
391    
392    /************* Test for regular file in Unix **********/
393    
/* Test whether a path names a regular file.

Argument:  filename   the path to test
Returns:   1 if stat() reports a regular file, or if stat() itself fails;
           0 for any other kind of file

The test uses S_ISREG(), which is part of base POSIX, rather than masking with
S_IFMT, which is an XSI name that strict feature-test settings can hide. The
macro is only specified to yield non-zero, so the result is normalised. */

static int
isregfile(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 1;        /* In the expectation that opening as a file will fail */
return S_ISREG(statbuf.st_mode) ? 1 : 0;
}
402    
403    
404    /************* Test for a terminal in Unix **********/
405    
406    static BOOL
407    is_stdout_tty(void)
408    {
409    return isatty(fileno(stdout));
410    }
411    
412    static BOOL
413    is_file_tty(FILE *f)
414    {
415    return isatty(fileno(f));
416    }
417    
418    
419  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
420    
421  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
422  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
423  when it did not exist. */  when it did not exist. David Byron added a patch that moved the #include of
424    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
425    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
426    undefined when it is indeed undefined. */
427    
428    #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
 #elif HAVE_WIN32API  
429    
430  #ifndef STRICT  #ifndef STRICT
431  # define STRICT  # define STRICT
# Line 209  when it did not exist. */ Line 433  when it did not exist. */
433  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
434  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
435  #endif  #endif
436    
437    #include <windows.h>
438    
439  #ifndef INVALID_FILE_ATTRIBUTES  #ifndef INVALID_FILE_ATTRIBUTES
440  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
441  #endif  #endif
442    
 #include <windows.h>  
   
443  typedef struct directory_type  typedef struct directory_type
444  {  {
445  HANDLE handle;  HANDLE handle;
# Line 244  dir = (directory_type *) malloc(sizeof(* Line 469  dir = (directory_type *) malloc(sizeof(*
469  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
470    {    {
471    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
472    exit(2);    pcregrep_exit(2);
473    }    }
474  memcpy(pattern, filename, len);  memcpy(pattern, filename, len);
475  memcpy(&(pattern[len]), "\\*", 3);  memcpy(&(pattern[len]), "\\*", 3);
# Line 292  free(dir); Line 517  free(dir);
517  }  }
518    
519    
520    /************* Test for regular file in Win32 **********/
521    
522    /* I don't know how to do this, or if it can be done; assume all paths are
523    regular if they are not directories. */
524    
/* Win32: anything that isdirectory() does not accept is taken to be a regular
file. Yields 1 when isdirectory() returns zero, otherwise 0. */

int isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
529    
530    
531    /************* Test for a terminal in Win32 **********/
532    
533    /* I don't know how to do this; assume never */
534    
535    static BOOL
536    is_stdout_tty(void)
537    {
538    return FALSE;
539    }
540    
541    static BOOL
542    is_file_tty(FILE *f)
543    {
544    return FALSE;
545    }
546    
547    
548  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
549    
550  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
# Line 300  free(dir); Line 553  free(dir);
553    
554  typedef void directory_type;  typedef void directory_type;
555    
556  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
557  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
558  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
559  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
560    
561    
562    /************* Test for regular file when we can't do it **********/
563    
564    /* Assume all files are regular. */
565    
/* No way of testing here: every path is reported as a regular file (1). */

int isregfile(char *filename)
{
(void)filename;
return 1;
}
567    
568    
569    /************* Test for a terminal when we can't do it **********/
570    
571    static BOOL
572    is_stdout_tty(void)
573    {
574    return FALSE;
575    }
576    
577    static BOOL
578    is_file_tty(FILE *f)
579    {
580    return FALSE;
581    }
582    
583  #endif  #endif
584    
585    
586    
587  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
588  /*************************************************  /*************************************************
589  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
590  *************************************************/  *************************************************/
# Line 332  return sys_errlist[n]; Line 607  return sys_errlist[n];
607    
608    
609  /*************************************************  /*************************************************
610    *            Read one line of input              *
611    *************************************************/
612    
613    /* Normally, input is read using fread() into a large buffer, so many lines may
614    be read at once. However, doing this for tty input means that no output appears
615    until a lot of input has been typed. Instead, tty input is handled line by
616    line. We cannot use fgets() for this: the data may contain embedded binary
617    zeros, and as fgets() reports what it has read only as a zero-terminated
618    string, there would be no way of telling how many characters it had read.
619    
620    Arguments:
621      buffer     the buffer to read into
622      length     the maximum number of characters to read
623      f          the file
624    
625    Returns:     the number of characters read, zero at end of file
626    */
627    
/* Read characters from f into buffer, one at a time, stopping after a '\n'
has been stored, when length characters have been stored, or at end of file.
Binary zeros are stored like any other character, and no terminating zero is
added.

Returns the number of characters stored. This is zero at end of file, and also
when length is not positive, in which case nothing is stored and no input is
consumed. */

static int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;

/* The space check comes before the read, so that a byte is never fetched
unless there is room to store it. */

while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return yield;
}
640    
641    
642    
643    /*************************************************
644    *             Find end of line                   *
645    *************************************************/
646    
647    /* The length of the endline sequence that is found is set via lenptr. This may
648    be zero at the very end of the file if there is no line-ending sequence there.
649    
650    Arguments:
651      p         current position in line
652      endptr    end of available data
653      lenptr    where to put the length of the eol sequence
654    
655    Returns:    pointer after the last byte of the line,
656                including the newline byte(s)
657    */
658    
659    static char *
660    end_of_line(char *p, char *endptr, int *lenptr)
661    {
662    switch(endlinetype)
663      {
664      default:      /* Just in case */
665      case EL_LF:
666      while (p < endptr && *p != '\n') p++;
667      if (p < endptr)
668        {
669        *lenptr = 1;
670        return p + 1;
671        }
672      *lenptr = 0;
673      return endptr;
674    
675      case EL_CR:
676      while (p < endptr && *p != '\r') p++;
677      if (p < endptr)
678        {
679        *lenptr = 1;
680        return p + 1;
681        }
682      *lenptr = 0;
683      return endptr;
684    
685      case EL_CRLF:
686      for (;;)
687        {
688        while (p < endptr && *p != '\r') p++;
689        if (++p >= endptr)
690          {
691          *lenptr = 0;
692          return endptr;
693          }
694        if (*p == '\n')
695          {
696          *lenptr = 2;
697          return p + 1;
698          }
699        }
700      break;
701    
702      case EL_ANYCRLF:
703      while (p < endptr)
704        {
705        int extra = 0;
706        register int c = *((unsigned char *)p);
707    
708        if (utf8 && c >= 0xc0)
709          {
710          int gcii, gcss;
711          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
712          gcss = 6*extra;
713          c = (c & utf8_table3[extra]) << gcss;
714          for (gcii = 1; gcii <= extra; gcii++)
715            {
716            gcss -= 6;
717            c |= (p[gcii] & 0x3f) << gcss;
718            }
719          }
720    
721        p += 1 + extra;
722    
723        switch (c)
724          {
725          case 0x0a:    /* LF */
726          *lenptr = 1;
727          return p;
728    
729          case 0x0d:    /* CR */
730          if (p < endptr && *p == 0x0a)
731            {
732            *lenptr = 2;
733            p++;
734            }
735          else *lenptr = 1;
736          return p;
737    
738          default:
739          break;
740          }
741        }   /* End of loop for ANYCRLF case */
742    
743      *lenptr = 0;  /* Must have hit the end */
744      return endptr;
745    
746      case EL_ANY:
747      while (p < endptr)
748        {
749        int extra = 0;
750        register int c = *((unsigned char *)p);
751    
752        if (utf8 && c >= 0xc0)
753          {
754          int gcii, gcss;
755          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
756          gcss = 6*extra;
757          c = (c & utf8_table3[extra]) << gcss;
758          for (gcii = 1; gcii <= extra; gcii++)
759            {
760            gcss -= 6;
761            c |= (p[gcii] & 0x3f) << gcss;
762            }
763          }
764    
765        p += 1 + extra;
766    
767        switch (c)
768          {
769          case 0x0a:    /* LF */
770          case 0x0b:    /* VT */
771          case 0x0c:    /* FF */
772          *lenptr = 1;
773          return p;
774    
775          case 0x0d:    /* CR */
776          if (p < endptr && *p == 0x0a)
777            {
778            *lenptr = 2;
779            p++;
780            }
781          else *lenptr = 1;
782          return p;
783    
784          case 0x85:    /* NEL */
785          *lenptr = utf8? 2 : 1;
786          return p;
787    
788          case 0x2028:  /* LS */
789          case 0x2029:  /* PS */
790          *lenptr = 3;
791          return p;
792    
793          default:
794          break;
795          }
796        }   /* End of loop for ANY case */
797    
798      *lenptr = 0;  /* Must have hit the end */
799      return endptr;
800      }     /* End of overall switch */
801    }
802    
803    
804    
805    /*************************************************
806    *         Find start of previous line            *
807    *************************************************/
808    
809    /* This is called when looking back for before lines to print.
810    
811    Arguments:
812      p         start of the subsequent line
813      startptr  start of available data
814    
815    Returns:    pointer to the start of the previous line
816    */
817    
818    static char *
819    previous_line(char *p, char *startptr)
820    {
821    switch(endlinetype)
822      {
823      default:      /* Just in case */
824      case EL_LF:
825      p--;
826      while (p > startptr && p[-1] != '\n') p--;
827      return p;
828    
829      case EL_CR:
830      p--;
831      while (p > startptr && p[-1] != '\n') p--;
832      return p;
833    
834      case EL_CRLF:
835      for (;;)
836        {
837        p -= 2;
838        while (p > startptr && p[-1] != '\n') p--;
839        if (p <= startptr + 1 || p[-2] == '\r') return p;
840        }
841      return p;   /* But control should never get here */
842    
843      case EL_ANY:
844      case EL_ANYCRLF:
845      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
846      if (utf8) while ((*p & 0xc0) == 0x80) p--;
847    
848      while (p > startptr)
849        {
850        register int c;
851        char *pp = p - 1;
852    
853        if (utf8)
854          {
855          int extra = 0;
856          while ((*pp & 0xc0) == 0x80) pp--;
857          c = *((unsigned char *)pp);
858          if (c >= 0xc0)
859            {
860            int gcii, gcss;
861            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
862            gcss = 6*extra;
863            c = (c & utf8_table3[extra]) << gcss;
864            for (gcii = 1; gcii <= extra; gcii++)
865              {
866              gcss -= 6;
867              c |= (pp[gcii] & 0x3f) << gcss;
868              }
869            }
870          }
871        else c = *((unsigned char *)pp);
872    
873        if (endlinetype == EL_ANYCRLF) switch (c)
874          {
875          case 0x0a:    /* LF */
876          case 0x0d:    /* CR */
877          return p;
878    
879          default:
880          break;
881          }
882    
883        else switch (c)
884          {
885          case 0x0a:    /* LF */
886          case 0x0b:    /* VT */
887          case 0x0c:    /* FF */
888          case 0x0d:    /* CR */
889          case 0x85:    /* NEL */
890          case 0x2028:  /* LS */
891          case 0x2029:  /* PS */
892          return p;
893    
894          default:
895          break;
896          }
897    
898        p = pp;  /* Back one character */
899        }        /* End of loop for ANY case */
900    
901      return startptr;  /* Hit start of data */
902      }     /* End of overall switch */
903    }
904    
905    
906    
907    
908    
909    /*************************************************
910  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
911  *************************************************/  *************************************************/
912    
913  /* This is called if we are about to lose said lines because of buffer filling,  /* This is called if we are about to lose said lines because of buffer filling,
914  and at the end of the file.  and at the end of the file. The data in the line is written using fwrite() so
915    that a binary zero does not terminate it.
916    
917  Arguments:  Arguments:
918    lastmatchnumber   the number of the last matching line, plus one    lastmatchnumber   the number of the last matching line, plus one
# Line 355  if (after_context > 0 && lastmatchnumber Line 931  if (after_context > 0 && lastmatchnumber
931    int count = 0;    int count = 0;
932    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
933      {      {
934        int ellength;
935      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
936      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
937      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
938      while (*pp != '\n') pp++;      pp = end_of_line(pp, endptr, &ellength);
939      fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
940      lastmatchrestart = pp + 1;      lastmatchrestart = pp;
941      }      }
942    hyphenpending = TRUE;    hyphenpending = TRUE;
943    }    }
# Line 369  if (after_context > 0 && lastmatchnumber Line 946  if (after_context > 0 && lastmatchnumber
946    
947    
948  /*************************************************  /*************************************************
949    *   Apply patterns to subject till one matches   *
950    *************************************************/
951    
952    /* This function is called to run through all patterns, looking for a match. It
953    is used multiple times for the same subject when colouring is enabled, in order
954    to find all possible matches.
955    
956    Arguments:
957      matchptr     the start of the subject
958      length       the length of the subject to match
959      startoffset  where to start matching
960      offsets      the offets vector to fill in
961      mrc          address of where to put the result of pcre_exec()
962    
963    Returns:      TRUE if there was a match
964                  FALSE if there was no match
965                  invert if there was a non-fatal error
966    */
967    
968    static BOOL
969    match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
970      int *mrc)
971    {
972    int i;
973    size_t slen = length;
974    const char *msg = "this text:\n\n";
975    if (slen > 200)
976      {
977      slen = 200;
978      msg = "text that starts:\n\n";
979      }
980    for (i = 0; i < pattern_count; i++)
981      {
982      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
983        startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
984      if (*mrc >= 0) return TRUE;
985      if (*mrc == PCRE_ERROR_NOMATCH) continue;
986      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
987      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
988      fprintf(stderr, "%s", msg);
989      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
990      fprintf(stderr, "\n\n");
991      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
992          *mrc == PCRE_ERROR_JIT_STACKLIMIT)
993        resource_error = TRUE;
994      if (error_count++ > 20)
995        {
996        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
997        pcregrep_exit(2);
998        }
999      return invert;    /* No more matching; don't show the line again */
1000      }
1001    
1002    return FALSE;  /* No match, no errors */
1003    }
1004    
1005    
1006    
1007    /*************************************************
1008  *            Grep an individual file             *  *            Grep an individual file             *
1009  *************************************************/  *************************************************/
1010    
1011  /* This is called from grep_or_recurse() below. It uses a buffer that is three  /* This is called from grep_or_recurse() below. It uses a buffer that is three
1012  times the value of MBUFTHIRD. The matching point is never allowed to stray into  times the value of bufthird. The matching point is never allowed to stray into
1013  the top third of the buffer, thus keeping more of the file available for  the top third of the buffer, thus keeping more of the file available for
1014  context printing or for multiline scanning. For large files, the pointer will  context printing or for multiline scanning. For large files, the pointer will
1015  be in the middle third most of the time, so the bottom third is available for  be in the middle third most of the time, so the bottom third is available for
1016  "before" context printing.  "before" context printing.
1017    
1018  Arguments:  Arguments:
1019    in           the fopened FILE stream    handle       the fopened FILE stream for a normal file
1020                   the gzFile pointer when reading is via libz
1021                   the BZFILE pointer when reading is via libbz2
1022      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1023      filename     the file name or NULL (for errors)
1024    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
1025                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
1026                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
1027    
1028  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
1029                 1 otherwise (no matches)                 1 otherwise (no matches)
1030                   2 if an overlong line is encountered
1031                   3 if there is a read error on a .bz2 file
1032  */  */
1033    
1034  static int  static int
1035  pcregrep(FILE *in, char *printname)  pcregrep(void *handle, int frtype, char *filename, char *printname)
1036  {  {
1037  int rc = 1;  int rc = 1;
1038  int linenumber = 1;  int linenumber = 1;
1039  int lastmatchnumber = 0;  int lastmatchnumber = 0;
1040  int count = 0;  int count = 0;
1041  int offsets[99];  int filepos = 0;
1042    int offsets[OFFSET_SIZE];
1043  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
1044  char buffer[3*MBUFTHIRD];  char *ptr = main_buffer;
 char *ptr = buffer;  
1045  char *endptr;  char *endptr;
1046  size_t bufflength;  size_t bufflength;
1047  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
1048    BOOL input_line_buffered = line_buffered;
1049    FILE *in = NULL;                    /* Ensure initialized */
1050    
1051    #ifdef SUPPORT_LIBZ
1052    gzFile ingz = NULL;
1053    #endif
1054    
1055    #ifdef SUPPORT_LIBBZ2
1056    BZFILE *inbz2 = NULL;
1057    #endif
1058    
1059    
1060    /* Do the first read into the start of the buffer and set up the pointer to end
1061    of what we have. In the case of libz, a non-zipped .gz file will be read as a
1062    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1063    fail. */
1064    
1065    #ifdef SUPPORT_LIBZ
1066    if (frtype == FR_LIBZ)
1067      {
1068      ingz = (gzFile)handle;
1069      bufflength = gzread (ingz, main_buffer, bufsize);
1070      }
1071    else
1072    #endif
1073    
1074    #ifdef SUPPORT_LIBBZ2
1075    if (frtype == FR_LIBBZ2)
1076      {
1077      inbz2 = (BZFILE *)handle;
1078      bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1079      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1080      }                                    /* without the cast it is unsigned. */
1081    else
1082    #endif
1083    
1084  /* Do the first read into the start of the buffer and set up the pointer to    {
1085  end of what we have. */    in = (FILE *)handle;
1086      if (is_file_tty(in)) input_line_buffered = TRUE;
1087      bufflength = input_line_buffered?
1088        read_one_line(main_buffer, bufsize, in) :
1089        fread(main_buffer, 1, bufsize, in);
1090      }
1091    
1092  bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  endptr = main_buffer + bufflength;
 endptr = buffer + bufflength;  
1093    
1094  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
1095  files, endptr will be at the end of the buffer when we are in the middle of the  files, endptr will be at the end of the buffer when we are in the middle of the
# Line 417  way, the buffer is shifted left and re-f Line 1098  way, the buffer is shifted left and re-f
1098    
1099  while (ptr < endptr)  while (ptr < endptr)
1100    {    {
1101    int i;    int endlinelength;
1102    BOOL match = FALSE;    int mrc = 0;
1103      int startoffset = 0;
1104      BOOL match;
1105      char *matchptr = ptr;
1106    char *t = ptr;    char *t = ptr;
1107    size_t length, linelength;    size_t length, linelength;
1108    
1109    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
1110    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
1111    length remainder of the data in the buffer. Otherwise, it is the length of    length remainder of the data in the buffer. Otherwise, it is the length of
1112    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
1113    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1114    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
1115      first line. */
1116    
1117      t = end_of_line(t, endptr, &endlinelength);
1118      linelength = t - ptr - endlinelength;
1119      length = multiline? (size_t)(endptr - ptr) : linelength;
1120    
1121      /* Check to see if the line we are looking at extends right to the very end
1122      of the buffer without a line terminator. This means the line is too long to
1123      handle. */
1124    
1125      if (endlinelength == 0 && t == main_buffer + bufsize)
1126        {
1127        fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1128                        "pcregrep: check the --buffer-size option\n",
1129                        linenumber,
1130                        (filename == NULL)? "" : " of file ",
1131                        (filename == NULL)? "" : filename);
1132        return 2;
1133        }
1134    
1135    linelength = 0;    /* Extra processing for Jeffrey Friedl's debugging. */
   while (t < endptr && *t++ != '\n') linelength++;  
   length = multiline? endptr - ptr : linelength;  
1136    
1137    /* Run through all the patterns until one matches. Note that we don't include  #ifdef JFRIEDL_DEBUG
1138    the final newline in the subject string. */    if (jfriedl_XT || jfriedl_XR)
1139      {
1140          #include <sys/time.h>
1141          #include <time.h>
1142          struct timeval start_time, end_time;
1143          struct timezone dummy;
1144          int i;
1145    
1146    for (i = 0; !match && i < pattern_count; i++)        if (jfriedl_XT)
1147      {        {
1148      match = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,            unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1149        offsets, 99) >= 0;            const char *orig = ptr;
1150      }            ptr = malloc(newlen + 1);
1151              if (!ptr) {
1152                      printf("out of memory");
1153                      pcregrep_exit(2);
1154              }
1155              endptr = ptr;
1156              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1157              for (i = 0; i < jfriedl_XT; i++) {
1158                      strncpy(endptr, orig,  length);
1159                      endptr += length;
1160              }
1161              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1162              length = newlen;
1163          }
1164    
1165          if (gettimeofday(&start_time, &dummy) != 0)
1166                  perror("bad gettimeofday");
1167    
1168    
1169          for (i = 0; i < jfriedl_XR; i++)
1170              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1171                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1172    
1173          if (gettimeofday(&end_time, &dummy) != 0)
1174                  perror("bad gettimeofday");
1175    
1176    /* If it's a match or a not-match (as required), print what's wanted. */        double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1177                          -
1178                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1179    
1180          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1181          return 0;
1182      }
1183    #endif
1184    
1185      /* We come back here after a match when the -o option (only_matching) is set,
1186      in order to find any further matches in the same line. */
1187    
1188      ONLY_MATCHING_RESTART:
1189    
1190      /* Run through all the patterns until one matches or there is an error other
1191      than NOMATCH. This code is in a subroutine so that it can be re-used for
1192      finding subsequent matches when colouring matched lines. */
1193    
1194      match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1195    
1196      /* If it's a match or a not-match (as required), do what's wanted. */
1197    
1198    if (match != invert)    if (match != invert)
1199      {      {
1200      BOOL hyphenprinted = FALSE;      BOOL hyphenprinted = FALSE;
1201    
1202      if (filenames_nomatch_only) return 1;      /* We've failed if we want a file that doesn't have any matches. */
1203    
1204        if (filenames == FN_NOMATCH_ONLY) return 1;
1205    
1206        /* Just count if just counting is wanted. */
1207    
1208      if (count_only) count++;      if (count_only) count++;
1209    
1210      else if (filenames_only)      /* If all we want is a file name, there is no need to scan any more lines
1211        in the file. */
1212    
1213        else if (filenames == FN_MATCH_ONLY)
1214        {        {
1215        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1216        return 0;        return 0;
1217        }        }
1218    
1219        /* Likewise, if all we want is a yes/no answer. */
1220    
1221      else if (quiet) return 0;      else if (quiet) return 0;
1222    
1223        /* The --only-matching option prints just the substring that matched, or a
1224        captured portion of it, as long as this string is not empty, and the
1225        --file-offsets and --line-offsets options output offsets for the matching
1226        substring (they both force --only-matching = 0). None of these options
1227        prints any context. Afterwards, adjust the start and then jump back to look
1228        for further matches in the same line. If we are in invert mode, however,
1229        nothing is printed and we do not restart - this could still be useful
1230        because the return code is set. */
1231    
1232        else if (only_matching >= 0)
1233          {
1234          if (!invert)
1235            {
1236            if (printname != NULL) fprintf(stdout, "%s:", printname);
1237            if (number) fprintf(stdout, "%d:", linenumber);
1238            if (line_offsets)
1239              fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1240                offsets[1] - offsets[0]);
1241            else if (file_offsets)
1242              fprintf(stdout, "%d,%d\n",
1243                (int)(filepos + matchptr + offsets[0] - ptr),
1244                offsets[1] - offsets[0]);
1245            else if (only_matching < mrc)
1246              {
1247              int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1248              if (plen > 0)
1249                {
1250                if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1251                FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1252                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1253                fprintf(stdout, "\n");
1254                }
1255              }
1256            else if (printname != NULL || number) fprintf(stdout, "\n");
1257            match = FALSE;
1258            if (line_buffered) fflush(stdout);
1259            rc = 0;                      /* Had some success */
1260            startoffset = offsets[1];    /* Restart after the match */
1261            goto ONLY_MATCHING_RESTART;
1262            }
1263          }
1264    
1265        /* This is the default case when none of the above options is set. We print
1266        the matching lines(s), possibly preceded and/or followed by other lines of
1267        context. */
1268    
1269      else      else
1270        {        {
1271        /* See if there is a requirement to print some "after" lines from a        /* See if there is a requirement to print some "after" lines from a
# Line 467  while (ptr < endptr) Line 1273  while (ptr < endptr)
1273    
1274        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
1275          {          {
1276            int ellength;
1277          int linecount = 0;          int linecount = 0;
1278          char *p = lastmatchrestart;          char *p = lastmatchrestart;
1279    
1280          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
1281            {            {
1282            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
1283            linecount++;            linecount++;
1284            }            }
1285    
1286          /* It is important to advance lastmatchrestart during this printing so          /* It is important to advance lastmatchrestart during this printing so
1287          that it interacts correctly with any "before" printing below. */          that it interacts correctly with any "before" printing below. Print
1288            each line's data using fwrite() in case there are binary zeroes. */
1289    
1290          while (lastmatchrestart < p)          while (lastmatchrestart < p)
1291            {            {
1292            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
1293            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1294            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1295            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1296            fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1297            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
1298            }            }
1299          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
1300          }          }
# Line 509  while (ptr < endptr) Line 1316  while (ptr < endptr)
1316          int linecount = 0;          int linecount = 0;
1317          char *p = ptr;          char *p = ptr;
1318    
1319          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1320                 linecount++ < before_context)                 linecount < before_context)
1321            {            {
1322            p--;            linecount++;
1323            while (p > buffer && p[-1] != '\n') p--;            p = previous_line(p, main_buffer);
1324            }            }
1325    
1326          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 521  while (ptr < endptr) Line 1328  while (ptr < endptr)
1328    
1329          while (p < ptr)          while (p < ptr)
1330            {            {
1331              int ellength;
1332            char *pp = p;            char *pp = p;
1333            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1334            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1335            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1336            fprintf(stdout, "%.*s", pp - p + 1, p);            FWRITE(p, 1, pp - p, stdout);
1337            p = pp + 1;            p = pp;
1338            }            }
1339          }          }
1340    
1341        /* Now print the matching line(s); ensure we set hyphenpending at the end        /* Now print the matching line(s); ensure we set hyphenpending at the end
1342        of the file. */        of the file if any context lines are being output. */
1343    
1344          if (after_context > 0 || before_context > 0)
1345            endhyphenpending = TRUE;
1346    
       endhyphenpending = TRUE;  
1347        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (printname != NULL) fprintf(stdout, "%s:", printname);
1348        if (number) fprintf(stdout, "%d:", linenumber);        if (number) fprintf(stdout, "%d:", linenumber);
1349    
1350        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1351        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1352        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1353        start of the match will always be before the first \n character. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1354          the match will always be before the first newline sequence. */
1355    
1356        if (multiline)        if (multiline & !invert)
1357          {          {
1358          char *endmatch = ptr + offsets[1];          char *endmatch = ptr + offsets[1];
1359          t = ptr;          t = ptr;
1360          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          while (t < endmatch)
1361          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1362          linelength = endmatch - ptr;            t = end_of_line(t, endptr, &endlinelength);
1363              if (t < endmatch) linenumber++; else break;
1364              }
1365            linelength = t - ptr - endlinelength;
1366            }
1367    
1368          /*** NOTE: Use only fwrite() to output the data line, so that binary
1369          zeroes are treated as just another data character. */
1370    
1371          /* This extra option, for Jeffrey Friedl's debugging requirements,
1372          replaces the matched string, or a specific captured string if it exists,
1373          with X. When this happens, colouring is ignored. */
1374    
1375    #ifdef JFRIEDL_DEBUG
1376          if (S_arg >= 0 && S_arg < mrc)
1377            {
1378            int first = S_arg * 2;
1379            int last  = first + 1;
1380            FWRITE(ptr, 1, offsets[first], stdout);
1381            fprintf(stdout, "X");
1382            FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1383          }          }
1384          else
1385    #endif
1386    
1387          /* We have to split the line(s) up if colouring, and search for further
1388          matches, but not of course if the line is a non-match. */
1389    
1390        fprintf(stdout, "%.*s\n", linelength, ptr);        if (do_colour && !invert)
1391            {
1392            int plength;
1393            FWRITE(ptr, 1, offsets[0], stdout);
1394            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1395            FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1396            fprintf(stdout, "%c[00m", 0x1b);
1397            for (;;)
1398              {
1399              startoffset = offsets[1];
1400              if (startoffset >= (int)linelength + endlinelength ||
1401                  !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1402                break;
1403              FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1404              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1405              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1406              fprintf(stdout, "%c[00m", 0x1b);
1407              }
1408    
1409            /* In multiline mode, we may have already printed the complete line
1410            and its line-ending characters (if they matched the pattern), so there
1411            may be no more to print. */
1412    
1413            plength = (int)((linelength + endlinelength) - startoffset);
1414            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1415            }
1416    
1417          /* Not colouring; no need to search for further matches */
1418    
1419          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1420        }        }
1421    
1422        /* End of doing what has to be done for a match. If --line-buffered was
1423        given, flush the output. */
1424    
1425        if (line_buffered) fflush(stdout);
1426      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1427    
1428      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1429      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1430    
1431      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1432      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1433      }      }
1434    
1435    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1436      anything to be printed), we have to move on to the end of the match before
1437      proceeding. */
1438    
1439      if (multiline && invert && match)
1440        {
1441        int ellength;
1442        char *endmatch = ptr + offsets[1];
1443        t = ptr;
1444        while (t < endmatch)
1445          {
1446          t = end_of_line(t, endptr, &ellength);
1447          if (t <= endmatch) linenumber++; else break;
1448          }
1449        endmatch = end_of_line(endmatch, endptr, &ellength);
1450        linelength = endmatch - ptr - ellength;
1451        }
1452    
1453    ptr += linelength + 1;    /* Advance to after the newline and increment the line number. The file
1454      offset to the current line is maintained in filepos. */
1455    
1456      ptr += linelength + endlinelength;
1457      filepos += (int)(linelength + endlinelength);
1458    linenumber++;    linenumber++;
1459    
1460      /* If input is line buffered, and the buffer is not yet full, read another
1461      line and add it into the buffer. */
1462    
1463      if (input_line_buffered && bufflength < (size_t)bufsize)
1464        {
1465        int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1466        bufflength += add;
1467        endptr += add;
1468        }
1469    
1470    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1471    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1472    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
1473    about to be lost, print them. */    about to be lost, print them. */
1474    
1475    if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)    if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1476      {      {
1477      if (after_context > 0 &&      if (after_context > 0 &&
1478          lastmatchnumber > 0 &&          lastmatchnumber > 0 &&
1479          lastmatchrestart < buffer + MBUFTHIRD)          lastmatchrestart < main_buffer + bufthird)
1480        {        {
1481        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1482        lastmatchnumber = 0;        lastmatchnumber = 0;
# Line 585  while (ptr < endptr) Line 1484  while (ptr < endptr)
1484    
1485      /* Now do the shuffle */      /* Now do the shuffle */
1486    
1487      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1488      ptr -= MBUFTHIRD;      ptr -= bufthird;
1489      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);  
1490      endptr = buffer + bufflength;  #ifdef SUPPORT_LIBZ
1491        if (frtype == FR_LIBZ)
1492          bufflength = 2*bufthird +
1493            gzread (ingz, main_buffer + 2*bufthird, bufthird);
1494        else
1495    #endif
1496    
1497    #ifdef SUPPORT_LIBBZ2
1498        if (frtype == FR_LIBBZ2)
1499          bufflength = 2*bufthird +
1500            BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1501        else
1502    #endif
1503    
1504        bufflength = 2*bufthird +
1505          (input_line_buffered?
1506           read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1507           fread(main_buffer + 2*bufthird, 1, bufthird, in));
1508        endptr = main_buffer + bufflength;
1509    
1510      /* Adjust any last match point */      /* Adjust any last match point */
1511    
1512      if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;      if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1513      }      }
1514    }     /* Loop through the whole file */    }     /* Loop through the whole file */
1515    
1516  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
1517  hyphenpending if it prints something. */  hyphenpending if it prints something. */
1518    
1519  do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);  if (only_matching < 0 && !count_only)
1520  hyphenpending |= endhyphenpending;    {
1521      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1522      hyphenpending |= endhyphenpending;
1523      }
1524    
1525  /* Print the file name if we are looking for those without matches and there  /* Print the file name if we are looking for those without matches and there
1526  were none. If we found a match, we won't have got this far. */  were none. If we found a match, we won't have got this far. */
1527    
1528  if (filenames_nomatch_only)  if (filenames == FN_NOMATCH_ONLY)
1529    {    {
1530    fprintf(stdout, "%s\n", printname);    fprintf(stdout, "%s\n", printname);
1531    return 0;    return 0;
# Line 615  if (filenames_nomatch_only) Line 1535  if (filenames_nomatch_only)
1535    
1536  if (count_only)  if (count_only)
1537    {    {
1538    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1539    fprintf(stdout, "%d\n", count);      {
1540        if (printname != NULL && filenames != FN_NONE)
1541          fprintf(stdout, "%s:", printname);
1542        fprintf(stdout, "%d\n", count);
1543        }
1544    }    }
1545    
1546  return rc;  return rc;
# Line 633  recursing; if it's a file, grep it. Line 1557  recursing; if it's a file, grep it.
1557    
1558  Arguments:  Arguments:
1559    pathname          the path to investigate    pathname          the path to investigate
1560    dir_recurse       TRUE if recursing is wanted (-r)    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
   show_filenames    TRUE if file names are wanted for multiple files, except  
                       for the only file at top level when not filenames_only  
1561    only_one_at_top   TRUE if the path is the only one at toplevel    only_one_at_top   TRUE if the path is the only one at toplevel
1562    
1563  Returns:   0 if there was at least one match  Returns:   0 if there was at least one match
# Line 646  However, file opening failures are suppr Line 1568  However, file opening failures are suppr
1568  */  */
1569    
1570  static int  static int
1571  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL show_filenames,  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
   BOOL only_one_at_top)  
1572  {  {
1573  int rc = 1;  int rc = 1;
1574  int sep;  int sep;
1575  FILE *in;  int frtype;
1576  char *printname;  int pathlen;
1577    void *handle;
1578    FILE *in = NULL;           /* Ensure initialized */
1579    
1580    #ifdef SUPPORT_LIBZ
1581    gzFile ingz = NULL;
1582    #endif
1583    
1584    #ifdef SUPPORT_LIBBZ2
1585    BZFILE *inbz2 = NULL;
1586    #endif
1587    
1588  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1589    
1590  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1591    {    {
1592    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN, stdin_name,
1593      (filenames_only || filenames_nomatch_only ||      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
     (show_filenames && !only_one_at_top))?  
1594        stdin_name : NULL);        stdin_name : NULL);
1595    }    }
1596    
1597  /* If the file is a directory and we are recursing, scan each file within it,  /* If the file is a directory, skip if skipping or if we are recursing, scan
1598  subject to any include or exclude patterns that were set. The scanning code is  each file and directory within it, subject to any include or exclude patterns
1599  localized so it can be made system-specific. */  that were set. The scanning code is localized so it can be made
1600    system-specific. */
1601    
1602  if ((sep = isdirectory(pathname)) != 0 && dir_recurse)  if ((sep = isdirectory(pathname)) != 0)
1603    {    {
1604    char buffer[1024];    if (dee_action == dee_SKIP) return 1;
1605    char *nextfile;    if (dee_action == dee_RECURSE)
1606    directory_type *dir = opendirectory(pathname);      {
1607        char buffer[1024];
1608        char *nextfile;
1609        directory_type *dir = opendirectory(pathname);
1610    
1611        if (dir == NULL)
1612          {
1613          if (!silent)
1614            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1615              strerror(errno));
1616          return 2;
1617          }
1618    
1619        while ((nextfile = readdirectory(dir)) != NULL)
1620          {
1621          int frc, nflen;
1622          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1623          nflen = (int)(strlen(nextfile));
1624    
1625          if (isdirectory(buffer))
1626            {
1627            if (exclude_dir_compiled != NULL &&
1628                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1629              continue;
1630    
1631            if (include_dir_compiled != NULL &&
1632                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1633              continue;
1634            }
1635          else
1636            {
1637            if (exclude_compiled != NULL &&
1638                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1639              continue;
1640    
1641            if (include_compiled != NULL &&
1642                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1643              continue;
1644            }
1645    
1646          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1647          if (frc > 1) rc = frc;
1648           else if (frc == 0 && rc == 1) rc = 0;
1649          }
1650    
1651        closedirectory(dir);
1652        return rc;
1653        }
1654      }
1655    
1656    /* If the file is not a directory and not a regular file, skip it if that's
1657    been requested. */
1658    
1659    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1660    
1661    /* Control reaches here if we have a regular file, or if we have a directory
1662    and recursion or skipping was not requested, or if we have anything else and
1663    skipping was not requested. The scan proceeds. If this is the first and only
1664    argument at top level, we don't show the file name, unless we are only showing
1665    the file name, or the filename was forced (-H). */
1666    
1667    if (dir == NULL)  pathlen = (int)(strlen(pathname));
1668    
1669    /* Open using zlib if it is supported and the file name ends with .gz. */
1670    
1671    #ifdef SUPPORT_LIBZ
1672    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1673      {
1674      ingz = gzopen(pathname, "rb");
1675      if (ingz == NULL)
1676      {      {
1677      if (!silent)      if (!silent)
1678        fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1679          strerror(errno));          strerror(errno));
1680      return 2;      return 2;
1681      }      }
1682      handle = (void *)ingz;
1683      frtype = FR_LIBZ;
1684      }
1685    else
1686    #endif
1687    
1688    while ((nextfile = readdirectory(dir)) != NULL)  /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
     {  
     int frc, blen;  
     sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);  
     blen = strlen(buffer);  
   
     if (exclude_compiled != NULL &&  
         pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)  
       continue;  
1689    
1690      if (include_compiled != NULL &&  #ifdef SUPPORT_LIBBZ2
1691          pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1692        continue;    {
1693      inbz2 = BZ2_bzopen(pathname, "rb");
1694      handle = (void *)inbz2;
1695      frtype = FR_LIBBZ2;
1696      }
1697    else
1698    #endif
1699    
1700      frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);  /* Otherwise use plain fopen(). The label is so that we can come back here if
1701      if (frc > 1) rc = frc;  an attempt to read a .bz2 file indicates that it really is a plain file. */
      else if (frc == 0 && rc == 1) rc = 0;  
     }  
1702    
1703    closedirectory(dir);  #ifdef SUPPORT_LIBBZ2
1704    return rc;  PLAIN_FILE:
1705    #endif
1706      {
1707      in = fopen(pathname, "rb");
1708      handle = (void *)in;
1709      frtype = FR_PLAIN;
1710    }    }
1711    
1712  /* If the file is not a directory, or we are not recursing, scan it. If this is  /* All the opening methods return errno when they fail. */
 the first and only argument at top level, we don't show the file name (unless  
 we are only showing the file name). Otherwise, control is via the  
 show_filenames variable. */  
1713    
1714  in = fopen(pathname, "r");  if (handle == NULL)
 if (in == NULL)  
1715    {    {
1716    if (!silent)    if (!silent)
1717      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 719  if (in == NULL) Line 1719  if (in == NULL)
1719    return 2;    return 2;
1720    }    }
1721    
1722  printname =  (filenames_only || filenames_nomatch_only ||  /* Now grep the file */
1723    (show_filenames && !only_one_at_top))? pathname : NULL;  
1724    rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1725      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1726    
1727    /* Close in an appropriate manner. */
1728    
1729    #ifdef SUPPORT_LIBZ
1730    if (frtype == FR_LIBZ)
1731      gzclose(ingz);
1732    else
1733    #endif
1734    
1735  rc = pcregrep(in, printname);  /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1736    read failed. If the error indicates that the file isn't in fact bzipped, try
1737    again as a normal file. */
1738    
1739    #ifdef SUPPORT_LIBBZ2
1740    if (frtype == FR_LIBBZ2)
1741      {
1742      if (rc == 3)
1743        {
1744        int errnum;
1745        const char *err = BZ2_bzerror(inbz2, &errnum);
1746        if (errnum == BZ_DATA_ERROR_MAGIC)
1747          {
1748          BZ2_bzclose(inbz2);
1749          goto PLAIN_FILE;
1750          }
1751        else if (!silent)
1752          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1753            pathname, err);
1754        rc = 2;    /* The normal "something went wrong" code */
1755        }
1756      BZ2_bzclose(inbz2);
1757      }
1758    else
1759    #endif
1760    
1761    /* Normal file close */
1762    
1763  fclose(in);  fclose(in);
1764    
1765    /* Pass back the yield from pcregrep(). */
1766    
1767  return rc;  return rc;
1768  }  }
1769    
# Line 738  return rc; Line 1777  return rc;
1777  static int  static int
1778  usage(int rc)  usage(int rc)
1779  {  {
1780  fprintf(stderr, "Usage: pcregrep [-LMVcfhilnqrsvwx] [long-options] [pattern] [file1 file2 ...]\n");  option_item *op;
1781  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Usage: pcregrep [-");
1782    for (op = optionlist; op->one_char != 0; op++)
1783      {
1784      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1785      }
1786    fprintf(stderr, "] [long options] [pattern] [files]\n");
1787    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1788      "options.\n");
1789  return rc;  return rc;
1790  }  }
1791    
# Line 757  option_item *op; Line 1803  option_item *op;
1803    
1804  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1805  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1806  printf("PATTERN must be present if -f is not used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1807  printf("\"-\" can be used as a file name to mean STDIN.\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
1808    
1809    #ifdef SUPPORT_LIBZ
1810    printf("Files whose names end in .gz are read using zlib.\n");
1811    #endif
1812    
1813    #ifdef SUPPORT_LIBBZ2
1814    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1815    #endif
1816    
1817    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1818    printf("Other files and the standard input are read as plain files.\n\n");
1819    #else
1820    printf("All files are read as plain files, without any interpretation.\n\n");
1821    #endif
1822    
1823    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1824  printf("Options:\n");  printf("Options:\n");
1825    
1826  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
1827    {    {
1828    int n;    int n;
1829    char s[4];    char s[4];
1830    
1831      /* Two options were accidentally implemented and documented with underscores
1832      instead of hyphens in their names, something that was not noticed for quite a
1833      few releases. When fixing this, I left the underscored versions in the list
1834      in case people were using them. However, we don't want to display them in the
1835      help data. There are no other options that contain underscores, and we do not
1836      expect ever to implement such options. Therefore, just omit any option that
1837      contains an underscore. */
1838    
1839      if (strchr(op->long_name, '_') != NULL) continue;
1840    
1841    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1842    printf("  %s --%s%n", s, op->long_name, &n);    n = 31 - printf("  %s --%s", s, op->long_name);
   n = 30 - n;  
1843    if (n < 1) n = 1;    if (n < 1) n = 1;
1844    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                     ", op->help_text);
1845    }    }
1846    
1847  printf("\nWhen reading patterns from a file instead of using a command line option,\n");  printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
1848    printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
1849    printf("When reading patterns from a file instead of using a command line option,\n");
1850  printf("trailing white space is removed and blank lines are ignored.\n");  printf("trailing white space is removed and blank lines are ignored.\n");
1851  printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
1852      MAX_PATTERN_COUNT, PATBUFSIZE);
1853    
1854  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1855  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
# Line 794  handle_option(int letter, int options) Line 1867  handle_option(int letter, int options)
1867  {  {
1868  switch(letter)  switch(letter)
1869    {    {
1870    case -1:  help(); exit(0);    case N_FOFFSETS: file_offsets = TRUE; break;
1871      case N_HELP: help(); pcregrep_exit(0);
1872      case N_LBUFFER: line_buffered = TRUE; break;
1873      case N_LOFFSETS: line_offsets = number = TRUE; break;
1874      case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
1875    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1876    case 'h': filenames = FALSE; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1877      case 'H': filenames = FN_FORCE; break;
1878      case 'h': filenames = FN_NONE; break;
1879    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1880    case 'l': filenames_only = TRUE; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1881    case 'L': filenames_nomatch_only = TRUE; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1882    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1883    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1884      case 'o': only_matching = 0; break;
1885    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1886    case 'r': recurse = TRUE; break;    case 'r': dee_action = dee_RECURSE; break;
1887    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1888    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1889    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1890    case 'w': word_match = TRUE; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1891    case 'x': whole_lines = TRUE; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1892    
1893    case 'V':    case 'V':
1894    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
1895    fprintf(stderr, "PCRE version %s\n", pcre_version());    pcregrep_exit(0);
   exit(0);  
1896    break;    break;
1897    
1898    default:    default:
1899    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1900    exit(usage(2));    pcregrep_exit(usage(2));
1901    }    }
1902    
1903  return options;  return options;
# Line 828  return options; Line 1907  return options;
1907    
1908    
1909  /*************************************************  /*************************************************
1910    *          Construct printed ordinal             *
1911    *************************************************/
1912    
/* This turns a number into "1st", "3rd", etc. The suffix is chosen from the
last two digits, so that 11, 12, and 13 (and 111, 212, ...) yield "th" rather
than "st", "nd", "rd". Zero and negative numbers always get "th".

Argument:  n    the number to format
Returns:        pointer to a static buffer holding the text; the contents are
                overwritten by the next call
*/

static char *
ordin(int n)
{
/* Three characters per byte is more than enough for the decimal digits of an
int; the extra four cover a sign, the two-letter suffix, and the terminator. */

static char buffer[3*sizeof(int) + 4];
const char *ending = "th";

if (n > 0)
  {
  int last_two = n % 100;
  if (last_two < 11 || last_two > 13) switch (last_two % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1931    
1932    
1933    
1934    /*************************************************
1935    *          Compile a single pattern              *
1936    *************************************************/
1937    
1938    /* When the -F option has been used, this is called for each substring.
1939    Otherwise it's called for each supplied pattern.
1940    
1941    Arguments:
1942      pattern        the pattern string
1943      options        the PCRE options
1944      filename       the file name, or NULL for a command-line pattern
1945      count          0 if this is the only command line pattern, or
1946                     number of the command line pattern, or
1947                     linenumber for a pattern from a file
1948    
1949    Returns:         TRUE on success, FALSE after an error
1950    */
1951    
1952    static BOOL
1953    compile_single_pattern(char *pattern, int options, char *filename, int count)
1954    {
1955    char buffer[PATBUFSIZE];
1956    const char *error;
1957    int errptr;
1958    
1959    if (pattern_count >= MAX_PATTERN_COUNT)
1960      {
1961      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1962        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1963      return FALSE;
1964      }
1965    
1966    sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
1967      suffix[process_options]);
1968    pattern_list[pattern_count] =
1969      pcre_compile(buffer, options, &error, &errptr, pcretables);
1970    if (pattern_list[pattern_count] != NULL)
1971      {
1972      pattern_count++;
1973      return TRUE;
1974      }
1975    
1976    /* Handle compile errors */
1977    
1978    errptr -= (int)strlen(prefix[process_options]);
1979    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1980    
1981    if (filename == NULL)
1982      {
1983      if (count == 0)
1984        fprintf(stderr, "pcregrep: Error in command-line regex "
1985          "at offset %d: %s\n", errptr, error);
1986      else
1987        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1988          "at offset %d: %s\n", ordin(count), errptr, error);
1989      }
1990    else
1991      {
1992      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1993        "at offset %d: %s\n", count, filename, errptr, error);
1994      }
1995    
1996    return FALSE;
1997    }
1998    
1999    
2000    
2001    /*************************************************
2002    *           Compile one supplied pattern         *
2003    *************************************************/
2004    
2005    /* When the -F option has been used, each string may be a list of strings,
2006    separated by line breaks. They will be matched literally.
2007    
2008    Arguments:
2009      pattern        the pattern string
2010      options        the PCRE options
2011      filename       the file name, or NULL for a command-line pattern
2012      count          0 if this is the only command line pattern, or
2013                     number of the command line pattern, or
2014                     linenumber for a pattern from a file
2015    
2016    Returns:         TRUE on success, FALSE after an error
2017    */
2018    
2019    static BOOL
2020    compile_pattern(char *pattern, int options, char *filename, int count)
2021    {
2022    if ((process_options & PO_FIXED_STRINGS) != 0)
2023      {
2024      char *eop = pattern + strlen(pattern);
2025      char buffer[PATBUFSIZE];
2026      for(;;)
2027        {
2028        int ellength;
2029        char *p = end_of_line(pattern, eop, &ellength);
2030        if (ellength == 0)
2031          return compile_single_pattern(pattern, options, filename, count);
2032        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
2033        pattern = p;
2034        if (!compile_single_pattern(buffer, options, filename, count))
2035          return FALSE;
2036        }
2037      }
2038    else return compile_single_pattern(pattern, options, filename, count);
2039    }
2040    
2041    
2042    
2043    /*************************************************
2044  *                Main program                    *  *                Main program                    *
2045  *************************************************/  *************************************************/
2046    
# Line 838  main(int argc, char **argv) Line 2051  main(int argc, char **argv)
2051  {  {
2052  int i, j;  int i, j;
2053  int rc = 1;  int rc = 1;
2054  int options = 0;  int pcre_options = 0;
2055    int cmd_pattern_count = 0;
2056    int hint_count = 0;
2057  int errptr;  int errptr;
 const char *error;  
2058  BOOL only_one_at_top;  BOOL only_one_at_top;
2059    char *patterns[MAX_PATTERN_COUNT];
2060    const char *locale_from = "--locale";
2061    const char *error;
2062    
2063    #ifdef SUPPORT_PCREGREP_JIT
2064    pcre_jit_stack *jit_stack = NULL;
2065    #endif
2066    
2067    /* Set the default line ending value from the default in the PCRE library;
2068    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2069    Note that the return values from pcre_config(), though derived from the ASCII
2070    codes, are the same in EBCDIC environments, so we must use the actual values
2071    rather than escapes such as '\r'. */
2072    
2073    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2074    switch(i)
2075      {
2076      default:               newline = (char *)"lf"; break;
2077      case 13:               newline = (char *)"cr"; break;
2078      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2079      case -1:               newline = (char *)"any"; break;
2080      case -2:               newline = (char *)"anycrlf"; break;
2081      }
2082    
2083  /* Process the options */  /* Process the options */
2084    
# Line 855  for (i = 1; i < argc; i++) Line 2092  for (i = 1; i < argc; i++)
2092    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
2093    
2094    /* If we hit an argument that is just "-", it may be a reference to STDIN,    /* If we hit an argument that is just "-", it may be a reference to STDIN,
2095    but only if we have previously had -f to define the patterns. */    but only if we have previously had -e or -f to define the patterns. */
2096    
2097    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2098      {      {
2099      if (pattern_filename != NULL) break;      if (pattern_filename != NULL || pattern_count > 0) break;
2100        else exit(usage(2));        else pcregrep_exit(usage(2));
2101      }      }
2102    
2103    /* Handle a long name option, or -- to terminate the options */    /* Handle a long name option, or -- to terminate the options */
# Line 881  for (i = 1; i < argc; i++) Line 2118  for (i = 1; i < argc; i++)
2118      /* Some long options have data that follows after =, for example file=name.      /* Some long options have data that follows after =, for example file=name.
2119      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2120      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2121      with Jeff Friedl in preferring "regex" without the "p". These options are      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2122      entered in the table as "regex(p)". No option is in both these categories,      These options are entered in the table as "regex(p)". Options can be in
2123      fortunately. */      both these categories. */
2124    
2125      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2126        {        {
2127        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2128        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2129        if (opbra == NULL)     /* Not a (p) case */  
2130          /* Handle options with only one spelling of the name */
2131    
2132          if (opbra == NULL)     /* Does not contain '(' */
2133          {          {
2134          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2135            {            {
# Line 897  for (i = 1; i < argc; i++) Line 2137  for (i = 1; i < argc; i++)
2137            }            }
2138          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2139            {            {
2140            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2141            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2142                (int)strlen(arg) : (int)(argequals - arg);
2143            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2144              {              {
2145              option_data = arg + arglen;              option_data = arg + arglen;
# Line 911  for (i = 1; i < argc; i++) Line 2152  for (i = 1; i < argc; i++)
2152              }              }
2153            }            }
2154          }          }
2155        else                   /* Special case xxxx(p) */  
2156          /* Handle options with an alternate spelling of the name */
2157    
2158          else
2159          {          {
2160          char buff1[24];          char buff1[24];
2161          char buff2[24];          char buff2[24];
2162          int baselen = opbra - op->long_name;  
2163            int baselen = (int)(opbra - op->long_name);
2164            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2165            int arglen = (argequals == NULL || equals == NULL)?
2166              (int)strlen(arg) : (int)(argequals - arg);
2167    
2168          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2169          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2170            opbra + 1);  
2171          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2172               strncmp(arg, buff2, arglen) == 0)
2173              {
2174              if (equals != NULL && argequals != NULL)
2175                {
2176                option_data = argequals;
2177                if (*option_data == '=')
2178                  {
2179                  option_data++;
2180                  longopwasequals = TRUE;
2181                  }
2182                }
2183            break;            break;
2184              }
2185          }          }
2186        }        }
2187    
2188      if (op->one_char == 0)      if (op->one_char == 0)
2189        {        {
2190        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2191        exit(usage(2));        pcregrep_exit(usage(2));
2192        }        }
2193      }      }
2194    
2195      /* Jeffrey Friedl's debugging harness uses these additional options which
2196      are not in the right form for putting in the option table because they use
2197      only one hyphen, yet are more than one character long. By putting them
2198      separately here, they will not get displayed as part of the help() output,
2199      but I don't think Jeffrey will care about that. */
2200    
2201    #ifdef JFRIEDL_DEBUG
2202      else if (strcmp(argv[i], "-pre") == 0) {
2203              jfriedl_prefix = argv[++i];
2204              continue;
2205      } else if (strcmp(argv[i], "-post") == 0) {
2206              jfriedl_postfix = argv[++i];
2207              continue;
2208      } else if (strcmp(argv[i], "-XT") == 0) {
2209              sscanf(argv[++i], "%d", &jfriedl_XT);
2210              continue;
2211      } else if (strcmp(argv[i], "-XR") == 0) {
2212              sscanf(argv[++i], "%d", &jfriedl_XR);
2213              continue;
2214      }
2215    #endif
2216    
2217    
2218    /* One-char options; many that have no data may be in a single argument; we    /* One-char options; many that have no data may be in a single argument; we
2219    continue till we hit the last one or one that needs data. */    continue till we hit the last one or one that needs data. */
2220    
# Line 941  for (i = 1; i < argc; i++) Line 2225  for (i = 1; i < argc; i++)
2225      while (*s != 0)      while (*s != 0)
2226        {        {
2227        for (op = optionlist; op->one_char != 0; op++)        for (op = optionlist; op->one_char != 0; op++)
2228          { if (*s == op->one_char) break; }          {
2229            if (*s == op->one_char) break;
2230            }
2231        if (op->one_char == 0)        if (op->one_char == 0)
2232          {          {
2233          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2234            *s, argv[i]);            *s, argv[i]);
2235          exit(usage(2));          pcregrep_exit(usage(2));
2236          }          }
2237        if (op->type != OP_NODATA || s[1] == 0)  
2238          /* Check for a single-character option that has data: OP_OP_NUMBER
2239          is used for one that either has a numeric value or defaults, i.e. the
2240          data is optional. If a digit follows, there is data; if not, carry on
2241          with other single-character options in the same string. */
2242    
2243          option_data = s+1;
2244          if (op->type == OP_OP_NUMBER)
2245          {          {
2246          option_data = s+1;          if (isdigit((unsigned char)s[1])) break;
         break;  
2247          }          }
2248        options = handle_option(*s++, options);        else   /* Check for end or a dataless option */
2249            {
2250            if (op->type != OP_NODATA || s[1] == 0) break;
2251            }
2252    
2253          /* Handle a single-character option with no data, then loop for the
2254          next character in the string. */
2255    
2256          pcre_options = handle_option(*s++, pcre_options);
2257        }        }
2258      }      }
2259    
2260    /* At this point we should have op pointing to a matched option */    /* At this point we should have op pointing to a matched option. If the type
2261      is OP_NODATA, it means that there is no data, and the option might set
2262      something in the PCRE options. */
2263    
2264    if (op->type == OP_NODATA)    if (op->type == OP_NODATA)
     options = handle_option(op->one_char, options);  
   else  
2265      {      {
2266      if (*option_data == 0)      pcre_options = handle_option(op->one_char, pcre_options);
2267        continue;
2268        }
2269    
2270      /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2271      either has a value or defaults to something. It cannot have data in a
2272      separate item. At the moment, the only such options are "colo(u)r",
2273      "only-matching", and Jeffrey Friedl's special -S debugging option. */
2274    
2275      if (*option_data == 0 &&
2276          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2277        {
2278        switch (op->one_char)
2279        {        {
2280        if (i >= argc - 1 || longopwasequals)        case N_COLOUR:
2281          {        colour_option = (char *)"auto";
2282          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        break;
2283          exit(usage(2));  
2284          }        case 'o':
2285        option_data = argv[++i];        only_matching = 0;
2286          break;
2287    
2288    #ifdef JFRIEDL_DEBUG
2289          case 'S':
2290          S_arg = 0;
2291          break;
2292    #endif
2293        }        }
2294        continue;
2295        }
2296    
2297      /* Otherwise, find the data string for the option. */
2298    
2299      if (op->type == OP_STRING) *((char **)op->dataptr) = option_data; else    if (*option_data == 0)
2300        {
2301        if (i >= argc - 1 || longopwasequals)
2302        {        {
2303        char *endptr;        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2304        int n = strtoul(option_data, &endptr, 10);        pcregrep_exit(usage(2));
2305        if (*endptr != 0)        }
2306        option_data = argv[++i];
2307        }
2308    
2309      /* If the option type is OP_PATLIST, it's the -e option, which can be called
2310      multiple times to create a list of patterns. */
2311    
2312      if (op->type == OP_PATLIST)
2313        {
2314        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2315          {
2316          fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2317            MAX_PATTERN_COUNT);
2318          return 2;
2319          }
2320        patterns[cmd_pattern_count++] = option_data;
2321        }
2322    
2323      /* Otherwise, deal with single string or numeric data values. */
2324    
2325      else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2326               op->type != OP_OP_NUMBER)
2327        {
2328        *((char **)op->dataptr) = option_data;
2329        }
2330    
2331      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2332      only for unpicking arguments, so just keep it simple. */
2333    
2334      else
2335        {
2336        unsigned long int n = 0;
2337        char *endptr = option_data;
2338        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2339        while (isdigit((unsigned char)(*endptr)))
2340          n = n * 10 + (int)(*endptr++ - '0');
2341        if (toupper(*endptr) == 'K')
2342          {
2343          n *= 1024;
2344          endptr++;
2345          }
2346        else if (toupper(*endptr) == 'M')
2347          {
2348          n *= 1024*1024;
2349          endptr++;
2350          }
2351        if (*endptr != 0)
2352          {
2353          if (longop)
2354          {          {
2355          if (longop)          char *equals = strchr(op->long_name, '=');
2356            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%s\n",          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2357              option_data, op->long_name);            (int)(equals - op->long_name);
2358          else          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2359            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",            option_data, nlen, op->long_name);
             option_data, op->one_char);  
         exit(usage(2));  
2360          }          }
2361        *((int *)op->dataptr) = n;        else
2362            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2363              option_data, op->one_char);
2364          pcregrep_exit(usage(2));
2365        }        }
2366        if (op->type == OP_LONGNUMBER)
2367            *((unsigned long int *)op->dataptr) = n;
2368        else
2369            *((int *)op->dataptr) = n;
2370      }      }
2371    }    }
2372    
# Line 1001  if (both_context > 0) Line 2379  if (both_context > 0)
2379    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2380    }    }
2381    
2382  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2383  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  However, the latter two set only_matching. */
2384    
2385  if (pattern_list == NULL || hints_list == NULL)  if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2386        (file_offsets && line_offsets))
2387    {    {
2388    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2389    return 2;      "and/or --line-offsets\n");
2390      pcregrep_exit(usage(2));
2391    }    }
2392    
2393  /* Compile the regular expression(s). */  if (file_offsets || line_offsets) only_matching = 0;
2394    
2395  if (pattern_filename != NULL)  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2396    LC_ALL environment variable is set, and if so, use it. */
2397    
2398    if (locale == NULL)
2399    {    {
2400    FILE *f = fopen(pattern_filename, "r");    locale = getenv("LC_ALL");
2401    char buffer[MBUFTHIRD + 16];    locale_from = "LCC_ALL";
2402    char *rdstart;    }
2403    int adjust = 0;  
2404    if (locale == NULL)
2405      {
2406      locale = getenv("LC_CTYPE");
2407      locale_from = "LC_CTYPE";
2408      }
2409    
2410    /* If a locale has been provided, set it, and generate the tables that PCRE
2411    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2412    
2413    if (f == NULL)  if (locale != NULL)
2414      {
2415      if (setlocale(LC_CTYPE, locale) == NULL)
2416      {      {
2417      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2418        strerror(errno));        locale, locale_from);
2419      return 2;      return 2;
2420      }      }
2421      pcretables = pcre_maketables();
2422      }
2423    
2424    if (whole_lines)  /* Sort out colouring */
2425    
2426    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2427      {
2428      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2429      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2430      else
2431      {      {
2432      strcpy(buffer, "^(?:");      fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2433      adjust = 4;        colour_option);
2434        return 2;
2435        }
2436      if (do_colour)
2437        {
2438        char *cs = getenv("PCREGREP_COLOUR");
2439        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2440        if (cs != NULL) colour_string = cs;
2441      }      }
2442    else if (word_match)    }
2443    
2444    /* Interpret the newline type; the default settings are Unix-like. */
2445    
2446    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2447      {
2448      pcre_options |= PCRE_NEWLINE_CR;
2449      endlinetype = EL_CR;
2450      }
2451    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2452      {
2453      pcre_options |= PCRE_NEWLINE_LF;
2454      endlinetype = EL_LF;
2455      }
2456    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2457      {
2458      pcre_options |= PCRE_NEWLINE_CRLF;
2459      endlinetype = EL_CRLF;
2460      }
2461    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2462      {
2463      pcre_options |= PCRE_NEWLINE_ANY;
2464      endlinetype = EL_ANY;
2465      }
2466    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2467      {
2468      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2469      endlinetype = EL_ANYCRLF;
2470      }
2471    else
2472      {
2473      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2474      return 2;
2475      }
2476    
2477    /* Interpret the text values for -d and -D */
2478    
2479    if (dee_option != NULL)
2480      {
2481      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2482      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2483      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2484      else
2485      {      {
2486      strcpy(buffer, "\\b");      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2487      adjust = 2;      return 2;
2488      }      }
2489      }
2490    
2491    rdstart = buffer + adjust;  if (DEE_option != NULL)
2492    while (fgets(rdstart, MBUFTHIRD, f) != NULL)    {
2493      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2494      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2495      else
2496      {      {
2497      char *s = rdstart + (int)strlen(rdstart);      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2498      if (pattern_count >= MAX_PATTERN_COUNT)      return 2;
       {  
       fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",  
         MAX_PATTERN_COUNT);  
       return 2;  
       }  
     while (s > rdstart && isspace((unsigned char)(s[-1]))) s--;  
     if (s == rdstart) continue;  
     if (whole_lines) strcpy(s, ")$");  
       else if (word_match)strcpy(s, "\\b");  
         else *s = 0;  
     pattern_list[pattern_count] = pcre_compile(buffer, options, &error,  
       &errptr, NULL);  
     if (pattern_list[pattern_count++] == NULL)  
       {  
       fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",  
         pattern_count, errptr - adjust, error);  
       return 2;  
       }  
2499      }      }
   fclose(f);  
2500    }    }
2501    
2502  /* If no file name, a single regex must be given inline. */  /* Check the values for Jeffrey Friedl's debugging options. */
2503    
2504  else  #ifdef JFRIEDL_DEBUG
2505    if (S_arg > 9)
2506    {    {
2507    char buffer[MBUFTHIRD + 16];    fprintf(stderr, "pcregrep: bad value for -S option\n");
2508    char *pat;    return 2;
2509    int adjust = 0;    }
2510    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2511      {
2512      if (jfriedl_XT == 0) jfriedl_XT = 1;
2513      if (jfriedl_XR == 0) jfriedl_XR = 1;
2514      }
2515    #endif
2516    
2517    /* Get memory for the main buffer, and to store the pattern and hints lists. */
2518    
2519    bufsize = 3*bufthird;
2520    main_buffer = (char *)malloc(bufsize);
2521    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2522    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2523    
2524    if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
2525      {
2526      fprintf(stderr, "pcregrep: malloc failed\n");
2527      goto EXIT2;
2528      }
2529    
2530    /* If no patterns were provided by -e, and there is no file provided by -f,
2531    the first argument is the one and only pattern, and it must exist. */
2532    
2533    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2534      {
2535    if (i >= argc) return usage(2);    if (i >= argc) return usage(2);
2536      patterns[cmd_pattern_count++] = argv[i++];
2537      }
2538    
2539    /* Compile the patterns that were provided on the command line, either by
2540    multiple uses of -e or as a single unkeyed pattern. */
2541    
2542    for (j = 0; j < cmd_pattern_count; j++)
2543      {
2544      if (!compile_pattern(patterns[j], pcre_options, NULL,
2545           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2546        goto EXIT2;
2547      }
2548    
2549    if (whole_lines)  /* Compile the regular expressions that are provided in a file. */
2550    
2551    if (pattern_filename != NULL)
2552      {
2553      int linenumber = 0;
2554      FILE *f;
2555      char *filename;
2556      char buffer[PATBUFSIZE];
2557    
2558      if (strcmp(pattern_filename, "-") == 0)
2559      {      {
2560      sprintf(buffer, "^(?:%.*s)$", MBUFTHIRD, argv[i++]);      f = stdin;
2561      pat = buffer;      filename = stdin_name;
     adjust = 4;  
2562      }      }
2563    else if (word_match)    else
2564      {      {
2565      sprintf(buffer, "\\b%.*s\\b", MBUFTHIRD, argv[i++]);      f = fopen(pattern_filename, "r");
2566      pat = buffer;      if (f == NULL)
2567      adjust = 2;        {
2568          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2569            strerror(errno));
2570          goto EXIT2;
2571          }
2572        filename = pattern_filename;
2573      }      }
   else pat = argv[i++];  
2574    
2575    pattern_list[0] = pcre_compile(pat, options, &error, &errptr, NULL);    while (fgets(buffer, PATBUFSIZE, f) != NULL)
   
   if (pattern_list[0] == NULL)  
2576      {      {
2577      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n",      char *s = buffer + (int)strlen(buffer);
2578        errptr - adjust, error);      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2579      return 2;      *s = 0;
2580        linenumber++;
2581        if (buffer[0] == 0) continue;   /* Skip blank lines */
2582        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2583          goto EXIT2;
2584      }      }
2585    pattern_count++;  
2586      if (f != stdin) fclose(f);
2587    }    }
2588    
2589  /* Study the regular expressions, as we will be running them many times */  /* Study the regular expressions, as we will be running them many times. Unless
2590    JIT has been explicitly disabled, arrange a stack for it to use. */
2591    
2592    #ifdef SUPPORT_PCREGREP_JIT
2593    if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2594      jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
2595    #endif
2596    
2597  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
2598    {    {
2599    hints_list[j] = pcre_study(pattern_list[j], 0, &error);    hints_list[j] = pcre_study(pattern_list[j], study_options, &error);
2600    if (error != NULL)    if (error != NULL)
2601      {      {
2602      char s[16];      char s[16];
2603      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2604      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2605      return 2;      goto EXIT2;
2606        }
2607      hint_count++;
2608    #ifdef SUPPORT_PCREGREP_JIT
2609      if (jit_stack != NULL && hints_list[j] != NULL)
2610        pcre_assign_jit_stack(hints_list[j], NULL, jit_stack);
2611    #endif
2612      }
2613    
2614    /* If --match-limit or --recursion-limit was set, put the value(s) into the
2615    pcre_extra block for each pattern. */
2616    
2617    if (match_limit > 0 || match_limit_recursion > 0)
2618      {
2619      for (j = 0; j < pattern_count; j++)
2620        {
2621        if (hints_list[j] == NULL)
2622          {
2623          hints_list[j] = malloc(sizeof(pcre_extra));
2624          if (hints_list[j] == NULL)
2625            {
2626            fprintf(stderr, "pcregrep: malloc failed\n");
2627            pcregrep_exit(2);
2628            }
2629          }
2630        if (match_limit > 0)
2631          {
2632          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2633          hints_list[j]->match_limit = match_limit;
2634          }
2635        if (match_limit_recursion > 0)
2636          {
2637          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2638          hints_list[j]->match_limit_recursion = match_limit_recursion;
2639          }
2640      }      }
2641    }    }
2642    
# Line 1117  for (j = 0; j < pattern_count; j++) Line 2644  for (j = 0; j < pattern_count; j++)
2644    
2645  if (exclude_pattern != NULL)  if (exclude_pattern != NULL)
2646    {    {
2647    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, NULL);    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2648        pcretables);
2649    if (exclude_compiled == NULL)    if (exclude_compiled == NULL)
2650      {      {
2651      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2652        errptr, error);        errptr, error);
2653      return 2;      goto EXIT2;
2654      }      }
2655    }    }
2656    
2657  if (include_pattern != NULL)  if (include_pattern != NULL)
2658    {    {
2659    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, NULL);    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2660        pcretables);
2661    if (include_compiled == NULL)    if (include_compiled == NULL)
2662      {      {
2663      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2664        errptr, error);        errptr, error);
2665      return 2;      goto EXIT2;
2666      }      }
2667    }    }
2668    
2669  /* If there are no further arguments, do the business on stdin and exit */  if (exclude_dir_pattern != NULL)
2670      {
2671      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2672        pcretables);
2673      if (exclude_dir_compiled == NULL)
2674        {
2675        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2676          errptr, error);
2677        goto EXIT2;
2678        }
2679      }
2680    
2681  if (i >= argc) return pcregrep(stdin,  if (include_dir_pattern != NULL)
2682    (filenames_only || filenames_nomatch_only)? stdin_name : NULL);    {
2683      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2684        pcretables);
2685      if (include_dir_compiled == NULL)
2686        {
2687        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2688          errptr, error);
2689        goto EXIT2;
2690        }
2691      }
2692    
2693    /* If there are no further arguments, do the business on stdin and exit. */
2694    
2695    if (i >= argc)
2696      {
2697      rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2698        (filenames > FN_DEFAULT)? stdin_name : NULL);
2699      goto EXIT;
2700      }
2701    
2702  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2703  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
2704  the file name if the argument is not a directory and filenames_only is not set.  the file name if the argument is not a directory and filenames are not
2705  */  otherwise forced. */
2706    
2707  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
2708    
2709  for (; i < argc; i++)  for (; i < argc; i++)
2710    {    {
2711    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2712        only_one_at_top);
2713    if (frc > 1) rc = frc;    if (frc > 1) rc = frc;
2714      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2715    }    }
2716    
2717  return rc;  EXIT:
2718    #ifdef SUPPORT_PCREGREP_JIT
2719    if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2720    #endif
2721    if (main_buffer != NULL) free(main_buffer);
2722    if (pattern_list != NULL)
2723      {
2724      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2725      free(pattern_list);
2726      }
2727    if (hints_list != NULL)
2728      {
2729      for (i = 0; i < hint_count; i++)
2730        {
2731        if (hints_list[i] != NULL) pcre_free_study(hints_list[i]);
2732        }
2733      free(hints_list);
2734      }
2735    pcregrep_exit(rc);
2736    
2737    EXIT2:
2738    rc = 2;
2739    goto EXIT;
2740  }  }
2741    
2742  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.77  
changed lines
  Added in v.779

  ViewVC Help
Powered by ViewVC 1.1.5