/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 1355 by ph10, Tue Aug 13 17:34:02 2013 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On Unix-like, Windows, and native z/OS systems it can
7  directories.  recurse into directories, and in z/OS it can handle PDS files.
8    
9             Copyright (c) 1997-2006 University of Cambridge  Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
10    additional header is required. That header is not included in the main PCRE
11    distribution because other apparatus is needed to compile pcregrep for z/OS.
12    The header can be found in the special z/OS distribution, which is available
13    from www.zaconsultants.net or from www.cbttape.org.
14    
15               Copyright (c) 1997-2013 University of Cambridge
16    
17  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
18  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 43  POSSIBILITY OF SUCH DAMAGE.
43  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
44  */  */
45    
46    #ifdef HAVE_CONFIG_H
47    #include "config.h"
48    #endif
49    
50  #include <ctype.h>  #include <ctype.h>
51  #include <locale.h>  #include <locale.h>
52  #include <stdio.h>  #include <stdio.h>
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 56  POSSIBILITY OF SUCH DAMAGE.
56    
57  #include <sys/types.h>  #include <sys/types.h>
58  #include <sys/stat.h>  #include <sys/stat.h>
59    
60    #ifdef HAVE_UNISTD_H
61  #include <unistd.h>  #include <unistd.h>
62    #endif
63    
64    #ifdef SUPPORT_LIBZ
65    #include <zlib.h>
66    #endif
67    
68    #ifdef SUPPORT_LIBBZ2
69    #include <bzlib.h>
70    #endif
71    
 #include "config.h"  
72  #include "pcre.h"  #include "pcre.h"
73    
74  #define FALSE 0  #define FALSE 0
# Line 56  POSSIBILITY OF SUCH DAMAGE. Line 76  POSSIBILITY OF SUCH DAMAGE.
76    
77  typedef int BOOL;  typedef int BOOL;
78    
79  #define VERSION "4.2 09-Jan-2006"  #define OFFSET_SIZE 99
 #define MAX_PATTERN_COUNT 100  
80    
81  #if BUFSIZ > 8192  #if BUFSIZ > 8192
82  #define MBUFTHIRD BUFSIZ  #define MAXPATLEN BUFSIZ
83  #else  #else
84  #define MBUFTHIRD 8192  #define MAXPATLEN 8192
85  #endif  #endif
86    
87    #define PATBUFSIZE (MAXPATLEN + 10)   /* Allows for prefix+suffix */
88    
89  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
90  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
91  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
92    
93  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
94    
95    /* File reading styles */
96    
97    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
98    
99  /* Actions for the -d and -D options */  /* Actions for the -d and -D options */
100    
# Line 83  enum { DEE_READ, DEE_SKIP }; Line 107  enum { DEE_READ, DEE_SKIP };
107  #define PO_LINE_MATCH     0x0002  #define PO_LINE_MATCH     0x0002
108  #define PO_FIXED_STRINGS  0x0004  #define PO_FIXED_STRINGS  0x0004
109    
110    /* Line ending types */
111    
112    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
113    
114    /* Binary file options */
115    
116    enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
117    
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that tests the result and then does nothing with it.
Oddly, this does not also seem to apply to fprintf().

The test is wrapped in do ... while (0) so that FWRITE(...); is exactly one
statement. A bare "if (...) {}" expansion is not: placed in the "then" arm of
an if/else it either fails to compile (with the trailing semicolon) or silently
captures the following "else" (without it). */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
125    
126    
127    
128  /*************************************************  /*************************************************
# Line 94  regular code. */ Line 134  regular code. */
134    
135  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
136  static int S_arg = -1;  static int S_arg = -1;
137    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
138    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
139    static const char *jfriedl_prefix = "";
140    static const char *jfriedl_postfix = "";
141  #endif  #endif
142    
143    static int  endlinetype;
144    
145  static char *colour_string = (char *)"1;31";  static char *colour_string = (char *)"1;31";
146  static char *colour_option = NULL;  static char *colour_option = NULL;
147  static char *dee_option = NULL;  static char *dee_option = NULL;
148  static char *DEE_option = NULL;  static char *DEE_option = NULL;
 static char *pattern_filename = NULL;  
 static char *stdin_name = (char *)"(standard input)";  
149  static char *locale = NULL;  static char *locale = NULL;
150    static char *main_buffer = NULL;
151    static char *newline = NULL;
152    static char *om_separator = (char *)"";
153    static char *stdin_name = (char *)"(standard input)";
154    
155  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
156    
 static int  pattern_count = 0;  
 static pcre **pattern_list;  
 static pcre_extra **hints_list;  
   
 static char *include_pattern = NULL;  
 static char *exclude_pattern = NULL;  
   
 static pcre *include_compiled = NULL;  
 static pcre *exclude_compiled = NULL;  
   
157  static int after_context = 0;  static int after_context = 0;
158  static int before_context = 0;  static int before_context = 0;
159    static int binary_files = BIN_BINARY;
160  static int both_context = 0;  static int both_context = 0;
161    static int bufthird = PCREGREP_BUFSIZE;
162    static int bufsize = 3*PCREGREP_BUFSIZE;
163    
164    #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
165    static int dee_action = dee_SKIP;
166    #else
167  static int dee_action = dee_READ;  static int dee_action = dee_READ;
168    #endif
169    
170  static int DEE_action = DEE_READ;  static int DEE_action = DEE_READ;
171  static int error_count = 0;  static int error_count = 0;
172  static int filenames = FN_DEFAULT;  static int filenames = FN_DEFAULT;
173    static int pcre_options = 0;
174  static int process_options = 0;  static int process_options = 0;
175    
176    #ifdef SUPPORT_PCREGREP_JIT
177    static int study_options = PCRE_STUDY_JIT_COMPILE;
178    #else
179    static int study_options = 0;
180    #endif
181    
182    static unsigned long int match_limit = 0;
183    static unsigned long int match_limit_recursion = 0;
184    
185  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
186  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
187    static BOOL file_offsets = FALSE;
188  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
189  static BOOL invert = FALSE;  static BOOL invert = FALSE;
190    static BOOL line_buffered = FALSE;
191    static BOOL line_offsets = FALSE;
192  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
193  static BOOL number = FALSE;  static BOOL number = FALSE;
194  static BOOL only_matching = FALSE;  static BOOL omit_zero_count = FALSE;
195    static BOOL resource_error = FALSE;
196  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
197    static BOOL show_only_matching = FALSE;
198  static BOOL silent = FALSE;  static BOOL silent = FALSE;
199    static BOOL utf8 = FALSE;
200    
201    /* Structure for list of --only-matching capturing numbers. */
202    
203    typedef struct omstr {
204      struct omstr *next;
205      int groupnum;
206    } omstr;
207    
208    static omstr *only_matching = NULL;
209    static omstr *only_matching_last = NULL;
210    
211    /* Structure for holding the two variables that describe a number chain. */
212    
213    typedef struct omdatastr {
214      omstr **anchor;
215      omstr **lastptr;
216    } omdatastr;
217    
218    static omdatastr only_matching_data = { &only_matching, &only_matching_last };
219    
220    /* Structure for list of file names (for -f and --{in,ex}clude-from) */
221    
222    typedef struct fnstr {
223      struct fnstr *next;
224      char *name;
225    } fnstr;
226    
227    static fnstr *exclude_from = NULL;
228    static fnstr *exclude_from_last = NULL;
229    static fnstr *include_from = NULL;
230    static fnstr *include_from_last = NULL;
231    
232    static fnstr *file_lists = NULL;
233    static fnstr *file_lists_last = NULL;
234    static fnstr *pattern_files = NULL;
235    static fnstr *pattern_files_last = NULL;
236    
237    /* Structure for holding the two variables that describe a file name chain. */
238    
239    typedef struct fndatastr {
240      fnstr **anchor;
241      fnstr **lastptr;
242    } fndatastr;
243    
244    static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
245    static fndatastr include_from_data = { &include_from, &include_from_last };
246    static fndatastr file_lists_data = { &file_lists, &file_lists_last };
247    static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
248    
249    /* Structure for pattern and its compiled form; used for matching patterns and
250    also for include/exclude patterns. */
251    
252    typedef struct patstr {
253      struct patstr *next;
254      char *string;
255      pcre *compiled;
256      pcre_extra *hint;
257    } patstr;
258    
259    static patstr *patterns = NULL;
260    static patstr *patterns_last = NULL;
261    static patstr *include_patterns = NULL;
262    static patstr *include_patterns_last = NULL;
263    static patstr *exclude_patterns = NULL;
264    static patstr *exclude_patterns_last = NULL;
265    static patstr *include_dir_patterns = NULL;
266    static patstr *include_dir_patterns_last = NULL;
267    static patstr *exclude_dir_patterns = NULL;
268    static patstr *exclude_dir_patterns_last = NULL;
269    
270    /* Structure holding the two variables that describe a pattern chain. A pointer
271    to such structures is used for each appropriate option. */
272    
273    typedef struct patdatastr {
274      patstr **anchor;
275      patstr **lastptr;
276    } patdatastr;
277    
278    static patdatastr match_patdata = { &patterns, &patterns_last };
279    static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
280    static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
281    static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
282    static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
283    
284    static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
285                                     &include_dir_patterns, &exclude_dir_patterns };
286    
287    static const char *incexname[4] = { "--include", "--exclude",
288                                        "--include-dir", "--exclude-dir" };
289    
290  /* Structure for options and list of them */  /* Structure for options and list of them */
291    
292  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
293         OP_PATLIST };         OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
294    
295  typedef struct option_item {  typedef struct option_item {
296    int type;    int type;
# Line 151  typedef struct option_item { Line 303  typedef struct option_item {
303  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
304  used to identify them. */  used to identify them. */
305    
306  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
307  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
308  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
309  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
310  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
311  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
312  #define N_NULL      (-7)  #define N_LABEL        (-7)
313    #define N_LOCALE       (-8)
314    #define N_NULL         (-9)
315    #define N_LOFFSETS     (-10)
316    #define N_FOFFSETS     (-11)
317    #define N_LBUFFER      (-12)
318    #define N_M_LIMIT      (-13)
319    #define N_M_LIMIT_REC  (-14)
320    #define N_BUFSIZE      (-15)
321    #define N_NOJIT        (-16)
322    #define N_FILE_LIST    (-17)
323    #define N_BINARY_FILES (-18)
324    #define N_EXCLUDE_FROM (-19)
325    #define N_INCLUDE_FROM (-20)
326    #define N_OM_SEPARATOR (-21)
327    
328  static option_item optionlist[] = {  static option_item optionlist[] = {
329    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
330    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
331    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
332    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
333    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
334    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },    { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
335    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
336    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
337    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
338    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
339    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
340    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
341    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
342    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
343    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
344    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
345    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
346    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
347    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
348    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
349    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
350    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
351    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },  #ifdef SUPPORT_PCREGREP_JIT
352    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
353    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },  #else
354    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
355    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },  #endif
356      { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
357      { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
358      { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
359      { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
360      { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
361      { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
362      { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
363      { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
364      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
365      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
366      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
367      { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
368      { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
369      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
370      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
371      { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
372      { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
373      { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
374      { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
375      { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
376      { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
377    
378      /* These two were accidentally implemented with underscores instead of
379      hyphens in the option names. As this was not discovered for several releases,
380      the incorrect versions are left in the table for compatibility. However, the
381      --help function misses out any option that has an underscore in its name. */
382    
383      { OP_PATLIST,   N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
384      { OP_PATLIST,   N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
385    
386  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
387    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
388  #endif  #endif
# Line 202  static option_item optionlist[] = { Line 398  static option_item optionlist[] = {
398  /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F  /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
399  options. These set the 1, 2, and 4 bits in process_options, respectively. Note  options. These set the 1, 2, and 4 bits in process_options, respectively. Note
400  that the combination of -w and -x has the same effect as -x on its own, so we  that the combination of -w and -x has the same effect as -x on its own, so we
401  can treat them as the same. */  can treat them as the same. Note that the MAXPATLEN macro assumes the longest
402    prefix+suffix is 10 characters; if anything longer is added, it must be
403    adjusted. */
404    
405  static const char *prefix[] = {  static const char *prefix[] = {
406    "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };    "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
# Line 210  static const char *prefix[] = { Line 408  static const char *prefix[] = {
408  static const char *suffix[] = {  static const char *suffix[] = {
409    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
410    
411    /* UTF-8 tables - used only when the newline setting is "any". */
412    
413    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
414    
415    const char utf8_table4[] = {
416      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
417      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
418      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
419      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
420    
421    
422    
423    /*************************************************
424    *         Exit from the program                  *
425    *************************************************/
426    
427    /* If there has been a resource error, give a suitable message.
428    
429    Argument:  the return code
430    Returns:   does not return
431    */
432    
433    static void
434    pcregrep_exit(int rc)
435    {
436    if (resource_error)
437      {
438      fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
439        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
440        PCRE_ERROR_JIT_STACKLIMIT);
441      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
442      }
443    exit(rc);
444    }
445    
446    
447    /*************************************************
448    *          Add item to chain of patterns         *
449    *************************************************/
450    
451    /* Used to add an item onto a chain, or just return an unconnected item if the
452    "after" argument is NULL.
453    
454    Arguments:
455      s          pattern string to add
456      after      if not NULL points to item to insert after
457    
458    Returns:     new pattern block
459    */
460    
461    static patstr *
462    add_pattern(char *s, patstr *after)
463    {
464    patstr *p = (patstr *)malloc(sizeof(patstr));
465    if (p == NULL)
466      {
467      fprintf(stderr, "pcregrep: malloc failed\n");
468      pcregrep_exit(2);
469      }
470    if (strlen(s) > MAXPATLEN)
471      {
472      fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
473        MAXPATLEN);
474      return NULL;
475      }
476    p->next = NULL;
477    p->string = s;
478    p->compiled = NULL;
479    p->hint = NULL;
480    
481    if (after != NULL)
482      {
483      p->next = after->next;
484      after->next = p;
485      }
486    return p;
487    }
488    
489    
490    /*************************************************
491    *           Free chain of patterns               *
492    *************************************************/
493    
494    /* Used for several chains of patterns.
495    
496    Argument: pointer to start of chain
497    Returns:  nothing
498    */
499    
500    static void
501    free_pattern_chain(patstr *pc)
502    {
503    while (pc != NULL)
504      {
505      patstr *p = pc;
506      pc = p->next;
507      if (p->hint != NULL) pcre_free_study(p->hint);
508      if (p->compiled != NULL) pcre_free(p->compiled);
509      free(p);
510      }
511    }
512    
513    
514    /*************************************************
515    *           Free chain of file names             *
516    *************************************************/
517    
518    /*
519    Argument: pointer to start of chain
520    Returns:  nothing
521    */
522    
523    static void
524    free_file_chain(fnstr *fn)
525    {
526    while (fn != NULL)
527      {
528      fnstr *f = fn;
529      fn = f->next;
530      free(f);
531      }
532    }
533    
534    
535  /*************************************************  /*************************************************
536  *            OS-specific functions               *  *            OS-specific functions               *
537  *************************************************/  *************************************************/
538    
539  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific.
540  although at present the only ones are for Unix, Win32, and for "no support". */  At present there are versions for Unix-style environments, Windows, native
541    z/OS, and "no support". */
542    
543    
544  /************* Directory scanning in Unix ***********/  /************* Directory scanning Unix-style and z/OS ***********/
545    
546  #if IS_UNIX  #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
547  #include <sys/types.h>  #include <sys/types.h>
548  #include <sys/stat.h>  #include <sys/stat.h>
549  #include <dirent.h>  #include <dirent.h>
550    
551    #if defined NATIVE_ZOS
552    /************* Directory and PDS/E scanning for z/OS ***********/
553    /************* z/OS looks mostly like Unix with USS ************/
554    /* However, z/OS needs the #include statements in this header */
555    #include "pcrzosfs.h"
556    /* That header is not included in the main PCRE distribution because
557       other apparatus is needed to compile pcregrep for z/OS. The header
558       can be found in the special z/OS distribution, which is available
559       from www.zaconsultants.net or from www.cbttape.org. */
560    #endif
561    
562  typedef DIR directory_type;  typedef DIR directory_type;
563    #define FILESEP '/'
564    
565  static int  static int
566  isdirectory(char *filename)  isdirectory(char *filename)
# Line 235  isdirectory(char *filename) Line 568  isdirectory(char *filename)
568  struct stat statbuf;  struct stat statbuf;
569  if (stat(filename, &statbuf) < 0)  if (stat(filename, &statbuf) < 0)
570    return 0;        /* In the expectation that opening as a file will fail */    return 0;        /* In the expectation that opening as a file will fail */
571  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return (statbuf.st_mode & S_IFMT) == S_IFDIR;
572  }  }
573    
574  static directory_type *  static directory_type *
# Line 254  for (;;) Line 587  for (;;)
587    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
588      return dent->d_name;      return dent->d_name;
589    }    }
590  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
591  }  }
592    
593  static void  static void
# Line 264  closedir(dir); Line 597  closedir(dir);
597  }  }
598    
599    
600  /************* Test for regular file in Unix **********/  /************* Test for regular file, Unix-style **********/
601    
602  static int  static int
603  isregfile(char *filename)  isregfile(char *filename)
# Line 276  return (statbuf.st_mode & S_IFMT) == S_I Line 609  return (statbuf.st_mode & S_IFMT) == S_I
609  }  }
610    
611    
612  /************* Test stdout for being a terminal in Unix **********/  #if defined NATIVE_ZOS
613    /************* Test for a terminal in z/OS **********/
614    /* isatty() does not work in a TSO environment, so always give FALSE.*/
615    
616    static BOOL
617    is_stdout_tty(void)
618    {
619    return FALSE;
620    }
621    
622    static BOOL
623    is_file_tty(FILE *f)
624    {
625    return FALSE;
626    }
627    
628    
629    /************* Test for a terminal, Unix-style **********/
630    
631    #else
632  static BOOL  static BOOL
633  is_stdout_tty(void)  is_stdout_tty(void)
634  {  {
635  return isatty(fileno(stdout));  return isatty(fileno(stdout));
636  }  }
637    
638    static BOOL
639    is_file_tty(FILE *f)
640    {
641    return isatty(fileno(f));
642    }
643    #endif
644    
645    /* End of Unix-style or native z/OS environment functions. */
646    
647  /************* Directory scanning in Win32 ***********/  
648    /************* Directory scanning in Windows ***********/
649    
650  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
651  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
652  when it did not exist. */  when it did not exist. David Byron added a patch that moved the #include of
653    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
654    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
655    undefined when it is indeed undefined. */
656    
657  #elif HAVE_WIN32API  #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
658    
659  #ifndef STRICT  #ifndef STRICT
660  # define STRICT  # define STRICT
# Line 300  when it did not exist. */ Line 662  when it did not exist. */
662  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
663  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
664  #endif  #endif
665    
666    #include <windows.h>
667    
668  #ifndef INVALID_FILE_ATTRIBUTES  #ifndef INVALID_FILE_ATTRIBUTES
669  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
670  #endif  #endif
671    
 #include <windows.h>  
   
672  typedef struct directory_type  typedef struct directory_type
673  {  {
674  HANDLE handle;  HANDLE handle;
# Line 313  BOOL first; Line 676  BOOL first;
676  WIN32_FIND_DATA data;  WIN32_FIND_DATA data;
677  } directory_type;  } directory_type;
678    
679    #define FILESEP '/'
680    
681  int  int
682  isdirectory(char *filename)  isdirectory(char *filename)
683  {  {
684  DWORD attr = GetFileAttributes(filename);  DWORD attr = GetFileAttributes(filename);
685  if (attr == INVALID_FILE_ATTRIBUTES)  if (attr == INVALID_FILE_ATTRIBUTES)
686    return 0;    return 0;
687  return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;  return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
688  }  }
689    
690  directory_type *  directory_type *
# Line 330  char *pattern; Line 695  char *pattern;
695  directory_type *dir;  directory_type *dir;
696  DWORD err;  DWORD err;
697  len = strlen(filename);  len = strlen(filename);
698  pattern = (char *) malloc(len + 3);  pattern = (char *)malloc(len + 3);
699  dir = (directory_type *) malloc(sizeof(*dir));  dir = (directory_type *)malloc(sizeof(*dir));
700  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
701    {    {
702    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
703    exit(2);    pcregrep_exit(2);
704    }    }
705  memcpy(pattern, filename, len);  memcpy(pattern, filename, len);
706  memcpy(&(pattern[len]), "\\*", 3);  memcpy(&(pattern[len]), "\\*", 3);
# Line 383  free(dir); Line 748  free(dir);
748  }  }
749    
750    
751  /************* Test for regular file in Win32 **********/  /************* Test for regular file in Windows **********/
752    
753  /* I don't know how to do this, or if it can be done; assume all paths are  /* I don't know how to do this, or if it can be done; assume all paths are
754  regular if they are not directories. */  regular if they are not directories. */
755    
756  int isregfile(char *filename)  int isregfile(char *filename)
757  {  {
758  return !isdirectory(filename)  return !isdirectory(filename);
759  }  }
760    
761    
762  /************* Test stdout for being a terminal in Win32 **********/  /************* Test for a terminal in Windows **********/
763    
764  /* I don't know how to do this; assume never */  /* I don't know how to do this; assume never */
765    
766  static BOOL  static BOOL
767  is_stdout_tty(void)  is_stdout_tty(void)
768  {  {
769  FALSE;  return FALSE;
770    }
771    
772    static BOOL
773    is_file_tty(FILE *f)
774    {
775    return FALSE;
776  }  }
777    
778    /* End of Windows functions */
779    
780    
781  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
782    
# Line 411  FALSE; Line 784  FALSE;
784    
785  #else  #else
786    
787    #define FILESEP 0
788  typedef void directory_type;  typedef void directory_type;
789    
790  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
791  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
792  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
793  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
794    
795    
796  /************* Test for regular when we can't do it **********/  /************* Test for regular file when we can't do it **********/
797    
798  /* Assume all files are regular. */  /* Assume all files are regular. */
799    
800  int isregfile(char *filename) { return 1; }  int isregfile(char *filename) { return 1; }
801    
802    
803  /************* Test stdout for being a terminal when we can't do it **********/  /************* Test for a terminal when we can't do it **********/
804    
805  static BOOL  static BOOL
806  is_stdout_tty(void)  is_stdout_tty(void)
# Line 434  is_stdout_tty(void) Line 808  is_stdout_tty(void)
808  return FALSE;  return FALSE;
809  }  }
810    
811    static BOOL
812    is_file_tty(FILE *f)
813    {
814    return FALSE;
815    }
816    
817  #endif  #endif  /* End of system-specific functions */
818    
819    
820    
821  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
822  /*************************************************  /*************************************************
823  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
824  *************************************************/  *************************************************/
# Line 462  return sys_errlist[n]; Line 841  return sys_errlist[n];
841    
842    
843  /*************************************************  /*************************************************
844  *       Print the previous "after" lines         *  *                Usage function                  *
845  *************************************************/  *************************************************/
846    
847  /* This is called if we are about to lose said lines because of buffer filling,  static int
848  and at the end of the file. The data in the line is written using fwrite() so  usage(int rc)
849  that a binary zero does not terminate it.  {
850    option_item *op;
851    fprintf(stderr, "Usage: pcregrep [-");
852    for (op = optionlist; op->one_char != 0; op++)
853      {
854      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
855      }
856    fprintf(stderr, "] [long options] [pattern] [files]\n");
857    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
858      "options.\n");
859    return rc;
860    }
861    
 Arguments:  
   lastmatchnumber   the number of the last matching line, plus one  
   lastmatchrestart  where we restarted after the last match  
   endptr            end of available data  
   printname         filename for printing  
862    
 Returns:            nothing  
 */  
863    
864  static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,  /*************************************************
865    char *endptr, char *printname)  *                Help function                   *
866    *************************************************/
867    
868    static void
869    help(void)
870  {  {
871  if (after_context > 0 && lastmatchnumber > 0)  option_item *op;
872    
873    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
874    printf("Search for PATTERN in each FILE or standard input.\n");
875    printf("PATTERN must be present if neither -e nor -f is used.\n");
876    printf("\"-\" can be used as a file name to mean STDIN.\n");
877    
878    #ifdef SUPPORT_LIBZ
879    printf("Files whose names end in .gz are read using zlib.\n");
880    #endif
881    
882    #ifdef SUPPORT_LIBBZ2
883    printf("Files whose names end in .bz2 are read using bzlib2.\n");
884    #endif
885    
886    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
887    printf("Other files and the standard input are read as plain files.\n\n");
888    #else
889    printf("All files are read as plain files, without any interpretation.\n\n");
890    #endif
891    
892    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
893    printf("Options:\n");
894    
895    for (op = optionlist; op->one_char != 0; op++)
896    {    {
897    int count = 0;    int n;
898    while (lastmatchrestart < endptr && count++ < after_context)    char s[4];
899    
900      /* Two options were accidentally implemented and documented with underscores
901      instead of hyphens in their names, something that was not noticed for quite a
902      few releases. When fixing this, I left the underscored versions in the list
903      in case people were using them. However, we don't want to display them in the
904      help data. There are no other options that contain underscores, and we do not
905      expect ever to implement such options. Therefore, just omit any option that
906      contains an underscore. */
907    
908      if (strchr(op->long_name, '_') != NULL) continue;
909    
910      if (op->one_char > 0 && (op->long_name)[0] == 0)
911        n = 31 - printf("  -%c", op->one_char);
912      else
913      {      {
914      char *pp = lastmatchrestart;      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
915      if (printname != NULL) fprintf(stdout, "%s-", printname);        else strcpy(s, "   ");
916      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      n = 31 - printf("  %s --%s", s, op->long_name);
     while (*pp != '\n') pp++;  
     fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);  
     lastmatchrestart = pp + 1;  
917      }      }
918    hyphenpending = TRUE;  
919      if (n < 1) n = 1;
920      printf("%.*s%s\n", n, "                           ", op->help_text);
921    }    }
922    
923    printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
924    printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
925    printf("When reading patterns or file names from a file, trailing white\n");
926    printf("space is removed and blank lines are ignored.\n");
927    printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
928    
929    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
930    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
931  }  }
932    
933    
934    
935  /*************************************************  /*************************************************
936  *            Grep an individual file             *  *            Test exclude/includes               *
937  *************************************************/  *************************************************/
938    
939  /* This is called from grep_or_recurse() below. It uses a buffer that is three  /* If any exclude pattern matches, the path is excluded. Otherwise, unless
940  times the value of MBUFTHIRD. The matching point is never allowed to stray into  there are no includes, the path must match an include pattern.
 the top third of the buffer, thus keeping more of the file available for  
 context printing or for multiline scanning. For large files, the pointer will  
 be in the middle third most of the time, so the bottom third is available for  
 "before" context printing.  
941    
942  Arguments:  Arguments:
943    in           the fopened FILE stream    path      the path to be matched
944    printname    the file name if it is to be printed for each match    ip        the chain of include patterns
945                 or NULL if the file name is not to be printed    ep        the chain of exclude patterns
                it cannot be NULL if filenames[_nomatch]_only is set  
946    
947  Returns:       0 if there was at least one match  Returns:    TRUE if the path is not excluded
                1 otherwise (no matches)  
948  */  */
949    
950  static int  static BOOL
951  pcregrep(FILE *in, char *printname)  test_incexc(char *path, patstr *ip, patstr *ep)
952  {  {
953  int rc = 1;  int plen = strlen(path);
 int linenumber = 1;  
 int lastmatchnumber = 0;  
 int count = 0;  
 int offsets[99];  
 char *lastmatchrestart = NULL;  
 char buffer[3*MBUFTHIRD];  
 char *ptr = buffer;  
 char *endptr;  
 size_t bufflength;  
 BOOL endhyphenpending = FALSE;  
   
 /* Do the first read into the start of the buffer and set up the pointer to  
 end of what we have. */  
954    
955  bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  for (; ep != NULL; ep = ep->next)
956  endptr = buffer + bufflength;    {
957      if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
958        return FALSE;
959      }
960    
961  /* Loop while the current pointer is not at the end of the file. For large  if (ip == NULL) return TRUE;
 files, endptr will be at the end of the buffer when we are in the middle of the  
 file, but ptr will never get there, because as soon as it gets over 2/3 of the  
 way, the buffer is shifted left and re-filled. */  
962    
963  while (ptr < endptr)  for (; ip != NULL; ip = ip->next)
964    {    {
965    int i;    if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
966    int mrc = 0;      return TRUE;
967    BOOL match = FALSE;    }
   char *t = ptr;  
   size_t length, linelength;  
968    
969    /* At this point, ptr is at the start of a line. We need to find the length  return FALSE;
970    of the subject string to pass to pcre_exec(). In multiline mode, it is the  }
   length remainder of the data in the buffer. Otherwise, it is the length of  
   the next line. After matching, we always advance by the length of the next  
   line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so  
   that any match is constrained to be in the first line. */  
   
   linelength = 0;  
   while (t < endptr && *t++ != '\n') linelength++;  
   length = multiline? endptr - ptr : linelength;  
   
   /* Run through all the patterns until one matches. Note that we don't include  
   the final newline in the subject string. */  
   
   for (i = 0; i < pattern_count; i++)  
     {  
     mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,  
       offsets, 99);  
     if (mrc >= 0) { match = TRUE; break; }  
     if (mrc != PCRE_ERROR_NOMATCH)  
       {  
       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);  
       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
       fprintf(stderr, "this line:\n");  
       fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */  
       fprintf(stderr, "\n");  
       if (error_count == 0 &&  
           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))  
         {  
         fprintf(stderr, "pcregrep: error %d means that a resource limit "  
           "was exceeded\n", mrc);  
         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
         }  
       if (error_count++ > 20)  
         {  
         fprintf(stderr, "pcregrep: too many errors - abandoned\n");  
         exit(2);  
         }  
       match = invert;    /* No more matching; don't show the line again */  
       break;  
       }  
     }  
971    
972    /* If it's a match or a not-match (as required), do what's wanted. */  
973    
974    /*************************************************
975    *         Decode integer argument value          *
976    *************************************************/
977    
978    /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
979    because SunOS4 doesn't have it. This is used only for unpicking arguments, so
980    just keep it simple.
981    
982    Arguments:
983      option_data   the option data string
984      op            the option item (for error messages)
985      longop        TRUE if option given in long form
986    
987    Returns:        a long integer
988    */
989    
990    static long int
991    decode_number(char *option_data, option_item *op, BOOL longop)
992    {
993    unsigned long int n = 0;
994    char *endptr = option_data;
995    while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
996    while (isdigit((unsigned char)(*endptr)))
997      n = n * 10 + (int)(*endptr++ - '0');
998    if (toupper(*endptr) == 'K')
999      {
1000      n *= 1024;
1001      endptr++;
1002      }
1003    else if (toupper(*endptr) == 'M')
1004      {
1005      n *= 1024*1024;
1006      endptr++;
1007      }
1008    
1009    if (*endptr != 0)   /* Error */
1010      {
1011      if (longop)
1012        {
1013        char *equals = strchr(op->long_name, '=');
1014        int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1015          (int)(equals - op->long_name);
1016        fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1017          option_data, nlen, op->long_name);
1018        }
1019      else
1020        fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1021          option_data, op->one_char);
1022      pcregrep_exit(usage(2));
1023      }
1024    
1025    return n;
1026    }
1027    
1028    
1029    
1030    /*************************************************
1031    *       Add item to a chain of numbers           *
1032    *************************************************/
1033    
1034    /* Used to add an item onto a chain, or just return an unconnected item if the
1035    "after" argument is NULL.
1036    
1037    Arguments:
1038      n          the number to add
1039      after      if not NULL points to item to insert after
1040    
1041    Returns:     new number block
1042    */
1043    
1044    static omstr *
1045    add_number(int n, omstr *after)
1046    {
1047    omstr *om = (omstr *)malloc(sizeof(omstr));
1048    
1049    if (om == NULL)
1050      {
1051      fprintf(stderr, "pcregrep: malloc failed\n");
1052      pcregrep_exit(2);
1053      }
1054    om->next = NULL;
1055    om->groupnum = n;
1056    
1057    if (after != NULL)
1058      {
1059      om->next = after->next;
1060      after->next = om;
1061      }
1062    return om;
1063    }
1064    
1065    
1066    
/*************************************************
*            Read one line of input              *
*************************************************/

/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. fgets() cannot be used for this, because it does not stop at a binary
zero, and so there would be no way of telling how many characters had been
read when binary zeros are embedded in the data. Characters are therefore
fetched one at a time with fgetc() and counted explicitly.

Reading stops after a newline has been stored, when "length" characters have
been stored, or at end of file, whichever comes first. No terminating zero is
added to the buffer.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int nread = 0;

for (;;)
  {
  int ch = fgetc(f);
  if (ch == EOF) break;
  buffer[nread++] = ch;
  if (ch == '\n') break;        /* The newline is part of the line */
  if (nread >= length) break;   /* Buffer is full */
  }

return nread;
}
1098    
1099    
1100    
1101    /*************************************************
1102    *             Find end of line                   *
1103    *************************************************/
1104    
1105    /* The length of the endline sequence that is found is set via lenptr. This may
1106    be zero at the very end of the file if there is no line-ending sequence there.
1107    
1108    Arguments:
1109      p         current position in line
1110      endptr    end of available data
1111      lenptr    where to put the length of the eol sequence
1112    
1113    Returns:    pointer after the last byte of the line,
1114                including the newline byte(s)
1115    */
1116    
1117    static char *
1118    end_of_line(char *p, char *endptr, int *lenptr)
1119    {
1120    switch(endlinetype)
1121      {
1122      default:      /* Just in case */
1123      case EL_LF:
1124      while (p < endptr && *p != '\n') p++;
1125      if (p < endptr)
1126        {
1127        *lenptr = 1;
1128        return p + 1;
1129        }
1130      *lenptr = 0;
1131      return endptr;
1132    
1133      case EL_CR:
1134      while (p < endptr && *p != '\r') p++;
1135      if (p < endptr)
1136        {
1137        *lenptr = 1;
1138        return p + 1;
1139        }
1140      *lenptr = 0;
1141      return endptr;
1142    
1143      case EL_CRLF:
1144      for (;;)
1145        {
1146        while (p < endptr && *p != '\r') p++;
1147        if (++p >= endptr)
1148          {
1149          *lenptr = 0;
1150          return endptr;
1151          }
1152        if (*p == '\n')
1153          {
1154          *lenptr = 2;
1155          return p + 1;
1156          }
1157        }
1158      break;
1159    
1160      case EL_ANYCRLF:
1161      while (p < endptr)
1162        {
1163        int extra = 0;
1164        register int c = *((unsigned char *)p);
1165    
1166        if (utf8 && c >= 0xc0)
1167          {
1168          int gcii, gcss;
1169          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1170          gcss = 6*extra;
1171          c = (c & utf8_table3[extra]) << gcss;
1172          for (gcii = 1; gcii <= extra; gcii++)
1173            {
1174            gcss -= 6;
1175            c |= (p[gcii] & 0x3f) << gcss;
1176            }
1177          }
1178    
1179        p += 1 + extra;
1180    
1181        switch (c)
1182          {
1183          case '\n':
1184          *lenptr = 1;
1185          return p;
1186    
1187          case '\r':
1188          if (p < endptr && *p == '\n')
1189            {
1190            *lenptr = 2;
1191            p++;
1192            }
1193          else *lenptr = 1;
1194          return p;
1195    
1196          default:
1197          break;
1198          }
1199        }   /* End of loop for ANYCRLF case */
1200    
1201      *lenptr = 0;  /* Must have hit the end */
1202      return endptr;
1203    
1204      case EL_ANY:
1205      while (p < endptr)
1206        {
1207        int extra = 0;
1208        register int c = *((unsigned char *)p);
1209    
1210        if (utf8 && c >= 0xc0)
1211          {
1212          int gcii, gcss;
1213          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1214          gcss = 6*extra;
1215          c = (c & utf8_table3[extra]) << gcss;
1216          for (gcii = 1; gcii <= extra; gcii++)
1217            {
1218            gcss -= 6;
1219            c |= (p[gcii] & 0x3f) << gcss;
1220            }
1221          }
1222    
1223        p += 1 + extra;
1224    
1225        switch (c)
1226          {
1227          case '\n':    /* LF */
1228          case '\v':    /* VT */
1229          case '\f':    /* FF */
1230          *lenptr = 1;
1231          return p;
1232    
1233          case '\r':    /* CR */
1234          if (p < endptr && *p == '\n')
1235            {
1236            *lenptr = 2;
1237            p++;
1238            }
1239          else *lenptr = 1;
1240          return p;
1241    
1242    #ifndef EBCDIC
1243          case 0x85:    /* Unicode NEL */
1244          *lenptr = utf8? 2 : 1;
1245          return p;
1246    
1247          case 0x2028:  /* Unicode LS */
1248          case 0x2029:  /* Unicode PS */
1249          *lenptr = 3;
1250          return p;
1251    #endif  /* Not EBCDIC */
1252    
1253          default:
1254          break;
1255          }
1256        }   /* End of loop for ANY case */
1257    
1258      *lenptr = 0;  /* Must have hit the end */
1259      return endptr;
1260      }     /* End of overall switch */
1261    }
1262    
1263    
1264    
1265    /*************************************************
1266    *         Find start of previous line            *
1267    *************************************************/
1268    
1269    /* This is called when looking back for before lines to print.
1270    
1271    Arguments:
1272      p         start of the subsequent line
1273      startptr  start of available data
1274    
1275    Returns:    pointer to the start of the previous line
1276    */
1277    
1278    static char *
1279    previous_line(char *p, char *startptr)
1280    {
1281    switch(endlinetype)
1282      {
1283      default:      /* Just in case */
1284      case EL_LF:
1285      p--;
1286      while (p > startptr && p[-1] != '\n') p--;
1287      return p;
1288    
1289      case EL_CR:
1290      p--;
1291      while (p > startptr && p[-1] != '\n') p--;
1292      return p;
1293    
1294      case EL_CRLF:
1295      for (;;)
1296        {
1297        p -= 2;
1298        while (p > startptr && p[-1] != '\n') p--;
1299        if (p <= startptr + 1 || p[-2] == '\r') return p;
1300        }
1301      return p;   /* But control should never get here */
1302    
1303      case EL_ANY:
1304      case EL_ANYCRLF:
1305      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1306      if (utf8) while ((*p & 0xc0) == 0x80) p--;
1307    
1308      while (p > startptr)
1309        {
1310        register unsigned int c;
1311        char *pp = p - 1;
1312    
1313        if (utf8)
1314          {
1315          int extra = 0;
1316          while ((*pp & 0xc0) == 0x80) pp--;
1317          c = *((unsigned char *)pp);
1318          if (c >= 0xc0)
1319            {
1320            int gcii, gcss;
1321            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1322            gcss = 6*extra;
1323            c = (c & utf8_table3[extra]) << gcss;
1324            for (gcii = 1; gcii <= extra; gcii++)
1325              {
1326              gcss -= 6;
1327              c |= (pp[gcii] & 0x3f) << gcss;
1328              }
1329            }
1330          }
1331        else c = *((unsigned char *)pp);
1332    
1333        if (endlinetype == EL_ANYCRLF) switch (c)
1334          {
1335          case '\n':    /* LF */
1336          case '\r':    /* CR */
1337          return p;
1338    
1339          default:
1340          break;
1341          }
1342    
1343        else switch (c)
1344          {
1345          case '\n':    /* LF */
1346          case '\v':    /* VT */
1347          case '\f':    /* FF */
1348          case '\r':    /* CR */
1349    #ifndef EBCDIE
1350          case 0x85:    /* Unicode NEL */
1351          case 0x2028:  /* Unicode LS */
1352          case 0x2029:  /* Unicode PS */
1353    #endif  /* Not EBCDIC */
1354          return p;
1355    
1356          default:
1357          break;
1358          }
1359    
1360        p = pp;  /* Back one character */
1361        }        /* End of loop for ANY case */
1362    
1363      return startptr;  /* Hit start of data */
1364      }     /* End of overall switch */
1365    }
1366    
1367    
1368    
1369    
1370    
1371    /*************************************************
1372    *       Print the previous "after" lines         *
1373    *************************************************/
1374    
1375    /* This is called if we are about to lose said lines because of buffer filling,
1376    and at the end of the file. The data in the line is written using fwrite() so
1377    that a binary zero does not terminate it.
1378    
1379    Arguments:
1380      lastmatchnumber   the number of the last matching line, plus one
1381      lastmatchrestart  where we restarted after the last match
1382      endptr            end of available data
1383      printname         filename for printing
1384    
1385    Returns:            nothing
1386    */
1387    
1388    static void
1389    do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1390      char *printname)
1391    {
1392    if (after_context > 0 && lastmatchnumber > 0)
1393      {
1394      int count = 0;
1395      while (lastmatchrestart < endptr && count++ < after_context)
1396        {
1397        int ellength;
1398        char *pp = lastmatchrestart;
1399        if (printname != NULL) fprintf(stdout, "%s-", printname);
1400        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1401        pp = end_of_line(pp, endptr, &ellength);
1402        FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1403        lastmatchrestart = pp;
1404        }
1405      hyphenpending = TRUE;
1406      }
1407    }
1408    
1409    
1410    
1411    /*************************************************
1412    *   Apply patterns to subject till one matches   *
1413    *************************************************/
1414    
1415    /* This function is called to run through all patterns, looking for a match. It
1416    is used multiple times for the same subject when colouring is enabled, in order
1417    to find all possible matches.
1418    
1419    Arguments:
1420      matchptr     the start of the subject
1421      length       the length of the subject to match
1422      options      options for pcre_exec
1423      startoffset  where to start matching
1424      offsets      the offets vector to fill in
1425      mrc          address of where to put the result of pcre_exec()
1426    
1427    Returns:      TRUE if there was a match
1428                  FALSE if there was no match
1429                  invert if there was a non-fatal error
1430    */
1431    
1432    static BOOL
1433    match_patterns(char *matchptr, size_t length, unsigned int options,
1434      int startoffset, int *offsets, int *mrc)
1435    {
1436    int i;
1437    size_t slen = length;
1438    patstr *p = patterns;
1439    const char *msg = "this text:\n\n";
1440    
1441    if (slen > 200)
1442      {
1443      slen = 200;
1444      msg = "text that starts:\n\n";
1445      }
1446    for (i = 1; p != NULL; p = p->next, i++)
1447      {
1448      *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1449        startoffset, options, offsets, OFFSET_SIZE);
1450      if (*mrc >= 0) return TRUE;
1451      if (*mrc == PCRE_ERROR_NOMATCH) continue;
1452      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1453      if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1454      fprintf(stderr, "%s", msg);
1455      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1456      fprintf(stderr, "\n\n");
1457      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1458          *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1459        resource_error = TRUE;
1460      if (error_count++ > 20)
1461        {
1462        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1463        pcregrep_exit(2);
1464        }
1465      return invert;    /* No more matching; don't show the line again */
1466      }
1467    
1468    return FALSE;  /* No match, no errors */
1469    }
1470    
1471    
1472    
1473    /*************************************************
1474    *            Grep an individual file             *
1475    *************************************************/
1476    
1477    /* This is called from grep_or_recurse() below. It uses a buffer that is three
1478    times the value of bufthird. The matching point is never allowed to stray into
1479    the top third of the buffer, thus keeping more of the file available for
1480    context printing or for multiline scanning. For large files, the pointer will
1481    be in the middle third most of the time, so the bottom third is available for
1482    "before" context printing.
1483    
1484    Arguments:
1485      handle       the fopened FILE stream for a normal file
1486                   the gzFile pointer when reading is via libz
1487                   the BZFILE pointer when reading is via libbz2
1488      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1489      filename     the file name or NULL (for errors)
1490      printname    the file name if it is to be printed for each match
1491                   or NULL if the file name is not to be printed
1492                   it cannot be NULL if filenames[_nomatch]_only is set
1493    
1494    Returns:       0 if there was at least one match
1495                   1 otherwise (no matches)
1496                   2 if an overlong line is encountered
1497                   3 if there is a read error on a .bz2 file
1498    */
1499    
1500    static int
1501    pcregrep(void *handle, int frtype, char *filename, char *printname)
1502    {
1503    int rc = 1;
1504    int linenumber = 1;
1505    int lastmatchnumber = 0;
1506    int count = 0;
1507    int filepos = 0;
1508    int offsets[OFFSET_SIZE];
1509    char *lastmatchrestart = NULL;
1510    char *ptr = main_buffer;
1511    char *endptr;
1512    size_t bufflength;
1513    BOOL binary = FALSE;
1514    BOOL endhyphenpending = FALSE;
1515    BOOL input_line_buffered = line_buffered;
1516    FILE *in = NULL;                    /* Ensure initialized */
1517    
1518    #ifdef SUPPORT_LIBZ
1519    gzFile ingz = NULL;
1520    #endif
1521    
1522    #ifdef SUPPORT_LIBBZ2
1523    BZFILE *inbz2 = NULL;
1524    #endif
1525    
1526    
1527    /* Do the first read into the start of the buffer and set up the pointer to end
1528    of what we have. In the case of libz, a non-zipped .gz file will be read as a
1529    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1530    fail. */
1531    
1532    (void)frtype;
1533    
1534    #ifdef SUPPORT_LIBZ
1535    if (frtype == FR_LIBZ)
1536      {
1537      ingz = (gzFile)handle;
1538      bufflength = gzread (ingz, main_buffer, bufsize);
1539      }
1540    else
1541    #endif
1542    
1543    #ifdef SUPPORT_LIBBZ2
1544    if (frtype == FR_LIBBZ2)
1545      {
1546      inbz2 = (BZFILE *)handle;
1547      bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1548      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1549      }                                    /* without the cast it is unsigned. */
1550    else
1551    #endif
1552    
1553      {
1554      in = (FILE *)handle;
1555      if (is_file_tty(in)) input_line_buffered = TRUE;
1556      bufflength = input_line_buffered?
1557        read_one_line(main_buffer, bufsize, in) :
1558        fread(main_buffer, 1, bufsize, in);
1559      }
1560    
1561    endptr = main_buffer + bufflength;
1562    
1563    /* Unless binary-files=text, see if we have a binary file. This uses the same
1564    rule as GNU grep, namely, a search for a binary zero byte near the start of the
1565    file. */
1566    
1567    if (binary_files != BIN_TEXT)
1568      {
1569      binary =
1570        memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1571      if (binary && binary_files == BIN_NOMATCH) return 1;
1572      }
1573    
1574    /* Loop while the current pointer is not at the end of the file. For large
1575    files, endptr will be at the end of the buffer when we are in the middle of the
1576    file, but ptr will never get there, because as soon as it gets over 2/3 of the
1577    way, the buffer is shifted left and re-filled. */
1578    
1579    while (ptr < endptr)
1580      {
1581      int endlinelength;
1582      int mrc = 0;
1583      int startoffset = 0;
1584      unsigned int options = 0;
1585      BOOL match;
1586      char *matchptr = ptr;
1587      char *t = ptr;
1588      size_t length, linelength;
1589    
1590      /* At this point, ptr is at the start of a line. We need to find the length
1591      of the subject string to pass to pcre_exec(). In multiline mode, it is the
1592      length of the remainder of the data in the buffer. Otherwise, it is the length of
1593      the next line, excluding the terminating newline. After matching, we always
1594      advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1595      option is used for compiling, so that any match is constrained to be in the
1596      first line. */
1597    
1598      t = end_of_line(t, endptr, &endlinelength);
1599      linelength = t - ptr - endlinelength;
1600      length = multiline? (size_t)(endptr - ptr) : linelength;
1601    
1602      /* Check to see if the line we are looking at extends right to the very end
1603      of the buffer without a line terminator. This means the line is too long to
1604      handle. */
1605    
1606      if (endlinelength == 0 && t == main_buffer + bufsize)
1607        {
1608        fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1609                        "pcregrep: check the --buffer-size option\n",
1610                        linenumber,
1611                        (filename == NULL)? "" : " of file ",
1612                        (filename == NULL)? "" : filename);
1613        return 2;
1614        }
1615    
1616      /* Extra processing for Jeffrey Friedl's debugging. */
1617    
1618    #ifdef JFRIEDL_DEBUG
1619      if (jfriedl_XT || jfriedl_XR)
1620      {
1621    #     include <sys/time.h>
1622    #     include <time.h>
1623          struct timeval start_time, end_time;
1624          struct timezone dummy;
1625          int i;
1626    
1627          if (jfriedl_XT)
1628          {
1629              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1630              const char *orig = ptr;
1631              ptr = malloc(newlen + 1);
1632              if (!ptr) {
1633                      printf("out of memory");
1634                      pcregrep_exit(2);
1635              }
1636              endptr = ptr;
1637              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1638              for (i = 0; i < jfriedl_XT; i++) {
1639                      strncpy(endptr, orig,  length);
1640                      endptr += length;
1641              }
1642              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1643              length = newlen;
1644          }
1645    
1646          if (gettimeofday(&start_time, &dummy) != 0)
1647                  perror("bad gettimeofday");
1648    
1649    
1650          for (i = 0; i < jfriedl_XR; i++)
1651              match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1652                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1653    
1654          if (gettimeofday(&end_time, &dummy) != 0)
1655                  perror("bad gettimeofday");
1656    
1657          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1658                          -
1659                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1660    
1661          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1662          return 0;
1663      }
1664    #endif
1665    
1666      /* We come back here after a match when show_only_matching is set, in order
1667      to find any further matches in the same line. This applies to
1668      --only-matching, --file-offsets, and --line-offsets. */
1669    
1670      ONLY_MATCHING_RESTART:
1671    
1672      /* Run through all the patterns until one matches or there is an error other
1673      than NOMATCH. This code is in a subroutine so that it can be re-used for
1674      finding subsequent matches when colouring matched lines. After finding one
1675      match, set PCRE_NOTEMPTY to disable any further matches of null strings in
1676      this line. */
1677    
1678      match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc);
1679      options = PCRE_NOTEMPTY;
1680    
1681      /* If it's a match or a not-match (as required), do what's wanted. */
1682    
1683    if (match != invert)    if (match != invert)
1684      {      {
# Line 611  while (ptr < endptr) Line 1692  while (ptr < endptr)
1692    
1693      if (count_only) count++;      if (count_only) count++;
1694    
1695        /* When handling a binary file and binary-files==binary, the "binary"
1696        variable will be set true (it's false in all other cases). In this
1697        situation we just want to output the file name. No need to scan further. */
1698    
1699        else if (binary)
1700          {
1701          fprintf(stdout, "Binary file %s matches\n", filename);
1702          return 0;
1703          }
1704    
1705      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1706      in the file. */      in the file. */
1707    
1708      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1709        {        {
1710        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1711        return 0;        return 0;
# Line 624  while (ptr < endptr) Line 1715  while (ptr < endptr)
1715    
1716      else if (quiet) return 0;      else if (quiet) return 0;
1717    
1718      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched,
1719      does not print any context. */      and/or one or more captured portions of it, as long as these strings are
1720        not empty. The --file-offsets and --line-offsets options output offsets for
1721        the matching substring (all three set show_only_matching). None of these
1722        mutually exclusive options prints any context. Afterwards, adjust the start
1723        and then jump back to look for further matches in the same line. If we are
1724        in invert mode, however, nothing is printed and we do not restart - this
1725        could still be useful because the return code is set. */
1726    
1727      else if (only_matching)      else if (show_only_matching)
1728        {        {
1729        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (!invert)
1730        if (number) fprintf(stdout, "%d:", linenumber);          {
1731        fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1732        fprintf(stdout, "\n");          if (number) fprintf(stdout, "%d:", linenumber);
1733    
1734            /* Handle --line-offsets */
1735    
1736            if (line_offsets)
1737              fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1738                offsets[1] - offsets[0]);
1739    
1740            /* Handle --file-offsets */
1741    
1742            else if (file_offsets)
1743              fprintf(stdout, "%d,%d\n",
1744                (int)(filepos + matchptr + offsets[0] - ptr),
1745                offsets[1] - offsets[0]);
1746    
1747            /* Handle --only-matching, which may occur many times */
1748    
1749            else
1750              {
1751              BOOL printed = FALSE;
1752              omstr *om;
1753    
1754              for (om = only_matching; om != NULL; om = om->next)
1755                {
1756                int n = om->groupnum;
1757                if (n < mrc)
1758                  {
1759                  int plen = offsets[2*n + 1] - offsets[2*n];
1760                  if (plen > 0)
1761                    {
1762                    if (printed) fprintf(stdout, "%s", om_separator);
1763                    if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1764                    FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
1765                    if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1766                    printed = TRUE;
1767                    }
1768                  }
1769                }
1770    
1771              if (printed || printname != NULL || number) fprintf(stdout, "\n");
1772              }
1773    
1774            /* Prepare to repeat to find the next match */
1775    
1776            match = FALSE;
1777            if (line_buffered) fflush(stdout);
1778            rc = 0;                      /* Had some success */
1779            startoffset = offsets[1];    /* Restart after the match */
1780            goto ONLY_MATCHING_RESTART;
1781            }
1782        }        }
1783    
1784      /* This is the default case when none of the above options is set. We print      /* This is the default case when none of the above options is set. We print
# Line 646  while (ptr < endptr) Line 1792  while (ptr < endptr)
1792    
1793        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
1794          {          {
1795            int ellength;
1796          int linecount = 0;          int linecount = 0;
1797          char *p = lastmatchrestart;          char *p = lastmatchrestart;
1798    
1799          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
1800            {            {
1801            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
1802            linecount++;            linecount++;
1803            }            }
1804    
# Line 665  while (ptr < endptr) Line 1811  while (ptr < endptr)
1811            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
1812            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1813            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1814            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1815            fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1816            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
1817            }            }
1818          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
1819          }          }
# Line 689  while (ptr < endptr) Line 1835  while (ptr < endptr)
1835          int linecount = 0;          int linecount = 0;
1836          char *p = ptr;          char *p = ptr;
1837    
1838          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1839                 linecount < before_context)                 linecount < before_context)
1840            {            {
1841            linecount++;            linecount++;
1842            p--;            p = previous_line(p, main_buffer);
           while (p > buffer && p[-1] != '\n') p--;  
1843            }            }
1844    
1845          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 702  while (ptr < endptr) Line 1847  while (ptr < endptr)
1847    
1848          while (p < ptr)          while (p < ptr)
1849            {            {
1850              int ellength;
1851            char *pp = p;            char *pp = p;
1852            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1853            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1854            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1855            fwrite(p, 1, pp - p + 1, stdout);   /* In case binary zero */            FWRITE(p, 1, pp - p, stdout);
1856            p = pp + 1;            p = pp;
1857            }            }
1858          }          }
1859    
# Line 722  while (ptr < endptr) Line 1868  while (ptr < endptr)
1868    
1869        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1870        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1871        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1872        start of the match will always be before the first \n character. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1873          the match will always be before the first newline sequence. */
1874    
1875        if (multiline)        if (multiline & !invert)
1876          {          {
1877          char *endmatch = ptr + offsets[1];          char *endmatch = ptr + offsets[1];
1878          t = ptr;          t = ptr;
1879          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          while (t <= endmatch)
1880          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1881          linelength = endmatch - ptr;            t = end_of_line(t, endptr, &endlinelength);
1882              if (t < endmatch) linenumber++; else break;
1883              }
1884            linelength = t - ptr - endlinelength;
1885          }          }
1886    
1887        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 746  while (ptr < endptr) Line 1896  while (ptr < endptr)
1896          {          {
1897          int first = S_arg * 2;          int first = S_arg * 2;
1898          int last  = first + 1;          int last  = first + 1;
1899          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1900          fprintf(stdout, "X");          fprintf(stdout, "X");
1901          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1902          }          }
1903        else        else
1904  #endif  #endif
1905    
1906        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1907          matches, but not of course if the line is a non-match. */
1908    
1909        if (do_colour)        if (do_colour && !invert)
1910          {          {
1911          fwrite(ptr, 1, offsets[0], stdout);          int plength;
1912            FWRITE(ptr, 1, offsets[0], stdout);
1913          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1914          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1915          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1916          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          for (;;)
1917              {
1918              startoffset = offsets[1];
1919              if (startoffset >= (int)linelength + endlinelength ||
1920                  !match_patterns(matchptr, length, options, startoffset, offsets,
1921                    &mrc))
1922                break;
1923              FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1924              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1925              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1926              fprintf(stdout, "%c[00m", 0x1b);
1927              }
1928    
1929            /* In multiline mode, we may have already printed the complete line
1930            and its line-ending characters (if they matched the pattern), so there
1931            may be no more to print. */
1932    
1933            plength = (int)((linelength + endlinelength) - startoffset);
1934            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1935          }          }
       else fwrite(ptr, 1, linelength, stdout);  
1936    
1937        fprintf(stdout, "\n");        /* Not colouring; no need to search for further matches */
1938    
1939          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1940        }        }
1941    
1942      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match. If --line-buffered was
1943        given, flush the output. */
1944    
1945        if (line_buffered) fflush(stdout);
1946      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1947    
1948      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1949      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1950    
1951      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1952      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1953      }      }
1954    
1955    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1956      anything to be printed), we have to move on to the end of the match before
1957      proceeding. */
1958    
1959      if (multiline && invert && match)
1960        {
1961        int ellength;
1962        char *endmatch = ptr + offsets[1];
1963        t = ptr;
1964        while (t < endmatch)
1965          {
1966          t = end_of_line(t, endptr, &ellength);
1967          if (t <= endmatch) linenumber++; else break;
1968          }
1969        endmatch = end_of_line(endmatch, endptr, &ellength);
1970        linelength = endmatch - ptr - ellength;
1971        }
1972    
1973      /* Advance to after the newline and increment the line number. The file
1974      offset to the current line is maintained in filepos. */
1975    
1976    ptr += linelength + 1;    ptr += linelength + endlinelength;
1977      filepos += (int)(linelength + endlinelength);
1978    linenumber++;    linenumber++;
1979    
1980      /* If input is line buffered, and the buffer is not yet full, read another
1981      line and add it into the buffer. */
1982    
1983      if (input_line_buffered && bufflength < (size_t)bufsize)
1984        {
1985        int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1986        bufflength += add;
1987        endptr += add;
1988        }
1989    
1990    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1991    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1992    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
1993    about to be lost, print them. */    about to be lost, print them. */
1994    
1995    if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)    if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1996      {      {
1997      if (after_context > 0 &&      if (after_context > 0 &&
1998          lastmatchnumber > 0 &&          lastmatchnumber > 0 &&
1999          lastmatchrestart < buffer + MBUFTHIRD)          lastmatchrestart < main_buffer + bufthird)
2000        {        {
2001        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2002        lastmatchnumber = 0;        lastmatchnumber = 0;
# Line 801  while (ptr < endptr) Line 2004  while (ptr < endptr)
2004    
2005      /* Now do the shuffle */      /* Now do the shuffle */
2006    
2007      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
2008      ptr -= MBUFTHIRD;      ptr -= bufthird;
2009      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);  
2010      endptr = buffer + bufflength;  #ifdef SUPPORT_LIBZ
2011        if (frtype == FR_LIBZ)
2012          bufflength = 2*bufthird +
2013            gzread (ingz, main_buffer + 2*bufthird, bufthird);
2014        else
2015    #endif
2016    
2017    #ifdef SUPPORT_LIBBZ2
2018        if (frtype == FR_LIBBZ2)
2019          bufflength = 2*bufthird +
2020            BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
2021        else
2022    #endif
2023    
2024        bufflength = 2*bufthird +
2025          (input_line_buffered?
2026           read_one_line(main_buffer + 2*bufthird, bufthird, in) :
2027           fread(main_buffer + 2*bufthird, 1, bufthird, in));
2028        endptr = main_buffer + bufflength;
2029    
2030      /* Adjust any last match point */      /* Adjust any last match point */
2031    
2032      if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;      if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
2033      }      }
2034    }     /* Loop through the whole file */    }     /* Loop through the whole file */
2035    
2036  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
2037  hyphenpending if it prints something. */  hyphenpending if it prints something. */
2038    
2039  if (!only_matching && !count_only)  if (!show_only_matching && !count_only)
2040    {    {
2041    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2042    hyphenpending |= endhyphenpending;    hyphenpending |= endhyphenpending;
# Line 834  if (filenames == FN_NOMATCH_ONLY) Line 2055  if (filenames == FN_NOMATCH_ONLY)
2055    
2056  if (count_only)  if (count_only)
2057    {    {
2058    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
2059    fprintf(stdout, "%d\n", count);      {
2060        if (printname != NULL && filenames != FN_NONE)
2061          fprintf(stdout, "%s:", printname);
2062        fprintf(stdout, "%d\n", count);
2063        }
2064    }    }
2065    
2066  return rc;  return rc;
# Line 855  Arguments: Line 2080  Arguments:
2080    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
2081    only_one_at_top   TRUE if the path is the only one at toplevel    only_one_at_top   TRUE if the path is the only one at toplevel
2082    
2083  Returns:   0 if there was at least one match  Returns:  -1 the file/directory was skipped
2084               0 if there was at least one match
2085             1 if there were no matches             1 if there were no matches
2086             2 there was some kind of error             2 there was some kind of error
2087    
# Line 866  static int Line 2092  static int
2092  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2093  {  {
2094  int rc = 1;  int rc = 1;
2095  int sep;  int frtype;
2096  FILE *in;  void *handle;
2097    char *lastcomp;
2098    FILE *in = NULL;           /* Ensure initialized */
2099    
2100    #ifdef SUPPORT_LIBZ
2101    gzFile ingz = NULL;
2102    #endif
2103    
2104    #ifdef SUPPORT_LIBBZ2
2105    BZFILE *inbz2 = NULL;
2106    #endif
2107    
2108    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2109    int pathlen;
2110    #endif
2111    
2112    #if defined NATIVE_ZOS
2113    int zos_type;
2114    FILE *zos_test_file;
2115    #endif
2116    
2117  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
2118    
2119  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
2120    {    {
2121    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN, stdin_name,
2122      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
2123        stdin_name : NULL);        stdin_name : NULL);
2124    }    }
2125    
2126    /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2127    directories, whereas --include and --exclude apply to everything else. The test
2128    is against the final component of the path. */
2129    
2130    lastcomp = strrchr(pathname, FILESEP);
2131    lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2132    
2133    /* If the file is a directory, skip if not recursing or if explicitly excluded.
2134    Otherwise, scan the directory and recurse for each path within it. The scanning
2135    code is localized so it can be made system-specific. */
2136    
2137    
2138    /* For z/OS, determine the file type. */
2139    
2140    #if defined NATIVE_ZOS
2141    zos_test_file =  fopen(pathname,"rb");
2142    
2143    if (zos_test_file == NULL)
2144       {
2145       if (!silent) fprintf(stderr, "pcregrep: failed to test next file %s\n",
2146         pathname, strerror(errno));
2147       return -1;
2148       }
2149    zos_type = identifyzosfiletype (zos_test_file);
2150    fclose (zos_test_file);
2151    
2152    /* Handle a PDS in separate code */
2153    
2154    if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
2155       {
2156       return travelonpdsdir (pathname, only_one_at_top);
2157       }
2158    
2159    /* Deal with regular files in the normal way below. These types are:
2160       zos_type == __ZOS_PDS_MEMBER
2161       zos_type == __ZOS_PS
2162       zos_type == __ZOS_VSAM_KSDS
2163       zos_type == __ZOS_VSAM_ESDS
2164       zos_type == __ZOS_VSAM_RRDS
2165    */
2166    
2167    /* Handle a z/OS directory using common code. */
2168    
2169    else if (zos_type == __ZOS_HFS)
2170     {
2171    #endif  /* NATIVE_ZOS */
2172    
 /* If the file is a directory, skip if skipping or if we are recursing, scan  
 each file within it, subject to any include or exclude patterns that were set.  
 The scanning code is localized so it can be made system-specific. */  
2173    
2174  if ((sep = isdirectory(pathname)) != 0)  /* Handle directories: common code for all OS */
2175    
2176    if (isdirectory(pathname))
2177    {    {
2178    if (dee_action == dee_SKIP) return 1;    if (dee_action == dee_SKIP ||
2179          !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2180        return -1;
2181    
2182    if (dee_action == dee_RECURSE)    if (dee_action == dee_RECURSE)
2183      {      {
2184      char buffer[1024];      char buffer[1024];
# Line 902  if ((sep = isdirectory(pathname)) != 0) Line 2195  if ((sep = isdirectory(pathname)) != 0)
2195    
2196      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
2197        {        {
2198        int frc, blen;        int frc;
2199        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
       blen = strlen(buffer);  
   
       if (exclude_compiled != NULL &&  
           pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)  
         continue;  
   
       if (include_compiled != NULL &&  
           pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  
         continue;  
   
2200        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2201        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
2202         else if (frc == 0 && rc == 1) rc = 0;         else if (frc == 0 && rc == 1) rc = 0;
# Line 924  if ((sep = isdirectory(pathname)) != 0) Line 2207  if ((sep = isdirectory(pathname)) != 0)
2207      }      }
2208    }    }
2209    
2210  /* If the file is not a directory and not a regular file, skip it if that's  #if defined NATIVE_ZOS
2211  been requested. */   }
2212    #endif
2213    
2214  else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;  /* If the file is not a directory, check for a regular file, and if it is not,
2215    skip it if that's been requested. Otherwise, check for an explicit inclusion or
2216    exclusion. */
2217    
2218    else if (
2219    #if defined NATIVE_ZOS
2220            (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
2221    #else  /* all other OS */
2222            (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2223    #endif
2224            !test_incexc(lastcomp, include_patterns, exclude_patterns))
2225      return -1;  /* File skipped */
2226    
2227  /* Control reaches here if we have a regular file, or if we have a directory  /* Control reaches here if we have a regular file, or if we have a directory
2228  and recursion or skipping was not requested, or if we have anything else and  and recursion or skipping was not requested, or if we have anything else and
# Line 935  skipping was not requested. The scan pro Line 2230  skipping was not requested. The scan pro
2230  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
2231  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
2232    
2233  in = fopen(pathname, "r");  #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2234  if (in == NULL)  pathlen = (int)(strlen(pathname));
2235    #endif
2236    
2237    /* Open using zlib if it is supported and the file name ends with .gz. */
2238    
2239    #ifdef SUPPORT_LIBZ
2240    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
2241    {    {
2242    if (!silent)    ingz = gzopen(pathname, "rb");
2243      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,    if (ingz == NULL)
2244        strerror(errno));      {
2245    return 2;      if (!silent)
2246          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2247            strerror(errno));
2248        return 2;
2249        }
2250      handle = (void *)ingz;
2251      frtype = FR_LIBZ;
2252    }    }
2253    else
2254    #endif
2255    
2256  rc = pcregrep(in, (filenames > FN_DEFAULT ||  /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);  
   
 fclose(in);  
 return rc;  
 }  
2257    
2258    #ifdef SUPPORT_LIBBZ2
2259    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
2260      {
2261      inbz2 = BZ2_bzopen(pathname, "rb");
2262      handle = (void *)inbz2;
2263      frtype = FR_LIBBZ2;
2264      }
2265    else
2266    #endif
2267    
2268    /* Otherwise use plain fopen(). The label is so that we can come back here if
2269    an attempt to read a .bz2 file indicates that it really is a plain file. */
2270    
2271    #ifdef SUPPORT_LIBBZ2
2272    PLAIN_FILE:
2273    #endif
2274      {
2275      in = fopen(pathname, "rb");
2276      handle = (void *)in;
2277      frtype = FR_PLAIN;
2278      }
2279    
2280  /*************************************************  /* All the opening methods return errno when they fail. */
 *                Usage function                  *  
 *************************************************/  
2281    
2282  static int  if (handle == NULL)
 usage(int rc)  
 {  
 option_item *op;  
 fprintf(stderr, "Usage: pcregrep [-");  
 for (op = optionlist; op->one_char != 0; op++)  
2283    {    {
2284    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);    if (!silent)
2285        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2286          strerror(errno));
2287      return 2;
2288    }    }
 fprintf(stderr, "] [long options] [pattern] [files]\n");  
 fprintf(stderr, "Type `pcregrep --help' for more information.\n");  
 return rc;  
 }  
   
2289    
2290    /* Now grep the file */
2291    
2292    rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2293      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2294    
2295  /*************************************************  /* Close in an appropriate manner. */
 *                Help function                   *  
 *************************************************/  
2296    
2297  static void  #ifdef SUPPORT_LIBZ
2298  help(void)  if (frtype == FR_LIBZ)
2299  {    gzclose(ingz);
2300  option_item *op;  else
2301    #endif
2302    
2303  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  /* If it is a .bz2 file and the result is 3, it means that the first attempt to
2304  printf("Search for PATTERN in each FILE or standard input.\n");  read failed. If the error indicates that the file isn't in fact bzipped, try
2305  printf("PATTERN must be present if neither -e nor -f is used.\n");  again as a normal file. */
2306  printf("\"-\" can be used as a file name to mean STDIN.\n\n");  
2307  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  #ifdef SUPPORT_LIBBZ2
2308    if (frtype == FR_LIBBZ2)
2309      {
2310      if (rc == 3)
2311        {
2312        int errnum;
2313        const char *err = BZ2_bzerror(inbz2, &errnum);
2314        if (errnum == BZ_DATA_ERROR_MAGIC)
2315          {
2316          BZ2_bzclose(inbz2);
2317          goto PLAIN_FILE;
2318          }
2319        else if (!silent)
2320          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
2321            pathname, err);
2322        rc = 2;    /* The normal "something went wrong" code */
2323        }
2324      BZ2_bzclose(inbz2);
2325      }
2326    else
2327    #endif
2328    
2329  printf("Options:\n");  /* Normal file close */
2330    
2331  for (op = optionlist; op->one_char != 0; op++)  fclose(in);
   {  
   int n;  
   char s[4];  
   if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");  
   printf("  %s --%s%n", s, op->long_name, &n);  
   n = 30 - n;  
   if (n < 1) n = 1;  
   printf("%.*s%s\n", n, "                    ", op->help_text);  
   }  
2332    
2333  printf("\nWhen reading patterns from a file instead of using a command line option,\n");  /* Pass back the yield from pcregrep(). */
 printf("trailing white space is removed and blank lines are ignored.\n");  
 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
2334    
2335  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");  return rc;
 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  
2336  }  }
2337    
2338    
2339    
   
2340  /*************************************************  /*************************************************
2341  *    Handle a single-letter, no data option      *  *    Handle a single-letter, no data option      *
2342  *************************************************/  *************************************************/
# Line 1023  handle_option(int letter, int options) Line 2346  handle_option(int letter, int options)
2346  {  {
2347  switch(letter)  switch(letter)
2348    {    {
2349    case N_HELP: help(); exit(0);    case N_FOFFSETS: file_offsets = TRUE; break;
2350      case N_HELP: help(); pcregrep_exit(0);
2351      case N_LBUFFER: line_buffered = TRUE; break;
2352      case N_LOFFSETS: line_offsets = number = TRUE; break;
2353      case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2354      case 'a': binary_files = BIN_TEXT; break;
2355    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
2356    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
2357    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
2358      case 'I': binary_files = BIN_NOMATCH; break;
2359    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
2360    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
2361    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2362    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
2363    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2364    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
2365    case 'o': only_matching = TRUE; break;  
2366      case 'o':
2367      only_matching_last = add_number(0, only_matching_last);
2368      if (only_matching == NULL) only_matching = only_matching_last;
2369      break;
2370    
2371    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
2372    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
2373    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
2374    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
2375    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
2376    case 'w': process_options |= PO_WORD_MATCH; break;    case 'w': process_options |= PO_WORD_MATCH; break;
2377    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
2378    
2379    case 'V':    case 'V':
2380    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stdout, "pcregrep version %s\n", pcre_version());
2381    fprintf(stderr, "PCRE version %s\n", pcre_version());    pcregrep_exit(0);
   exit(0);  
2382    break;    break;
2383    
2384    default:    default:
2385    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2386    exit(usage(2));    pcregrep_exit(usage(2));
2387    }    }
2388    
2389  return options;  return options;
# Line 1088  return buffer; Line 2421  return buffer;
2421  *          Compile a single pattern              *  *          Compile a single pattern              *
2422  *************************************************/  *************************************************/
2423    
2424  /* When the -F option has been used, this is called for each substring.  /* Do nothing if the pattern has already been compiled. This is the case for
2425  Otherwise it's called for each supplied pattern.  include/exclude patterns read from a file.
2426    
2427    When the -F option has been used, each "pattern" may be a list of strings,
2428    separated by line breaks. They will be matched literally. We split such a
2429    string and compile the first substring, inserting an additional block into the
2430    pattern chain.
2431    
2432  Arguments:  Arguments:
2433    pattern        the pattern string    p              points to the pattern block
2434    options        the PCRE options    options        the PCRE options
2435    filename       the file name, or NULL for a command-line pattern    popts          the processing options
2436      fromfile       TRUE if the pattern was read from a file
2437      fromtext       file name or identifying text (e.g. "include")
2438    count          0 if this is the only command line pattern, or    count          0 if this is the only command line pattern, or
2439                   number of the command line pattern, or                   number of the command line pattern, or
2440                   linenumber for a pattern from a file                   linenumber for a pattern from a file
# Line 1103  Returns:         TRUE on success, FALSE Line 2443  Returns:         TRUE on success, FALSE
2443  */  */
2444    
2445  static BOOL  static BOOL
2446  compile_single_pattern(char *pattern, int options, char *filename, int count)  compile_pattern(patstr *p, int options, int popts, int fromfile,
2447      const char *fromtext, int count)
2448  {  {
2449  char buffer[MBUFTHIRD + 16];  char buffer[PATBUFSIZE];
2450  const char *error;  const char *error;
2451    char *ps = p->string;
2452    int patlen = strlen(ps);
2453  int errptr;  int errptr;
2454    
2455  if (pattern_count >= MAX_PATTERN_COUNT)  if (p->compiled != NULL) return TRUE;
2456    
2457    if ((popts & PO_FIXED_STRINGS) != 0)
2458    {    {
2459    fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",    int ellength;
2460      (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);    char *eop = ps + patlen;
2461    return FALSE;    char *pe = end_of_line(ps, eop, &ellength);
2462    
2463      if (ellength != 0)
2464        {
2465        if (add_pattern(pe, p) == NULL) return FALSE;
2466        patlen = (int)(pe - ps - ellength);
2467        }
2468    }    }
2469    
2470  sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,  sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2471    suffix[process_options]);  p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2472  pattern_list[pattern_count] =  if (p->compiled != NULL) return TRUE;
   pcre_compile(buffer, options, &error, &errptr, pcretables);  
 if (pattern_list[pattern_count++] != NULL) return TRUE;  
2473    
2474  /* Handle compile errors */  /* Handle compile errors */
2475    
2476  errptr -= (int)strlen(prefix[process_options]);  errptr -= (int)strlen(prefix[popts]);
2477  if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);  if (errptr > patlen) errptr = patlen;
2478    
2479  if (filename == NULL)  if (fromfile)
2480    {    {
2481    if (count == 0)    fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2482      fprintf(stderr, "pcregrep: Error in command-line regex "      "at offset %d: %s\n", count, fromtext, errptr, error);
       "at offset %d: %s\n", errptr, error);  
   else  
     fprintf(stderr, "pcregrep: Error in %s command-line regex "  
       "at offset %d: %s\n", ordin(count), errptr, error);  
2483    }    }
2484  else  else
2485    {    {
2486    fprintf(stderr, "pcregrep: Error in regex in line %d of %s "    if (count == 0)
2487      "at offset %d: %s\n", count, filename, errptr, error);      fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2488          fromtext, errptr, error);
2489      else
2490        fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2491          ordin(count), fromtext, errptr, error);
2492    }    }
2493    
2494  return FALSE;  return FALSE;
# Line 1148  return FALSE; Line 2497  return FALSE;
2497    
2498    
2499  /*************************************************  /*************************************************
2500  *           Compile one supplied pattern         *  *     Read and compile a file of patterns        *
2501  *************************************************/  *************************************************/
2502    
2503  /* When the -F option has been used, each string may be a list of strings,  /* This is used for --filelist, --include-from, and --exclude-from.
 separated by newlines. They will be matched literally.  
2504    
2505  Arguments:  Arguments:
2506    pattern        the pattern string    name         the name of the file; "-" is stdin
2507    options        the PCRE options    patptr       pointer to the pattern chain anchor
2508    filename       the file name, or NULL for a command-line pattern    patlastptr   pointer to the last pattern pointer
2509    count          0 if this is the only command line pattern, or    popts        the process options to pass to compile_pattern()
                  number of the command line pattern, or  
                  linenumber for a pattern from a file  
2510    
2511  Returns:         TRUE on success, FALSE after an error  Returns:       TRUE if all went well
2512  */  */
2513    
2514  static BOOL  static BOOL
2515  compile_pattern(char *pattern, int options, char *filename, int count)  read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2516  {  {
2517  if ((process_options & PO_FIXED_STRINGS) != 0)  int linenumber = 0;
2518    FILE *f;
2519    char *filename;
2520    char buffer[PATBUFSIZE];
2521    
2522    if (strcmp(name, "-") == 0)
2523      {
2524      f = stdin;
2525      filename = stdin_name;
2526      }
2527    else
2528      {
2529      f = fopen(name, "r");
2530      if (f == NULL)
2531        {
2532        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2533        return FALSE;
2534        }
2535      filename = name;
2536      }
2537    
2538    while (fgets(buffer, PATBUFSIZE, f) != NULL)
2539    {    {
2540    char buffer[MBUFTHIRD];    char *s = buffer + (int)strlen(buffer);
2541      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2542      *s = 0;
2543      linenumber++;
2544      if (buffer[0] == 0) continue;   /* Skip blank lines */
2545    
2546      /* Note: this call to add_pattern() puts a pointer to the local variable
2547      "buffer" into the pattern chain. However, that pointer is used only when
2548      compiling the pattern, which happens immediately below, so we flatten it
2549      afterwards, as a precaution against any later code trying to use it. */
2550    
2551      *patlastptr = add_pattern(buffer, *patlastptr);
2552      if (*patlastptr == NULL) return FALSE;
2553      if (*patptr == NULL) *patptr = *patlastptr;
2554    
2555      /* This loop is needed because compiling a "pattern" when -F is set may add
2556      on additional literal patterns if the original contains a newline. In the
2557      common case, it never will, because fgets() stops at a newline. However,
2558      the -N option can be used to give pcregrep a different newline setting. */
2559    
2560    for(;;)    for(;;)
2561      {      {
2562      char *p = strchr(pattern, '\n');      if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2563      if (p == NULL)          linenumber))
       return compile_single_pattern(pattern, options, filename, count);  
     sprintf(buffer, "%.*s", p - pattern, pattern);  
     pattern = p + 1;  
     if (!compile_single_pattern(buffer, options, filename, count))  
2564        return FALSE;        return FALSE;
2565        (*patlastptr)->string = NULL;            /* Insurance */
2566        if ((*patlastptr)->next == NULL) break;
2567        *patlastptr = (*patlastptr)->next;
2568      }      }
2569    }    }
2570  else return compile_single_pattern(pattern, options, filename, count);  
2571    if (f != stdin) fclose(f);
2572    return TRUE;
2573  }  }
2574    
2575    
# Line 1198  main(int argc, char **argv) Line 2585  main(int argc, char **argv)
2585  {  {
2586  int i, j;  int i, j;
2587  int rc = 1;  int rc = 1;
 int pcre_options = 0;  
 int cmd_pattern_count = 0;  
 int errptr;  
2588  BOOL only_one_at_top;  BOOL only_one_at_top;
2589  char *patterns[MAX_PATTERN_COUNT];  patstr *cp;
2590    fnstr *fn;
2591  const char *locale_from = "--locale";  const char *locale_from = "--locale";
2592  const char *error;  const char *error;
2593    
2594    #ifdef SUPPORT_PCREGREP_JIT
2595    pcre_jit_stack *jit_stack = NULL;
2596    #endif
2597    
2598    /* Set the default line ending value from the default in the PCRE library;
2599    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2600    Note that the return values from pcre_config(), though derived from the ASCII
2601    codes, are the same in EBCDIC environments, so we must use the actual values
2602    rather than escapes such as '\r'. */
2603    
2604    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2605    switch(i)
2606      {
2607      default:               newline = (char *)"lf"; break;
2608      case 13:               newline = (char *)"cr"; break;
2609      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2610      case -1:               newline = (char *)"any"; break;
2611      case -2:               newline = (char *)"anycrlf"; break;
2612      }
2613    
2614  /* Process the options */  /* Process the options */
2615    
2616  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
# Line 1222  for (i = 1; i < argc; i++) Line 2627  for (i = 1; i < argc; i++)
2627    
2628    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2629      {      {
2630      if (pattern_filename != NULL || pattern_count > 0) break;      if (pattern_files != NULL || patterns != NULL) break;
2631        else exit(usage(2));        else pcregrep_exit(usage(2));
2632      }      }
2633    
2634    /* Handle a long name option, or -- to terminate the options */    /* Handle a long name option, or -- to terminate the options */
# Line 1245  for (i = 1; i < argc; i++) Line 2650  for (i = 1; i < argc; i++)
2650      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2651      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2652      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2653      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
2654      these categories, fortunately. */      both these categories. */
2655    
2656      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2657        {        {
2658        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2659        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2660        if (opbra == NULL)     /* Not a (p) case */  
2661          /* Handle options with only one spelling of the name */
2662    
2663          if (opbra == NULL)     /* Does not contain '(' */
2664          {          {
2665          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2666            {            {
# Line 1260  for (i = 1; i < argc; i++) Line 2668  for (i = 1; i < argc; i++)
2668            }            }
2669          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2670            {            {
2671            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2672            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2673                (int)strlen(arg) : (int)(argequals - arg);
2674            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2675              {              {
2676              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1274  for (i = 1; i < argc; i++) Line 2683  for (i = 1; i < argc; i++)
2683              }              }
2684            }            }
2685          }          }
2686        else                   /* Special case xxxx(p) */  
2687          /* Handle options with an alternate spelling of the name */
2688    
2689          else
2690          {          {
2691          char buff1[24];          char buff1[24];
2692          char buff2[24];          char buff2[24];
2693          int baselen = opbra - op->long_name;  
2694            int baselen = (int)(opbra - op->long_name);
2695            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2696            int arglen = (argequals == NULL || equals == NULL)?
2697              (int)strlen(arg) : (int)(argequals - arg);
2698    
2699          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2700          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2701            opbra + 1);  
2702          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2703               strncmp(arg, buff2, arglen) == 0)
2704              {
2705              if (equals != NULL && argequals != NULL)
2706                {
2707                option_data = argequals;
2708                if (*option_data == '=')
2709                  {
2710                  option_data++;
2711                  longopwasequals = TRUE;
2712                  }
2713                }
2714            break;            break;
2715              }
2716          }          }
2717        }        }
2718    
2719      if (op->one_char == 0)      if (op->one_char == 0)
2720        {        {
2721        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2722        exit(usage(2));        pcregrep_exit(usage(2));
2723        }        }
2724      }      }
2725    
2726      /* Jeffrey Friedl's debugging harness uses these additional options which
2727      are not in the right form for putting in the option table because they use
2728      only one hyphen, yet are more than one character long. By putting them
2729      separately here, they will not get displayed as part of the help() output,
2730      but I don't think Jeffrey will care about that. */
2731    
2732    #ifdef JFRIEDL_DEBUG
2733      else if (strcmp(argv[i], "-pre") == 0) {
2734              jfriedl_prefix = argv[++i];
2735              continue;
2736      } else if (strcmp(argv[i], "-post") == 0) {
2737              jfriedl_postfix = argv[++i];
2738              continue;
2739      } else if (strcmp(argv[i], "-XT") == 0) {
2740              sscanf(argv[++i], "%d", &jfriedl_XT);
2741              continue;
2742      } else if (strcmp(argv[i], "-XR") == 0) {
2743              sscanf(argv[++i], "%d", &jfriedl_XR);
2744              continue;
2745      }
2746    #endif
2747    
2748    
2749    /* One-char options; many that have no data may be in a single argument; we    /* One-char options; many that have no data may be in a single argument; we
2750    continue till we hit the last one or one that needs data. */    continue till we hit the last one or one that needs data. */
2751    
# Line 1301  for (i = 1; i < argc; i++) Line 2753  for (i = 1; i < argc; i++)
2753      {      {
2754      char *s = argv[i] + 1;      char *s = argv[i] + 1;
2755      longop = FALSE;      longop = FALSE;
2756    
2757      while (*s != 0)      while (*s != 0)
2758        {        {
2759        for (op = optionlist; op->one_char != 0; op++)        for (op = optionlist; op->one_char != 0; op++)
2760          { if (*s == op->one_char) break; }          {
2761            if (*s == op->one_char) break;
2762            }
2763        if (op->one_char == 0)        if (op->one_char == 0)
2764          {          {
2765          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2766            *s, argv[i]);            *s, argv[i]);
2767          exit(usage(2));          pcregrep_exit(usage(2));
2768            }
2769    
2770          option_data = s+1;
2771    
2772          /* Break out if this is the last character in the string; it's handled
2773          below like a single multi-char option. */
2774    
2775          if (*option_data == 0) break;
2776    
2777          /* Check for a single-character option that has data: OP_OP_NUMBER(S)
2778          are used for ones that either have a numerical number or defaults, i.e.
2779          the data is optional. If a digit follows, there is data; if not, carry on
2780          with other single-character options in the same string. */
2781    
2782          if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
2783            {
2784            if (isdigit((unsigned char)s[1])) break;
2785          }          }
2786        if (op->type != OP_NODATA || s[1] == 0)        else   /* Check for an option with data */
2787          {          {
2788          option_data = s+1;          if (op->type != OP_NODATA) break;
         break;  
2789          }          }
2790    
2791          /* Handle a single-character option with no data, then loop for the
2792          next character in the string. */
2793    
2794        pcre_options = handle_option(*s++, pcre_options);        pcre_options = handle_option(*s++, pcre_options);
2795        }        }
2796      }      }
# Line 1330  for (i = 1; i < argc; i++) Line 2805  for (i = 1; i < argc; i++)
2805      continue;      continue;
2806      }      }
2807    
2808    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
2809    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
2810    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r",
2811    Jeffrey Friedl's special debugging option. */    "only-matching", and Jeffrey Friedl's special -S debugging option. */
2812    
2813    if (*option_data == 0 &&    if (*option_data == 0 &&
2814        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
2815           op->type == OP_OP_NUMBERS))
2816      {      {
2817      switch (op->one_char)      switch (op->one_char)
2818        {        {
2819        case N_COLOUR:        case N_COLOUR:
2820        colour_option = (char *)"auto";        colour_option = (char *)"auto";
2821        break;        break;
2822    
2823          case 'o':
2824          only_matching_last = add_number(0, only_matching_last);
2825          if (only_matching == NULL) only_matching = only_matching_last;
2826          break;
2827    
2828  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
2829        case 'S':        case 'S':
2830        S_arg = 0;        S_arg = 0;
# Line 1359  for (i = 1; i < argc; i++) Line 2841  for (i = 1; i < argc; i++)
2841      if (i >= argc - 1 || longopwasequals)      if (i >= argc - 1 || longopwasequals)
2842        {        {
2843        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2844        exit(usage(2));        pcregrep_exit(usage(2));
2845        }        }
2846      option_data = argv[++i];      option_data = argv[++i];
2847      }      }
2848    
2849    /* If the option type is OP_PATLIST, it's the -e option, which can be called    /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
2850    multiple times to create a list of patterns. */    added to a chain of numbers. */
2851    
2852      if (op->type == OP_OP_NUMBERS)
2853        {
2854        unsigned long int n = decode_number(option_data, op, longop);
2855        omdatastr *omd = (omdatastr *)op->dataptr;
2856        *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
2857        if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
2858        }
2859    
2860      /* If the option type is OP_PATLIST, it's the -e option, or one of the
2861      include/exclude options, which can be called multiple times to create lists
2862      of patterns. */
2863    
2864      else if (op->type == OP_PATLIST)
2865        {
2866        patdatastr *pd = (patdatastr *)op->dataptr;
2867        *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2868        if (*(pd->lastptr) == NULL) goto EXIT2;
2869        if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2870        }
2871    
2872      /* If the option type is OP_FILELIST, it's one of the options that names a
2873      file. */
2874    
2875    if (op->type == OP_PATLIST)    else if (op->type == OP_FILELIST)
2876      {      {
2877      if (cmd_pattern_count >= MAX_PATTERN_COUNT)      fndatastr *fd = (fndatastr *)op->dataptr;
2878        fn = (fnstr *)malloc(sizeof(fnstr));
2879        if (fn == NULL)
2880        {        {
2881        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",        fprintf(stderr, "pcregrep: malloc failed\n");
2882          MAX_PATTERN_COUNT);        goto EXIT2;
2883        return 2;        }
2884        fn->next = NULL;
2885        fn->name = option_data;
2886        if (*(fd->anchor) == NULL)
2887          *(fd->anchor) = fn;
2888        else
2889          (*(fd->lastptr))->next = fn;
2890        *(fd->lastptr) = fn;
2891        }
2892    
2893      /* Handle OP_BINFILES */
2894    
2895      else if (op->type == OP_BINFILES)
2896        {
2897        if (strcmp(option_data, "binary") == 0)
2898          binary_files = BIN_BINARY;
2899        else if (strcmp(option_data, "without-match") == 0)
2900          binary_files = BIN_NOMATCH;
2901        else if (strcmp(option_data, "text") == 0)
2902          binary_files = BIN_TEXT;
2903        else
2904          {
2905          fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2906            option_data);
2907          pcregrep_exit(usage(2));
2908        }        }
     patterns[cmd_pattern_count++] = option_data;  
2909      }      }
2910    
2911    /* Otherwise, deal with single string or numeric data values. */    /* Otherwise, deal with a single string or numeric data value. */
2912    
2913    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)    else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2914               op->type != OP_OP_NUMBER)
2915      {      {
2916      *((char **)op->dataptr) = option_data;      *((char **)op->dataptr) = option_data;
2917      }      }
2918    else    else
2919      {      {
2920      char *endptr;      unsigned long int n = decode_number(option_data, op, longop);
2921      int n = strtoul(option_data, &endptr, 10);      if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
2922      if (*endptr != 0)        else *((int *)op->dataptr) = n;
       {  
       if (longop)  
         {  
         char *equals = strchr(op->long_name, '=');  
         int nlen = (equals == NULL)? (int)strlen(op->long_name) :  
           equals - op->long_name;  
         fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",  
           option_data, nlen, op->long_name);  
         }  
       else  
         fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",  
           option_data, op->one_char);  
       exit(usage(2));  
       }  
     *((int *)op->dataptr) = n;  
2923      }      }
2924    }    }
2925    
# Line 1416  if (both_context > 0) Line 2932  if (both_context > 0)
2932    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2933    }    }
2934    
2935    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2936    However, all three set show_only_matching because they display, each in their
2937    own way, only the data that has matched. */
2938    
2939    if ((only_matching != NULL && (file_offsets || line_offsets)) ||
2940        (file_offsets && line_offsets))
2941      {
2942      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2943        "and/or --line-offsets\n");
2944      pcregrep_exit(usage(2));
2945      }
2946    
2947    if (only_matching != NULL || file_offsets || line_offsets)
2948      show_only_matching = TRUE;
2949    
2950  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2951  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
2952    
# Line 1465  if (colour_option != NULL && strcmp(colo Line 2996  if (colour_option != NULL && strcmp(colo
2996      }      }
2997    }    }
2998    
2999    /* Interpret the newline type; the default settings are Unix-like. */
3000    
3001    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
3002      {
3003      pcre_options |= PCRE_NEWLINE_CR;
3004      endlinetype = EL_CR;
3005      }
3006    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
3007      {
3008      pcre_options |= PCRE_NEWLINE_LF;
3009      endlinetype = EL_LF;
3010      }
3011    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
3012      {
3013      pcre_options |= PCRE_NEWLINE_CRLF;
3014      endlinetype = EL_CRLF;
3015      }
3016    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
3017      {
3018      pcre_options |= PCRE_NEWLINE_ANY;
3019      endlinetype = EL_ANY;
3020      }
3021    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
3022      {
3023      pcre_options |= PCRE_NEWLINE_ANYCRLF;
3024      endlinetype = EL_ANYCRLF;
3025      }
3026    else
3027      {
3028      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
3029      return 2;
3030      }
3031    
3032  /* Interpret the text values for -d and -D */  /* Interpret the text values for -d and -D */
3033    
3034  if (dee_option != NULL)  if (dee_option != NULL)
# Line 1490  if (DEE_option != NULL) Line 3054  if (DEE_option != NULL)
3054      }      }
3055    }    }
3056    
3057  /* Check the value for Jeff Friedl's debugging option. */  /* Check the values for Jeffrey Friedl's debugging options. */
3058    
3059  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
3060  if (S_arg > 9)  if (S_arg > 9)
# Line 1498  if (S_arg > 9) Line 3062  if (S_arg > 9)
3062    fprintf(stderr, "pcregrep: bad value for -S option\n");    fprintf(stderr, "pcregrep: bad value for -S option\n");
3063    return 2;    return 2;
3064    }    }
3065    if (jfriedl_XT != 0 || jfriedl_XR != 0)
3066      {
3067      if (jfriedl_XT == 0) jfriedl_XT = 1;
3068      if (jfriedl_XR == 0) jfriedl_XR = 1;
3069      }
3070  #endif  #endif
3071    
3072  /* Get memory to store the pattern and hints lists. */  /* Get memory for the main buffer. */
3073    
3074  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  bufsize = 3*bufthird;
3075  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  main_buffer = (char *)malloc(bufsize);
3076    
3077  if (pattern_list == NULL || hints_list == NULL)  if (main_buffer == NULL)
3078    {    {
3079    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
3080    return 2;    goto EXIT2;
3081    }    }
3082    
3083  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there are no files provided by -f,
3084  the first argument is the one and only pattern, and it must exist. */  the first argument is the one and only pattern, and it must exist. */
3085    
3086  if (cmd_pattern_count == 0 && pattern_filename == NULL)  if (patterns == NULL && pattern_files == NULL)
3087    {    {
3088    if (i >= argc) return usage(2);    if (i >= argc) return usage(2);
3089    patterns[cmd_pattern_count++] = argv[i++];    patterns = patterns_last = add_pattern(argv[i++], NULL);
3090      if (patterns == NULL) goto EXIT2;
3091    }    }
3092    
3093  /* Compile the patterns that were provided on the command line, either by  /* Compile the patterns that were provided on the command line, either by
3094  multiple uses of -e or as a single unkeyed pattern. */  multiple uses of -e or as a single unkeyed pattern. We cannot do this until
3095    after all the command-line options are read so that we know which PCRE options
3096    to use. When -F is used, compile_pattern() may add another block into the
3097    chain, so we must not access the next pointer till after the compile. */
3098    
3099  for (j = 0; j < cmd_pattern_count; j++)  for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3100    {    {
3101    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
3102         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 1 && patterns->next == NULL)? 0 : j))
3103      return 2;      goto EXIT2;
3104    }    }
3105    
3106  /* Compile the regular expressions that are provided in a file. */  /* Read and compile the regular expressions that are provided in files. */
3107    
3108  if (pattern_filename != NULL)  for (fn = pattern_files; fn != NULL; fn = fn->next)
3109    {    {
3110    int linenumber = 0;    if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3111    FILE *f;      goto EXIT2;
3112    char *filename;    }
   char buffer[MBUFTHIRD];  
3113    
3114    if (strcmp(pattern_filename, "-") == 0)  /* Study the regular expressions, as we will be running them many times. If an
3115      {  extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
3116      f = stdin;  returned, even if studying produces no data. */
     filename = stdin_name;  
     }  
   else  
     {  
     f = fopen(pattern_filename, "r");  
     if (f == NULL)  
       {  
       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,  
         strerror(errno));  
       return 2;  
       }  
     filename = pattern_filename;  
     }  
3117    
3118    while (fgets(buffer, MBUFTHIRD, f) != NULL)  if (match_limit > 0 || match_limit_recursion > 0)
3119      {    study_options |= PCRE_STUDY_EXTRA_NEEDED;
     char *s = buffer + (int)strlen(buffer);  
     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;  
     *s = 0;  
     linenumber++;  
     if (buffer[0] == 0) continue;   /* Skip blank lines */  
     if (!compile_pattern(buffer, pcre_options, filename, linenumber))  
       return 2;  
     }  
3120    
3121    if (f != stdin) fclose(f);  /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
   }  
3122    
3123  /* Study the regular expressions, as we will be running them many times */  #ifdef SUPPORT_PCREGREP_JIT
3124    if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3125      jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
3126    #endif
3127    
3128  for (j = 0; j < pattern_count; j++)  for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3129    {    {
3130    hints_list[j] = pcre_study(pattern_list[j], 0, &error);    cp->hint = pcre_study(cp->compiled, study_options, &error);
3131    if (error != NULL)    if (error != NULL)
3132      {      {
3133      char s[16];      char s[16];
3134      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
3135      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
3136      return 2;      goto EXIT2;
3137      }      }
3138    #ifdef SUPPORT_PCREGREP_JIT
3139      if (jit_stack != NULL && cp->hint != NULL)
3140        pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
3141    #endif
3142    }    }
3143    
3144  /* If there are include or exclude patterns, compile them. */  /* If --match-limit or --recursion-limit was set, put the value(s) into the
3145    pcre_extra block for each pattern. There will always be an extra block because
3146    of the use of PCRE_STUDY_EXTRA_NEEDED above. */
3147    
3148  if (exclude_pattern != NULL)  for (cp = patterns; cp != NULL; cp = cp->next)
3149    {    {
3150    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,    if (match_limit > 0)
     pcretables);  
   if (exclude_compiled == NULL)  
3151      {      {
3152      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
3153        errptr, error);      cp->hint->match_limit = match_limit;
3154      return 2;      }
3155    
3156      if (match_limit_recursion > 0)
3157        {
3158        cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3159        cp->hint->match_limit_recursion = match_limit_recursion;
3160      }      }
3161    }    }
3162    
3163  if (include_pattern != NULL)  /* If there are include or exclude patterns read from the command line, compile
3164    them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3165    0. */
3166    
3167    for (j = 0; j < 4; j++)
3168    {    {
3169    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,    int k;
3170      pcretables);    for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
   if (include_compiled == NULL)  
3171      {      {
3172      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
3173        errptr, error);           (k == 1 && cp->next == NULL)? 0 : k))
3174      return 2;        goto EXIT2;
3175      }      }
3176    }    }
3177    
3178  /* If there are no further arguments, do the business on stdin and exit. */  /* Read and compile include/exclude patterns from files. */
3179    
3180    for (fn = include_from; fn != NULL; fn = fn->next)
3181      {
3182      if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3183        goto EXIT2;
3184      }
3185    
3186    for (fn = exclude_from; fn != NULL; fn = fn->next)
3187      {
3188      if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3189        goto EXIT2;
3190      }
3191    
3192    /* If there are no files that contain lists of files to search, and there are
3193    no file arguments, search stdin, and then exit. */
3194    
3195    if (file_lists == NULL && i >= argc)
3196      {
3197      rc = pcregrep(stdin, FR_PLAIN, stdin_name,
3198        (filenames > FN_DEFAULT)? stdin_name : NULL);
3199      goto EXIT;
3200      }
3201    
3202    /* If any files that contain a list of files to search have been specified,
3203    read them line by line and search the given files. */
3204    
3205  if (i >= argc)  for (fn = file_lists; fn != NULL; fn = fn->next)
3206    return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    {
3207      char buffer[PATBUFSIZE];
3208      FILE *fl;
3209      if (strcmp(fn->name, "-") == 0) fl = stdin; else
3210        {
3211        fl = fopen(fn->name, "rb");
3212        if (fl == NULL)
3213          {
3214          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
3215            strerror(errno));
3216          goto EXIT2;
3217          }
3218        }
3219      while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3220        {
3221        int frc;
3222        char *end = buffer + (int)strlen(buffer);
3223        while (end > buffer && isspace(end[-1])) end--;
3224        *end = 0;
3225        if (*buffer != 0)
3226          {
3227          frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3228          if (frc > 1) rc = frc;
3229            else if (frc == 0 && rc == 1) rc = 0;
3230          }
3231        }
3232      if (fl != stdin) fclose(fl);
3233      }
3234    
3235  /* Otherwise, work through the remaining arguments as files or directories.  /* After handling file-list, work through remaining arguments. Pass in the fact
3236  Pass in the fact that there is only one argument at top level - this suppresses  that there is only one argument at top level - this suppresses the file name if
3237  the file name if the argument is not a directory and filenames are not  the argument is not a directory and filenames are not otherwise forced. */
 otherwise forced. */  
3238    
3239  only_one_at_top = i == argc - 1;   /* Catch initial value of i */  only_one_at_top = i == argc - 1 && file_lists == NULL;
3240    
3241  for (; i < argc; i++)  for (; i < argc; i++)
3242    {    {
# Line 1630  for (; i < argc; i++) Line 3246  for (; i < argc; i++)
3246      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
3247    }    }
3248    
3249  return rc;  EXIT:
3250    #ifdef SUPPORT_PCREGREP_JIT
3251    if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3252    #endif
3253    
3254    if (main_buffer != NULL) free(main_buffer);
3255    
3256    free_pattern_chain(patterns);
3257    free_pattern_chain(include_patterns);
3258    free_pattern_chain(include_dir_patterns);
3259    free_pattern_chain(exclude_patterns);
3260    free_pattern_chain(exclude_dir_patterns);
3261    
3262    free_file_chain(exclude_from);
3263    free_file_chain(include_from);
3264    free_file_chain(pattern_files);
3265    free_file_chain(file_lists);
3266    
3267    while (only_matching != NULL)
3268      {
3269      omstr *this = only_matching;
3270      only_matching = this->next;
3271      free(this);
3272      }
3273    
3274    pcregrep_exit(rc);
3275    
3276    EXIT2:
3277    rc = 2;
3278    goto EXIT;
3279  }  }
3280    
3281  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.87  
changed lines
  Added in v.1355

  ViewVC Help
Powered by ViewVC 1.1.5