/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC | revision 1492 by ph10, Tue Jul 8 16:16:14 2014 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On Unix-like, Windows, and native z/OS systems it can
7  directories.  recurse into directories, and in z/OS it can handle PDS files.
8    
9             Copyright (c) 1997-2006 University of Cambridge  Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
10    additional header is required. That header is not included in the main PCRE
11    distribution because other apparatus is needed to compile pcregrep for z/OS.
12    The header can be found in the special z/OS distribution, which is available
13    from www.zaconsultants.net or from www.cbttape.org.
14    
15               Copyright (c) 1997-2014 University of Cambridge
16    
17  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
18  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 43  POSSIBILITY OF SUCH DAMAGE.
43  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
44  */  */
45    
46    #ifdef HAVE_CONFIG_H
47    #include "config.h"
48    #endif
49    
50  #include <ctype.h>  #include <ctype.h>
51  #include <locale.h>  #include <locale.h>
52  #include <stdio.h>  #include <stdio.h>
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 56  POSSIBILITY OF SUCH DAMAGE.
56    
57  #include <sys/types.h>  #include <sys/types.h>
58  #include <sys/stat.h>  #include <sys/stat.h>
59    
60    #ifdef HAVE_UNISTD_H
61  #include <unistd.h>  #include <unistd.h>
62    #endif
63    
64    #ifdef SUPPORT_LIBZ
65    #include <zlib.h>
66    #endif
67    
68    #ifdef SUPPORT_LIBBZ2
69    #include <bzlib.h>
70    #endif
71    
 #include "config.h"  
72  #include "pcre.h"  #include "pcre.h"
73    
74  #define FALSE 0  #define FALSE 0
# Line 56  POSSIBILITY OF SUCH DAMAGE. Line 76  POSSIBILITY OF SUCH DAMAGE.
76    
77  typedef int BOOL;  typedef int BOOL;
78    
79  #define VERSION "4.2 09-Jan-2006"  #define OFFSET_SIZE 99
 #define MAX_PATTERN_COUNT 100  
80    
81  #if BUFSIZ > 8192  #if BUFSIZ > 8192
82  #define MBUFTHIRD BUFSIZ  #define MAXPATLEN BUFSIZ
83  #else  #else
84  #define MBUFTHIRD 8192  #define MAXPATLEN 8192
85  #endif  #endif
86    
87    #define PATBUFSIZE (MAXPATLEN + 10)   /* Allows for prefix+suffix */
88    
89  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
90  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
91  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
92    
93  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
94    
95    /* File reading styles */
96    
97    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
98    
99  /* Actions for the -d and -D options */  /* Actions for the -d and -D options */
100    
# Line 83  enum { DEE_READ, DEE_SKIP }; Line 107  enum { DEE_READ, DEE_SKIP };
107  #define PO_LINE_MATCH     0x0002  #define PO_LINE_MATCH     0x0002
108  #define PO_FIXED_STRINGS  0x0004  #define PO_FIXED_STRINGS  0x0004
109    
110    /* Line ending types */
111    
112    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
113    
114    /* Binary file options */
115    
116    enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
117    
118    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
119    environments), a warning is issued if the value of fwrite() is ignored.
120    Unfortunately, casting to (void) does not suppress the warning. To get round
121    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
122    apply to fprintf(). */
123    
124    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
125    
126    
127    
128  /*************************************************  /*************************************************
# Line 94  regular code. */ Line 134  regular code. */
134    
135  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
136  static int S_arg = -1;  static int S_arg = -1;
137    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
138    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
139    static const char *jfriedl_prefix = "";
140    static const char *jfriedl_postfix = "";
141  #endif  #endif
142    
143    static int  endlinetype;
144    
145  static char *colour_string = (char *)"1;31";  static char *colour_string = (char *)"1;31";
146  static char *colour_option = NULL;  static char *colour_option = NULL;
147  static char *dee_option = NULL;  static char *dee_option = NULL;
148  static char *DEE_option = NULL;  static char *DEE_option = NULL;
 static char *pattern_filename = NULL;  
 static char *stdin_name = (char *)"(standard input)";  
149  static char *locale = NULL;  static char *locale = NULL;
150    static char *main_buffer = NULL;
151    static char *newline = NULL;
152    static char *om_separator = (char *)"";
153    static char *stdin_name = (char *)"(standard input)";
154    
155  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
156    
 static int  pattern_count = 0;  
 static pcre **pattern_list;  
 static pcre_extra **hints_list;  
   
 static char *include_pattern = NULL;  
 static char *exclude_pattern = NULL;  
   
 static pcre *include_compiled = NULL;  
 static pcre *exclude_compiled = NULL;  
   
157  static int after_context = 0;  static int after_context = 0;
158  static int before_context = 0;  static int before_context = 0;
159    static int binary_files = BIN_BINARY;
160  static int both_context = 0;  static int both_context = 0;
161    static int bufthird = PCREGREP_BUFSIZE;
162    static int bufsize = 3*PCREGREP_BUFSIZE;
163    
164    #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
165    static int dee_action = dee_SKIP;
166    #else
167  static int dee_action = dee_READ;  static int dee_action = dee_READ;
168    #endif
169    
170  static int DEE_action = DEE_READ;  static int DEE_action = DEE_READ;
171  static int error_count = 0;  static int error_count = 0;
172  static int filenames = FN_DEFAULT;  static int filenames = FN_DEFAULT;
173    static int pcre_options = 0;
174  static int process_options = 0;  static int process_options = 0;
175    
176    #ifdef SUPPORT_PCREGREP_JIT
177    static int study_options = PCRE_STUDY_JIT_COMPILE;
178    #else
179    static int study_options = 0;
180    #endif
181    
182    static unsigned long int match_limit = 0;
183    static unsigned long int match_limit_recursion = 0;
184    
185  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
186  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
187    static BOOL file_offsets = FALSE;
188  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
189  static BOOL invert = FALSE;  static BOOL invert = FALSE;
190    static BOOL line_buffered = FALSE;
191    static BOOL line_offsets = FALSE;
192  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
193  static BOOL number = FALSE;  static BOOL number = FALSE;
194  static BOOL only_matching = FALSE;  static BOOL omit_zero_count = FALSE;
195    static BOOL resource_error = FALSE;
196  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
197    static BOOL show_only_matching = FALSE;
198  static BOOL silent = FALSE;  static BOOL silent = FALSE;
199    static BOOL utf8 = FALSE;
200    
201    /* Structure for list of --only-matching capturing numbers. */
202    
203    typedef struct omstr {
204      struct omstr *next;
205      int groupnum;
206    } omstr;
207    
208    static omstr *only_matching = NULL;
209    static omstr *only_matching_last = NULL;
210    
211    /* Structure for holding the two variables that describe a number chain. */
212    
213    typedef struct omdatastr {
214      omstr **anchor;
215      omstr **lastptr;
216    } omdatastr;
217    
218    static omdatastr only_matching_data = { &only_matching, &only_matching_last };
219    
220    /* Structure for list of file names (for -f and --{in,ex}clude-from) */
221    
222    typedef struct fnstr {
223      struct fnstr *next;
224      char *name;
225    } fnstr;
226    
227    static fnstr *exclude_from = NULL;
228    static fnstr *exclude_from_last = NULL;
229    static fnstr *include_from = NULL;
230    static fnstr *include_from_last = NULL;
231    
232    static fnstr *file_lists = NULL;
233    static fnstr *file_lists_last = NULL;
234    static fnstr *pattern_files = NULL;
235    static fnstr *pattern_files_last = NULL;
236    
237    /* Structure for holding the two variables that describe a file name chain. */
238    
239    typedef struct fndatastr {
240      fnstr **anchor;
241      fnstr **lastptr;
242    } fndatastr;
243    
244    static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
245    static fndatastr include_from_data = { &include_from, &include_from_last };
246    static fndatastr file_lists_data = { &file_lists, &file_lists_last };
247    static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
248    
249    /* Structure for pattern and its compiled form; used for matching patterns and
250    also for include/exclude patterns. */
251    
252    typedef struct patstr {
253      struct patstr *next;
254      char *string;
255      pcre *compiled;
256      pcre_extra *hint;
257    } patstr;
258    
259    static patstr *patterns = NULL;
260    static patstr *patterns_last = NULL;
261    static patstr *include_patterns = NULL;
262    static patstr *include_patterns_last = NULL;
263    static patstr *exclude_patterns = NULL;
264    static patstr *exclude_patterns_last = NULL;
265    static patstr *include_dir_patterns = NULL;
266    static patstr *include_dir_patterns_last = NULL;
267    static patstr *exclude_dir_patterns = NULL;
268    static patstr *exclude_dir_patterns_last = NULL;
269    
270    /* Structure holding the two variables that describe a pattern chain. A pointer
271    to such structures is used for each appropriate option. */
272    
273    typedef struct patdatastr {
274      patstr **anchor;
275      patstr **lastptr;
276    } patdatastr;
277    
278    static patdatastr match_patdata = { &patterns, &patterns_last };
279    static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
280    static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
281    static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
282    static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
283    
284    static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
285                                     &include_dir_patterns, &exclude_dir_patterns };
286    
287    static const char *incexname[4] = { "--include", "--exclude",
288                                        "--include-dir", "--exclude-dir" };
289    
290  /* Structure for options and list of them */  /* Structure for options and list of them */
291    
292  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
293         OP_PATLIST };         OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
294    
295  typedef struct option_item {  typedef struct option_item {
296    int type;    int type;
# Line 151  typedef struct option_item { Line 303  typedef struct option_item {
303  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
304  used to identify them. */  used to identify them. */
305    
306  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
307  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
308  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
309  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
310  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
311  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
312  #define N_NULL      (-7)  #define N_LABEL        (-7)
313    #define N_LOCALE       (-8)
314    #define N_NULL         (-9)
315    #define N_LOFFSETS     (-10)
316    #define N_FOFFSETS     (-11)
317    #define N_LBUFFER      (-12)
318    #define N_M_LIMIT      (-13)
319    #define N_M_LIMIT_REC  (-14)
320    #define N_BUFSIZE      (-15)
321    #define N_NOJIT        (-16)
322    #define N_FILE_LIST    (-17)
323    #define N_BINARY_FILES (-18)
324    #define N_EXCLUDE_FROM (-19)
325    #define N_INCLUDE_FROM (-20)
326    #define N_OM_SEPARATOR (-21)
327    
328  static option_item optionlist[] = {  static option_item optionlist[] = {
329    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
330    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
331    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
332    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
333    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
334    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },    { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
335    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
336    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
337    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
338    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
339    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
340    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
341    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
342    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
343    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
344    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
345    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
346    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
347    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
348    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
349    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
350    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
351    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },  #ifdef SUPPORT_PCREGREP_JIT
352    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
353    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },  #else
354    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
355    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },  #endif
356      { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
357      { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
358      { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
359      { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
360      { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
361      { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
362      { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
363      { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
364      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
365      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
366      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
367      { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
368      { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
369      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
370      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
371      { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
372      { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
373      { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
374      { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
375      { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
376      { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
377    
378      /* These two were accidentally implemented with underscores instead of
379      hyphens in the option names. As this was not discovered for several releases,
380      the incorrect versions are left in the table for compatibility. However, the
381      --help function misses out any option that has an underscore in its name. */
382    
383      { OP_PATLIST,   N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
384      { OP_PATLIST,   N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
385    
386  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
387    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
388  #endif  #endif
# Line 202  static option_item optionlist[] = { Line 398  static option_item optionlist[] = {
398  /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F  /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
399  options. These set the 1, 2, and 4 bits in process_options, respectively. Note  options. These set the 1, 2, and 4 bits in process_options, respectively. Note
400  that the combination of -w and -x has the same effect as -x on its own, so we  that the combination of -w and -x has the same effect as -x on its own, so we
401  can treat them as the same. */  can treat them as the same. Note that the MAXPATLEN macro assumes the longest
402    prefix+suffix is 10 characters; if anything longer is added, it must be
403    adjusted. */
404    
405  static const char *prefix[] = {  static const char *prefix[] = {
406    "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };    "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
# Line 210  static const char *prefix[] = { Line 408  static const char *prefix[] = {
408  static const char *suffix[] = {  static const char *suffix[] = {
409    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
410    
411    /* UTF-8 tables - used only when the newline setting is "any". */
412    
413    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
414    
415    const char utf8_table4[] = {
416      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
417      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
418      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
419      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
420    
421    
422    
423    /*************************************************
424    *         Exit from the program                  *
425    *************************************************/
426    
427    /* If there has been a resource error, give a suitable message.
428    
429    Argument:  the return code
430    Returns:   does not return
431    */
432    
433    static void
434    pcregrep_exit(int rc)
435    {
436    if (resource_error)
437      {
438      fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
439        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
440        PCRE_ERROR_JIT_STACKLIMIT);
441      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
442      }
443    exit(rc);
444    }
445    
446    
447    /*************************************************
448    *          Add item to chain of patterns         *
449    *************************************************/
450    
451    /* Used to add an item onto a chain, or just return an unconnected item if the
452    "after" argument is NULL.
453    
454    Arguments:
455      s          pattern string to add
456      after      if not NULL points to item to insert after
457    
458    Returns:     new pattern block or NULL on error
459    */
460    
461    static patstr *
462    add_pattern(char *s, patstr *after)
463    {
464    patstr *p = (patstr *)malloc(sizeof(patstr));
465    if (p == NULL)
466      {
467      fprintf(stderr, "pcregrep: malloc failed\n");
468      pcregrep_exit(2);
469      }
470    if (strlen(s) > MAXPATLEN)
471      {
472      fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
473        MAXPATLEN);
474      free(p);
475      return NULL;
476      }
477    p->next = NULL;
478    p->string = s;
479    p->compiled = NULL;
480    p->hint = NULL;
481    
482    if (after != NULL)
483      {
484      p->next = after->next;
485      after->next = p;
486      }
487    return p;
488    }
489    
490    
491    /*************************************************
492    *           Free chain of patterns               *
493    *************************************************/
494    
495    /* Used for several chains of patterns.
496    
497    Argument: pointer to start of chain
498    Returns:  nothing
499    */
500    
501    static void
502    free_pattern_chain(patstr *pc)
503    {
504    while (pc != NULL)
505      {
506      patstr *p = pc;
507      pc = p->next;
508      if (p->hint != NULL) pcre_free_study(p->hint);
509      if (p->compiled != NULL) pcre_free(p->compiled);
510      free(p);
511      }
512    }
513    
514    
515    /*************************************************
516    *           Free chain of file names             *
517    *************************************************/
518    
519    /*
520    Argument: pointer to start of chain
521    Returns:  nothing
522    */
523    
524    static void
525    free_file_chain(fnstr *fn)
526    {
527    while (fn != NULL)
528      {
529      fnstr *f = fn;
530      fn = f->next;
531      free(f);
532      }
533    }
534    
535    
536  /*************************************************  /*************************************************
537  *            OS-specific functions               *  *            OS-specific functions               *
538  *************************************************/  *************************************************/
539    
540  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific.
541  although at present the only ones are for Unix, Win32, and for "no support". */  At present there are versions for Unix-style environments, Windows, native
542    z/OS, and "no support". */
543    
544    
545  /************* Directory scanning in Unix ***********/  /************* Directory scanning Unix-style and z/OS ***********/
546    
547  #if IS_UNIX  #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
548  #include <sys/types.h>  #include <sys/types.h>
549  #include <sys/stat.h>  #include <sys/stat.h>
550  #include <dirent.h>  #include <dirent.h>
551    
552    #if defined NATIVE_ZOS
553    /************* Directory and PDS/E scanning for z/OS ***********/
554    /************* z/OS looks mostly like Unix with USS ************/
555    /* However, z/OS needs the #include statements in this header */
556    #include "pcrzosfs.h"
557    /* That header is not included in the main PCRE distribution because
558       other apparatus is needed to compile pcregrep for z/OS. The header
559       can be found in the special z/OS distribution, which is available
560       from www.zaconsultants.net or from www.cbttape.org. */
561    #endif
562    
563  typedef DIR directory_type;  typedef DIR directory_type;
564    #define FILESEP '/'
565    
566  static int  static int
567  isdirectory(char *filename)  isdirectory(char *filename)
# Line 235  isdirectory(char *filename) Line 569  isdirectory(char *filename)
569  struct stat statbuf;  struct stat statbuf;
570  if (stat(filename, &statbuf) < 0)  if (stat(filename, &statbuf) < 0)
571    return 0;        /* In the expectation that opening as a file will fail */    return 0;        /* In the expectation that opening as a file will fail */
572  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return (statbuf.st_mode & S_IFMT) == S_IFDIR;
573  }  }
574    
575  static directory_type *  static directory_type *
# Line 254  for (;;) Line 588  for (;;)
588    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
589      return dent->d_name;      return dent->d_name;
590    }    }
591  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
592  }  }
593    
594  static void  static void
# Line 264  closedir(dir); Line 598  closedir(dir);
598  }  }
599    
600    
601  /************* Test for regular file in Unix **********/  /************* Test for regular file, Unix-style **********/
602    
603  static int  static int
604  isregfile(char *filename)  isregfile(char *filename)
# Line 276  return (statbuf.st_mode & S_IFMT) == S_I Line 610  return (statbuf.st_mode & S_IFMT) == S_I
610  }  }
611    
612    
613  /************* Test stdout for being a terminal in Unix **********/  #if defined NATIVE_ZOS
614    /************* Test for a terminal in z/OS **********/
615    /* isatty() does not work in a TSO environment, so always give FALSE.*/
616    
617    static BOOL
618    is_stdout_tty(void)
619    {
620    return FALSE;
621    }
622    
623    static BOOL
624    is_file_tty(FILE *f)
625    {
626    return FALSE;
627    }
628    
629    
630    /************* Test for a terminal, Unix-style **********/
631    
632    #else
633  static BOOL  static BOOL
634  is_stdout_tty(void)  is_stdout_tty(void)
635  {  {
636  return isatty(fileno(stdout));  return isatty(fileno(stdout));
637  }  }
638    
639    static BOOL
640    is_file_tty(FILE *f)
641    {
642    return isatty(fileno(f));
643    }
644    #endif
645    
646    /* End of Unix-style or native z/OS environment functions. */
647    
648  /************* Directory scanning in Win32 ***********/  
649    /************* Directory scanning in Windows ***********/
650    
651  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
652  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
653  when it did not exist. */  when it did not exist. David Byron added a patch that moved the #include of
654    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
655    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
656    undefined when it is indeed undefined. */
657    
658  #elif HAVE_WIN32API  #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
659    
660  #ifndef STRICT  #ifndef STRICT
661  # define STRICT  # define STRICT
# Line 300  when it did not exist. */ Line 663  when it did not exist. */
663  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
664  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
665  #endif  #endif
666    
667    #include <windows.h>
668    
669  #ifndef INVALID_FILE_ATTRIBUTES  #ifndef INVALID_FILE_ATTRIBUTES
670  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
671  #endif  #endif
672    
 #include <windows.h>  
   
673  typedef struct directory_type  typedef struct directory_type
674  {  {
675  HANDLE handle;  HANDLE handle;
# Line 313  BOOL first; Line 677  BOOL first;
677  WIN32_FIND_DATA data;  WIN32_FIND_DATA data;
678  } directory_type;  } directory_type;
679    
680    #define FILESEP '/'
681    
682  int  int
683  isdirectory(char *filename)  isdirectory(char *filename)
684  {  {
685  DWORD attr = GetFileAttributes(filename);  DWORD attr = GetFileAttributes(filename);
686  if (attr == INVALID_FILE_ATTRIBUTES)  if (attr == INVALID_FILE_ATTRIBUTES)
687    return 0;    return 0;
688  return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;  return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
689  }  }
690    
691  directory_type *  directory_type *
# Line 330  char *pattern; Line 696  char *pattern;
696  directory_type *dir;  directory_type *dir;
697  DWORD err;  DWORD err;
698  len = strlen(filename);  len = strlen(filename);
699  pattern = (char *) malloc(len + 3);  pattern = (char *)malloc(len + 3);
700  dir = (directory_type *) malloc(sizeof(*dir));  dir = (directory_type *)malloc(sizeof(*dir));
701  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
702    {    {
703    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
704    exit(2);    pcregrep_exit(2);
705    }    }
706  memcpy(pattern, filename, len);  memcpy(pattern, filename, len);
707  memcpy(&(pattern[len]), "\\*", 3);  memcpy(&(pattern[len]), "\\*", 3);
# Line 383  free(dir); Line 749  free(dir);
749  }  }
750    
751    
752  /************* Test for regular file in Win32 **********/  /************* Test for regular file in Windows **********/
753    
754  /* I don't know how to do this, or if it can be done; assume all paths are  /* I don't know how to do this, or if it can be done; assume all paths are
755  regular if they are not directories. */  regular if they are not directories. */
756    
757  int isregfile(char *filename)  int isregfile(char *filename)
758  {  {
759  return !isdirectory(filename)  return !isdirectory(filename);
760  }  }
761    
762    
763  /************* Test stdout for being a terminal in Win32 **********/  /************* Test for a terminal in Windows **********/
764    
765  /* I don't know how to do this; assume never */  /* I don't know how to do this; assume never */
766    
767  static BOOL  static BOOL
768  is_stdout_tty(void)  is_stdout_tty(void)
769  {  {
770  FALSE;  return FALSE;
771    }
772    
773    static BOOL
774    is_file_tty(FILE *f)
775    {
776    return FALSE;
777  }  }
778    
779    /* End of Windows functions */
780    
781    
782  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
783    
# Line 411  FALSE; Line 785  FALSE;
785    
786  #else  #else
787    
788    #define FILESEP 0
789  typedef void directory_type;  typedef void directory_type;
790    
791  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
792  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
793  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
794  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
795    
796    
797  /************* Test for regular when we can't do it **********/  /************* Test for regular file when we can't do it **********/
798    
799  /* Assume all files are regular. */  /* Assume all files are regular. */
800    
801  int isregfile(char *filename) { return 1; }  int isregfile(char *filename) { return 1; }
802    
803    
804  /************* Test stdout for being a terminal when we can't do it **********/  /************* Test for a terminal when we can't do it **********/
805    
806  static BOOL  static BOOL
807  is_stdout_tty(void)  is_stdout_tty(void)
# Line 434  is_stdout_tty(void) Line 809  is_stdout_tty(void)
809  return FALSE;  return FALSE;
810  }  }
811    
812    static BOOL
813    is_file_tty(FILE *f)
814    {
815    return FALSE;
816    }
817    
818  #endif  #endif  /* End of system-specific functions */
819    
820    
821    
822  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
823  /*************************************************  /*************************************************
824  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
825  *************************************************/  *************************************************/
# Line 462  return sys_errlist[n]; Line 842  return sys_errlist[n];
842    
843    
844  /*************************************************  /*************************************************
845  *       Print the previous "after" lines         *  *                Usage function                  *
846  *************************************************/  *************************************************/
847    
848  /* This is called if we are about to lose said lines because of buffer filling,  static int
849  and at the end of the file. The data in the line is written using fwrite() so  usage(int rc)
850  that a binary zero does not terminate it.  {
851    option_item *op;
852    fprintf(stderr, "Usage: pcregrep [-");
853    for (op = optionlist; op->one_char != 0; op++)
854      {
855      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
856      }
857    fprintf(stderr, "] [long options] [pattern] [files]\n");
858    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
859      "options.\n");
860    return rc;
861    }
862    
 Arguments:  
   lastmatchnumber   the number of the last matching line, plus one  
   lastmatchrestart  where we restarted after the last match  
   endptr            end of available data  
   printname         filename for printing  
863    
 Returns:            nothing  
 */  
864    
865  static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,  /*************************************************
866    char *endptr, char *printname)  *                Help function                   *
867    *************************************************/
868    
869    static void
870    help(void)
871  {  {
872  if (after_context > 0 && lastmatchnumber > 0)  option_item *op;
873    
874    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
875    printf("Search for PATTERN in each FILE or standard input.\n");
876    printf("PATTERN must be present if neither -e nor -f is used.\n");
877    printf("\"-\" can be used as a file name to mean STDIN.\n");
878    
879    #ifdef SUPPORT_LIBZ
880    printf("Files whose names end in .gz are read using zlib.\n");
881    #endif
882    
883    #ifdef SUPPORT_LIBBZ2
884    printf("Files whose names end in .bz2 are read using bzlib2.\n");
885    #endif
886    
887    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
888    printf("Other files and the standard input are read as plain files.\n\n");
889    #else
890    printf("All files are read as plain files, without any interpretation.\n\n");
891    #endif
892    
893    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
894    printf("Options:\n");
895    
896    for (op = optionlist; op->one_char != 0; op++)
897    {    {
898    int count = 0;    int n;
899    while (lastmatchrestart < endptr && count++ < after_context)    char s[4];
900    
901      /* Two options were accidentally implemented and documented with underscores
902      instead of hyphens in their names, something that was not noticed for quite a
903      few releases. When fixing this, I left the underscored versions in the list
904      in case people were using them. However, we don't want to display them in the
905      help data. There are no other options that contain underscores, and we do not
906      expect ever to implement such options. Therefore, just omit any option that
907      contains an underscore. */
908    
909      if (strchr(op->long_name, '_') != NULL) continue;
910    
911      if (op->one_char > 0 && (op->long_name)[0] == 0)
912        n = 31 - printf("  -%c", op->one_char);
913      else
914      {      {
915      char *pp = lastmatchrestart;      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
916      if (printname != NULL) fprintf(stdout, "%s-", printname);        else strcpy(s, "   ");
917      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      n = 31 - printf("  %s --%s", s, op->long_name);
     while (*pp != '\n') pp++;  
     fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);  
     lastmatchrestart = pp + 1;  
918      }      }
919    hyphenpending = TRUE;  
920      if (n < 1) n = 1;
921      printf("%.*s%s\n", n, "                           ", op->help_text);
922    }    }
923    
924    printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
925    printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
926    printf("When reading patterns or file names from a file, trailing white\n");
927    printf("space is removed and blank lines are ignored.\n");
928    printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
929    
930    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
931    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
932  }  }
933    
934    
935    
936  /*************************************************  /*************************************************
937  *            Grep an individual file             *  *            Test exclude/includes               *
938  *************************************************/  *************************************************/
939    
940  /* This is called from grep_or_recurse() below. It uses a buffer that is three  /* If any exclude pattern matches, the path is excluded. Otherwise, unless
941  times the value of MBUFTHIRD. The matching point is never allowed to stray into  there are no includes, the path must match an include pattern.
 the top third of the buffer, thus keeping more of the file available for  
 context printing or for multiline scanning. For large files, the pointer will  
 be in the middle third most of the time, so the bottom third is available for  
 "before" context printing.  
942    
943  Arguments:  Arguments:
944    in           the fopened FILE stream    path      the path to be matched
945    printname    the file name if it is to be printed for each match    ip        the chain of include patterns
946                 or NULL if the file name is not to be printed    ep        the chain of exclude patterns
                it cannot be NULL if filenames[_nomatch]_only is set  
947    
948  Returns:       0 if there was at least one match  Returns:    TRUE if the path is not excluded
                1 otherwise (no matches)  
949  */  */
950    
951  static int  static BOOL
952  pcregrep(FILE *in, char *printname)  test_incexc(char *path, patstr *ip, patstr *ep)
953  {  {
954  int rc = 1;  int plen = strlen(path);
 int linenumber = 1;  
 int lastmatchnumber = 0;  
 int count = 0;  
 int offsets[99];  
 char *lastmatchrestart = NULL;  
 char buffer[3*MBUFTHIRD];  
 char *ptr = buffer;  
 char *endptr;  
 size_t bufflength;  
 BOOL endhyphenpending = FALSE;  
   
 /* Do the first read into the start of the buffer and set up the pointer to  
 end of what we have. */  
955    
956  bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  for (; ep != NULL; ep = ep->next)
957  endptr = buffer + bufflength;    {
958      if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
959        return FALSE;
960      }
961    
962  /* Loop while the current pointer is not at the end of the file. For large  if (ip == NULL) return TRUE;
 files, endptr will be at the end of the buffer when we are in the middle of the  
 file, but ptr will never get there, because as soon as it gets over 2/3 of the  
 way, the buffer is shifted left and re-filled. */  
963    
964  while (ptr < endptr)  for (; ip != NULL; ip = ip->next)
965    {    {
966    int i;    if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
967    int mrc = 0;      return TRUE;
968    BOOL match = FALSE;    }
   char *t = ptr;  
   size_t length, linelength;  
969    
970    /* At this point, ptr is at the start of a line. We need to find the length  return FALSE;
971    of the subject string to pass to pcre_exec(). In multiline mode, it is the  }
   length remainder of the data in the buffer. Otherwise, it is the length of  
   the next line. After matching, we always advance by the length of the next  
   line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so  
   that any match is constrained to be in the first line. */  
   
   linelength = 0;  
   while (t < endptr && *t++ != '\n') linelength++;  
   length = multiline? endptr - ptr : linelength;  
   
   /* Run through all the patterns until one matches. Note that we don't include  
   the final newline in the subject string. */  
   
   for (i = 0; i < pattern_count; i++)  
     {  
     mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,  
       offsets, 99);  
     if (mrc >= 0) { match = TRUE; break; }  
     if (mrc != PCRE_ERROR_NOMATCH)  
       {  
       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);  
       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
       fprintf(stderr, "this line:\n");  
       fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */  
       fprintf(stderr, "\n");  
       if (error_count == 0 &&  
           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))  
         {  
         fprintf(stderr, "pcregrep: error %d means that a resource limit "  
           "was exceeded\n", mrc);  
         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
         }  
       if (error_count++ > 20)  
         {  
         fprintf(stderr, "pcregrep: too many errors - abandoned\n");  
         exit(2);  
         }  
       match = invert;    /* No more matching; don't show the line again */  
       break;  
       }  
     }  
972    
973    /* If it's a match or a not-match (as required), do what's wanted. */  
974    
975    /*************************************************
976    *         Decode integer argument value          *
977    *************************************************/
978    
979    /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
980    because SunOS4 doesn't have it. This is used only for unpicking arguments, so
981    just keep it simple.
982    
983    Arguments:
984      option_data   the option data string
985      op            the option item (for error messages)
986      longop        TRUE if option given in long form
987    
988    Returns:        a long integer
989    */
990    
991    static long int
992    decode_number(char *option_data, option_item *op, BOOL longop)
993    {
994    unsigned long int n = 0;
995    char *endptr = option_data;
996    while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
997    while (isdigit((unsigned char)(*endptr)))
998      n = n * 10 + (int)(*endptr++ - '0');
999    if (toupper(*endptr) == 'K')
1000      {
1001      n *= 1024;
1002      endptr++;
1003      }
1004    else if (toupper(*endptr) == 'M')
1005      {
1006      n *= 1024*1024;
1007      endptr++;
1008      }
1009    
1010    if (*endptr != 0)   /* Error */
1011      {
1012      if (longop)
1013        {
1014        char *equals = strchr(op->long_name, '=');
1015        int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1016          (int)(equals - op->long_name);
1017        fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1018          option_data, nlen, op->long_name);
1019        }
1020      else
1021        fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1022          option_data, op->one_char);
1023      pcregrep_exit(usage(2));
1024      }
1025    
1026    return n;
1027    }
1028    
1029    
1030    
1031    /*************************************************
1032    *       Add item to a chain of numbers           *
1033    *************************************************/
1034    
1035    /* Used to add an item onto a chain, or just return an unconnected item if the
1036    "after" argument is NULL.
1037    
1038    Arguments:
1039      n          the number to add
1040      after      if not NULL points to item to insert after
1041    
1042    Returns:     new number block
1043    */
1044    
1045    static omstr *
1046    add_number(int n, omstr *after)
1047    {
1048    omstr *om = (omstr *)malloc(sizeof(omstr));
1049    
1050    if (om == NULL)
1051      {
1052      fprintf(stderr, "pcregrep: malloc failed\n");
1053      pcregrep_exit(2);
1054      }
1055    om->next = NULL;
1056    om->groupnum = n;
1057    
1058    if (after != NULL)
1059      {
1060      om->next = after->next;
1061      after->next = om;
1062      }
1063    return om;
1064    }
1065    
1066    
1067    
/*************************************************
*            Read one line of input              *
*************************************************/

/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a '\n' has been stored, when "length" characters have been
stored, or at end of file, whichever comes first. The data is not zero
terminated. The length is tested before each character is fetched, so nothing
is read or stored when "length" is zero or negative.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;
while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = c;
  if (c == '\n') break;
  }
return yield;
}
1099    
1100    
1101    
1102    /*************************************************
1103    *             Find end of line                   *
1104    *************************************************/
1105    
1106    /* The length of the endline sequence that is found is set via lenptr. This may
1107    be zero at the very end of the file if there is no line-ending sequence there.
1108    
1109    Arguments:
1110      p         current position in line
1111      endptr    end of available data
1112      lenptr    where to put the length of the eol sequence
1113    
1114    Returns:    pointer after the last byte of the line,
1115                including the newline byte(s)
1116    */
1117    
1118    static char *
1119    end_of_line(char *p, char *endptr, int *lenptr)
1120    {
1121    switch(endlinetype)
1122      {
1123      default:      /* Just in case */
1124      case EL_LF:
1125      while (p < endptr && *p != '\n') p++;
1126      if (p < endptr)
1127        {
1128        *lenptr = 1;
1129        return p + 1;
1130        }
1131      *lenptr = 0;
1132      return endptr;
1133    
1134      case EL_CR:
1135      while (p < endptr && *p != '\r') p++;
1136      if (p < endptr)
1137        {
1138        *lenptr = 1;
1139        return p + 1;
1140        }
1141      *lenptr = 0;
1142      return endptr;
1143    
1144      case EL_CRLF:
1145      for (;;)
1146        {
1147        while (p < endptr && *p != '\r') p++;
1148        if (++p >= endptr)
1149          {
1150          *lenptr = 0;
1151          return endptr;
1152          }
1153        if (*p == '\n')
1154          {
1155          *lenptr = 2;
1156          return p + 1;
1157          }
1158        }
1159      break;
1160    
1161      case EL_ANYCRLF:
1162      while (p < endptr)
1163        {
1164        int extra = 0;
1165        register int c = *((unsigned char *)p);
1166    
1167        if (utf8 && c >= 0xc0)
1168          {
1169          int gcii, gcss;
1170          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1171          gcss = 6*extra;
1172          c = (c & utf8_table3[extra]) << gcss;
1173          for (gcii = 1; gcii <= extra; gcii++)
1174            {
1175            gcss -= 6;
1176            c |= (p[gcii] & 0x3f) << gcss;
1177            }
1178          }
1179    
1180        p += 1 + extra;
1181    
1182        switch (c)
1183          {
1184          case '\n':
1185          *lenptr = 1;
1186          return p;
1187    
1188          case '\r':
1189          if (p < endptr && *p == '\n')
1190            {
1191            *lenptr = 2;
1192            p++;
1193            }
1194          else *lenptr = 1;
1195          return p;
1196    
1197          default:
1198          break;
1199          }
1200        }   /* End of loop for ANYCRLF case */
1201    
1202      *lenptr = 0;  /* Must have hit the end */
1203      return endptr;
1204    
1205      case EL_ANY:
1206      while (p < endptr)
1207        {
1208        int extra = 0;
1209        register int c = *((unsigned char *)p);
1210    
1211        if (utf8 && c >= 0xc0)
1212          {
1213          int gcii, gcss;
1214          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1215          gcss = 6*extra;
1216          c = (c & utf8_table3[extra]) << gcss;
1217          for (gcii = 1; gcii <= extra; gcii++)
1218            {
1219            gcss -= 6;
1220            c |= (p[gcii] & 0x3f) << gcss;
1221            }
1222          }
1223    
1224        p += 1 + extra;
1225    
1226        switch (c)
1227          {
1228          case '\n':    /* LF */
1229          case '\v':    /* VT */
1230          case '\f':    /* FF */
1231          *lenptr = 1;
1232          return p;
1233    
1234          case '\r':    /* CR */
1235          if (p < endptr && *p == '\n')
1236            {
1237            *lenptr = 2;
1238            p++;
1239            }
1240          else *lenptr = 1;
1241          return p;
1242    
1243    #ifndef EBCDIC
1244          case 0x85:    /* Unicode NEL */
1245          *lenptr = utf8? 2 : 1;
1246          return p;
1247    
1248          case 0x2028:  /* Unicode LS */
1249          case 0x2029:  /* Unicode PS */
1250          *lenptr = 3;
1251          return p;
1252    #endif  /* Not EBCDIC */
1253    
1254          default:
1255          break;
1256          }
1257        }   /* End of loop for ANY case */
1258    
1259      *lenptr = 0;  /* Must have hit the end */
1260      return endptr;
1261      }     /* End of overall switch */
1262    }
1263    
1264    
1265    
1266    /*************************************************
1267    *         Find start of previous line            *
1268    *************************************************/
1269    
1270    /* This is called when looking back for before lines to print.
1271    
1272    Arguments:
1273      p         start of the subsequent line
1274      startptr  start of available data
1275    
1276    Returns:    pointer to the start of the previous line
1277    */
1278    
1279    static char *
1280    previous_line(char *p, char *startptr)
1281    {
1282    switch(endlinetype)
1283      {
1284      default:      /* Just in case */
1285      case EL_LF:
1286      p--;
1287      while (p > startptr && p[-1] != '\n') p--;
1288      return p;
1289    
1290      case EL_CR:
1291      p--;
1292      while (p > startptr && p[-1] != '\n') p--;
1293      return p;
1294    
1295      case EL_CRLF:
1296      for (;;)
1297        {
1298        p -= 2;
1299        while (p > startptr && p[-1] != '\n') p--;
1300        if (p <= startptr + 1 || p[-2] == '\r') return p;
1301        }
1302      /* Control can never get here */
1303    
1304      case EL_ANY:
1305      case EL_ANYCRLF:
1306      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1307      if (utf8) while ((*p & 0xc0) == 0x80) p--;
1308    
1309      while (p > startptr)
1310        {
1311        register unsigned int c;
1312        char *pp = p - 1;
1313    
1314        if (utf8)
1315          {
1316          int extra = 0;
1317          while ((*pp & 0xc0) == 0x80) pp--;
1318          c = *((unsigned char *)pp);
1319          if (c >= 0xc0)
1320            {
1321            int gcii, gcss;
1322            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1323            gcss = 6*extra;
1324            c = (c & utf8_table3[extra]) << gcss;
1325            for (gcii = 1; gcii <= extra; gcii++)
1326              {
1327              gcss -= 6;
1328              c |= (pp[gcii] & 0x3f) << gcss;
1329              }
1330            }
1331          }
1332        else c = *((unsigned char *)pp);
1333    
1334        if (endlinetype == EL_ANYCRLF) switch (c)
1335          {
1336          case '\n':    /* LF */
1337          case '\r':    /* CR */
1338          return p;
1339    
1340          default:
1341          break;
1342          }
1343    
1344        else switch (c)
1345          {
1346          case '\n':    /* LF */
1347          case '\v':    /* VT */
1348          case '\f':    /* FF */
1349          case '\r':    /* CR */
1350    #ifndef EBCDIE
1351          case 0x85:    /* Unicode NEL */
1352          case 0x2028:  /* Unicode LS */
1353          case 0x2029:  /* Unicode PS */
1354    #endif  /* Not EBCDIC */
1355          return p;
1356    
1357          default:
1358          break;
1359          }
1360    
1361        p = pp;  /* Back one character */
1362        }        /* End of loop for ANY case */
1363    
1364      return startptr;  /* Hit start of data */
1365      }     /* End of overall switch */
1366    }
1367    
1368    
1369    
1370    
1371    
1372    /*************************************************
1373    *       Print the previous "after" lines         *
1374    *************************************************/
1375    
1376    /* This is called if we are about to lose said lines because of buffer filling,
1377    and at the end of the file. The data in the line is written using fwrite() so
1378    that a binary zero does not terminate it.
1379    
1380    Arguments:
1381      lastmatchnumber   the number of the last matching line, plus one
1382      lastmatchrestart  where we restarted after the last match
1383      endptr            end of available data
1384      printname         filename for printing
1385    
1386    Returns:            nothing
1387    */
1388    
1389    static void
1390    do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1391      char *printname)
1392    {
1393    if (after_context > 0 && lastmatchnumber > 0)
1394      {
1395      int count = 0;
1396      while (lastmatchrestart < endptr && count++ < after_context)
1397        {
1398        int ellength;
1399        char *pp = lastmatchrestart;
1400        if (printname != NULL) fprintf(stdout, "%s-", printname);
1401        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1402        pp = end_of_line(pp, endptr, &ellength);
1403        FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1404        lastmatchrestart = pp;
1405        }
1406      hyphenpending = TRUE;
1407      }
1408    }
1409    
1410    
1411    
1412    /*************************************************
1413    *   Apply patterns to subject till one matches   *
1414    *************************************************/
1415    
1416    /* This function is called to run through all patterns, looking for a match. It
1417    is used multiple times for the same subject when colouring is enabled, in order
1418    to find all possible matches.
1419    
1420    Arguments:
1421      matchptr     the start of the subject
1422      length       the length of the subject to match
1423      options      options for pcre_exec
1424      startoffset  where to start matching
1425      offsets      the offets vector to fill in
1426      mrc          address of where to put the result of pcre_exec()
1427    
1428    Returns:      TRUE if there was a match
1429                  FALSE if there was no match
1430                  invert if there was a non-fatal error
1431    */
1432    
1433    static BOOL
1434    match_patterns(char *matchptr, size_t length, unsigned int options,
1435      int startoffset, int *offsets, int *mrc)
1436    {
1437    int i;
1438    size_t slen = length;
1439    patstr *p = patterns;
1440    const char *msg = "this text:\n\n";
1441    
1442    if (slen > 200)
1443      {
1444      slen = 200;
1445      msg = "text that starts:\n\n";
1446      }
1447    for (i = 1; p != NULL; p = p->next, i++)
1448      {
1449      *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1450        startoffset, options, offsets, OFFSET_SIZE);
1451      if (*mrc >= 0) return TRUE;
1452      if (*mrc == PCRE_ERROR_NOMATCH) continue;
1453      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1454      if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1455      fprintf(stderr, "%s", msg);
1456      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1457      fprintf(stderr, "\n\n");
1458      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1459          *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1460        resource_error = TRUE;
1461      if (error_count++ > 20)
1462        {
1463        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1464        pcregrep_exit(2);
1465        }
1466      return invert;    /* No more matching; don't show the line again */
1467      }
1468    
1469    return FALSE;  /* No match, no errors */
1470    }
1471    
1472    
1473    
1474    /*************************************************
1475    *            Grep an individual file             *
1476    *************************************************/
1477    
1478    /* This is called from grep_or_recurse() below. It uses a buffer that is three
1479    times the value of bufthird. The matching point is never allowed to stray into
1480    the top third of the buffer, thus keeping more of the file available for
1481    context printing or for multiline scanning. For large files, the pointer will
1482    be in the middle third most of the time, so the bottom third is available for
1483    "before" context printing.
1484    
1485    Arguments:
1486      handle       the fopened FILE stream for a normal file
1487                   the gzFile pointer when reading is via libz
1488                   the BZFILE pointer when reading is via libbz2
1489      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1490      filename     the file name or NULL (for errors)
1491      printname    the file name if it is to be printed for each match
1492                   or NULL if the file name is not to be printed
1493                   it cannot be NULL if filenames[_nomatch]_only is set
1494    
1495    Returns:       0 if there was at least one match
1496                   1 otherwise (no matches)
1497                   2 if an overlong line is encountered
1498                   3 if there is a read error on a .bz2 file
1499    */
1500    
1501    static int
1502    pcregrep(void *handle, int frtype, char *filename, char *printname)
1503    {
1504    int rc = 1;
1505    int linenumber = 1;
1506    int lastmatchnumber = 0;
1507    int count = 0;
1508    int filepos = 0;
1509    int offsets[OFFSET_SIZE];
1510    char *lastmatchrestart = NULL;
1511    char *ptr = main_buffer;
1512    char *endptr;
1513    size_t bufflength;
1514    BOOL binary = FALSE;
1515    BOOL endhyphenpending = FALSE;
1516    BOOL input_line_buffered = line_buffered;
1517    FILE *in = NULL;                    /* Ensure initialized */
1518    
1519    #ifdef SUPPORT_LIBZ
1520    gzFile ingz = NULL;
1521    #endif
1522    
1523    #ifdef SUPPORT_LIBBZ2
1524    BZFILE *inbz2 = NULL;
1525    #endif
1526    
1527    
1528    /* Do the first read into the start of the buffer and set up the pointer to end
1529    of what we have. In the case of libz, a non-zipped .gz file will be read as a
1530    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1531    fail. */
1532    
1533    (void)frtype;
1534    
1535    #ifdef SUPPORT_LIBZ
1536    if (frtype == FR_LIBZ)
1537      {
1538      ingz = (gzFile)handle;
1539      bufflength = gzread (ingz, main_buffer, bufsize);
1540      }
1541    else
1542    #endif
1543    
1544    #ifdef SUPPORT_LIBBZ2
1545    if (frtype == FR_LIBBZ2)
1546      {
1547      inbz2 = (BZFILE *)handle;
1548      bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1549      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1550      }                                    /* without the cast it is unsigned. */
1551    else
1552    #endif
1553    
1554      {
1555      in = (FILE *)handle;
1556      if (is_file_tty(in)) input_line_buffered = TRUE;
1557      bufflength = input_line_buffered?
1558        read_one_line(main_buffer, bufsize, in) :
1559        fread(main_buffer, 1, bufsize, in);
1560      }
1561    
1562    endptr = main_buffer + bufflength;
1563    
1564    /* Unless binary-files=text, see if we have a binary file. This uses the same
1565    rule as GNU grep, namely, a search for a binary zero byte near the start of the
1566    file. */
1567    
1568    if (binary_files != BIN_TEXT)
1569      {
1570      binary =
1571        memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1572      if (binary && binary_files == BIN_NOMATCH) return 1;
1573      }
1574    
1575    /* Loop while the current pointer is not at the end of the file. For large
1576    files, endptr will be at the end of the buffer when we are in the middle of the
1577    file, but ptr will never get there, because as soon as it gets over 2/3 of the
1578    way, the buffer is shifted left and re-filled. */
1579    
1580    while (ptr < endptr)
1581      {
1582      int endlinelength;
1583      int mrc = 0;
1584      int startoffset = 0;
1585      unsigned int options = 0;
1586      BOOL match;
1587      char *matchptr = ptr;
1588      char *t = ptr;
1589      size_t length, linelength;
1590    
1591      /* At this point, ptr is at the start of a line. We need to find the length
1592      of the subject string to pass to pcre_exec(). In multiline mode, it is the
1593      length of the remainder of the data in the buffer. Otherwise, it is the length of
1594      the next line, excluding the terminating newline. After matching, we always
1595      advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1596      option is used for compiling, so that any match is constrained to be in the
1597      first line. */
1598    
1599      t = end_of_line(t, endptr, &endlinelength);
1600      linelength = t - ptr - endlinelength;
1601      length = multiline? (size_t)(endptr - ptr) : linelength;
1602    
1603      /* Check to see if the line we are looking at extends right to the very end
1604      of the buffer without a line terminator. This means the line is too long to
1605      handle. */
1606    
1607      if (endlinelength == 0 && t == main_buffer + bufsize)
1608        {
1609        fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1610                        "pcregrep: check the --buffer-size option\n",
1611                        linenumber,
1612                        (filename == NULL)? "" : " of file ",
1613                        (filename == NULL)? "" : filename);
1614        return 2;
1615        }
1616    
1617      /* Extra processing for Jeffrey Friedl's debugging. */
1618    
1619    #ifdef JFRIEDL_DEBUG
1620      if (jfriedl_XT || jfriedl_XR)
1621      {
1622    #     include <sys/time.h>
1623    #     include <time.h>
1624          struct timeval start_time, end_time;
1625          struct timezone dummy;
1626          int i;
1627    
1628          if (jfriedl_XT)
1629          {
1630              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1631              const char *orig = ptr;
1632              ptr = malloc(newlen + 1);
1633              if (!ptr) {
1634                      printf("out of memory");
1635                      pcregrep_exit(2);
1636              }
1637              endptr = ptr;
1638              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1639              for (i = 0; i < jfriedl_XT; i++) {
1640                      strncpy(endptr, orig,  length);
1641                      endptr += length;
1642              }
1643              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1644              length = newlen;
1645          }
1646    
1647          if (gettimeofday(&start_time, &dummy) != 0)
1648                  perror("bad gettimeofday");
1649    
1650    
1651          for (i = 0; i < jfriedl_XR; i++)
1652              match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1653                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1654    
1655          if (gettimeofday(&end_time, &dummy) != 0)
1656                  perror("bad gettimeofday");
1657    
1658          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1659                          -
1660                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1661    
1662          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1663          return 0;
1664      }
1665    #endif
1666    
1667      /* We come back here after a match when show_only_matching is set, in order
1668      to find any further matches in the same line. This applies to
1669      --only-matching, --file-offsets, and --line-offsets. */
1670    
1671      ONLY_MATCHING_RESTART:
1672    
1673      /* Run through all the patterns until one matches or there is an error other
1674      than NOMATCH. This code is in a subroutine so that it can be re-used for
1675      finding subsequent matches when colouring matched lines. After finding one
1676      match, set PCRE_NOTEMPTY to disable any further matches of null strings in
1677      this line. */
1678    
1679      match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc);
1680      options = PCRE_NOTEMPTY;
1681    
1682      /* If it's a match or a not-match (as required), do what's wanted. */
1683    
1684    if (match != invert)    if (match != invert)
1685      {      {
# Line 611  while (ptr < endptr) Line 1693  while (ptr < endptr)
1693    
1694      if (count_only) count++;      if (count_only) count++;
1695    
1696        /* When handling a binary file and binary-files==binary, the "binary"
1697        variable will be set true (it's false in all other cases). In this
1698        situation we just want to output the file name. No need to scan further. */
1699    
1700        else if (binary)
1701          {
1702          fprintf(stdout, "Binary file %s matches\n", filename);
1703          return 0;
1704          }
1705    
1706      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1707      in the file. */      in the file. */
1708    
1709      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1710        {        {
1711        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1712        return 0;        return 0;
# Line 624  while (ptr < endptr) Line 1716  while (ptr < endptr)
1716    
1717      else if (quiet) return 0;      else if (quiet) return 0;
1718    
1719      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched,
1720      does not pring any context. */      and/or one or more captured portions of it, as long as these strings are
1721        not empty. The --file-offsets and --line-offsets options output offsets for
1722        the matching substring (all three set show_only_matching). None of these
1723        mutually exclusive options prints any context. Afterwards, adjust the start
1724        and then jump back to look for further matches in the same line. If we are
1725        in invert mode, however, nothing is printed and we do not restart - this
1726        could still be useful because the return code is set. */
1727    
1728      else if (only_matching)      else if (show_only_matching)
1729        {        {
1730        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (!invert)
1731        if (number) fprintf(stdout, "%d:", linenumber);          {
1732        fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1733        fprintf(stdout, "\n");          if (number) fprintf(stdout, "%d:", linenumber);
1734    
1735            /* Handle --line-offsets */
1736    
1737            if (line_offsets)
1738              fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1739                offsets[1] - offsets[0]);
1740    
1741            /* Handle --file-offsets */
1742    
1743            else if (file_offsets)
1744              fprintf(stdout, "%d,%d\n",
1745                (int)(filepos + matchptr + offsets[0] - ptr),
1746                offsets[1] - offsets[0]);
1747    
1748            /* Handle --only-matching, which may occur many times */
1749    
1750            else
1751              {
1752              BOOL printed = FALSE;
1753              omstr *om;
1754    
1755              for (om = only_matching; om != NULL; om = om->next)
1756                {
1757                int n = om->groupnum;
1758                if (n < mrc)
1759                  {
1760                  int plen = offsets[2*n + 1] - offsets[2*n];
1761                  if (plen > 0)
1762                    {
1763                    if (printed) fprintf(stdout, "%s", om_separator);
1764                    if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1765                    FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
1766                    if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1767                    printed = TRUE;
1768                    }
1769                  }
1770                }
1771    
1772              if (printed || printname != NULL || number) fprintf(stdout, "\n");
1773              }
1774    
1775            /* Prepare to repeat to find the next match */
1776    
1777            match = FALSE;
1778            if (line_buffered) fflush(stdout);
1779            rc = 0;                      /* Had some success */
1780            startoffset = offsets[1];    /* Restart after the match */
1781            goto ONLY_MATCHING_RESTART;
1782            }
1783        }        }
1784    
1785      /* This is the default case when none of the above options is set. We print      /* This is the default case when none of the above options is set. We print
# Line 646  while (ptr < endptr) Line 1793  while (ptr < endptr)
1793    
1794        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
1795          {          {
1796            int ellength;
1797          int linecount = 0;          int linecount = 0;
1798          char *p = lastmatchrestart;          char *p = lastmatchrestart;
1799    
1800          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
1801            {            {
1802            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
1803            linecount++;            linecount++;
1804            }            }
1805    
# Line 665  while (ptr < endptr) Line 1812  while (ptr < endptr)
1812            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
1813            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1814            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1815            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1816            fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1817            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
1818            }            }
1819          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
1820          }          }
# Line 689  while (ptr < endptr) Line 1836  while (ptr < endptr)
1836          int linecount = 0;          int linecount = 0;
1837          char *p = ptr;          char *p = ptr;
1838    
1839          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1840                 linecount < before_context)                 linecount < before_context)
1841            {            {
1842            linecount++;            linecount++;
1843            p--;            p = previous_line(p, main_buffer);
           while (p > buffer && p[-1] != '\n') p--;  
1844            }            }
1845    
1846          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 702  while (ptr < endptr) Line 1848  while (ptr < endptr)
1848    
1849          while (p < ptr)          while (p < ptr)
1850            {            {
1851              int ellength;
1852            char *pp = p;            char *pp = p;
1853            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1854            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1855            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1856            fwrite(p, 1, pp - p + 1, stdout);   /* In case binary zero */            FWRITE(p, 1, pp - p, stdout);
1857            p = pp + 1;            p = pp;
1858            }            }
1859          }          }
1860    
# Line 722  while (ptr < endptr) Line 1869  while (ptr < endptr)
1869    
1870        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1871        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1872        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1873        start of the match will always be before the first \n character. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1874          the match will always be before the first newline sequence. */
1875    
1876        if (multiline)        if (multiline & !invert)
1877          {          {
1878          char *endmatch = ptr + offsets[1];          char *endmatch = ptr + offsets[1];
1879          t = ptr;          t = ptr;
1880          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          while (t <= endmatch)
1881          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1882          linelength = endmatch - ptr;            t = end_of_line(t, endptr, &endlinelength);
1883              if (t < endmatch) linenumber++; else break;
1884              }
1885            linelength = t - ptr - endlinelength;
1886          }          }
1887    
1888        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 746  while (ptr < endptr) Line 1897  while (ptr < endptr)
1897          {          {
1898          int first = S_arg * 2;          int first = S_arg * 2;
1899          int last  = first + 1;          int last  = first + 1;
1900          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1901          fprintf(stdout, "X");          fprintf(stdout, "X");
1902          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1903          }          }
1904        else        else
1905  #endif  #endif
1906    
1907        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1908          matches, but not of course if the line is a non-match. */
1909    
1910        if (do_colour)        if (do_colour && !invert)
1911          {          {
1912          fwrite(ptr, 1, offsets[0], stdout);          int plength;
1913            FWRITE(ptr, 1, offsets[0], stdout);
1914          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1915          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1916          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1917          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          for (;;)
1918              {
1919              startoffset = offsets[1];
1920              if (startoffset >= (int)linelength + endlinelength ||
1921                  !match_patterns(matchptr, length, options, startoffset, offsets,
1922                    &mrc))
1923                break;
1924              FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1925              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1926              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1927              fprintf(stdout, "%c[00m", 0x1b);
1928              }
1929    
1930            /* In multiline mode, we may have already printed the complete line
1931            and its line-ending characters (if they matched the pattern), so there
1932            may be no more to print. */
1933    
1934            plength = (int)((linelength + endlinelength) - startoffset);
1935            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1936          }          }
       else fwrite(ptr, 1, linelength, stdout);  
1937    
1938        fprintf(stdout, "\n");        /* Not colouring; no need to search for further matches */
1939    
1940          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1941        }        }
1942    
1943      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match. If --line-buffered was
1944        given, flush the output. */
1945    
1946        if (line_buffered) fflush(stdout);
1947      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1948    
1949      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1950      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1951    
1952      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1953      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1954      }      }
1955    
1956    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1957      anything to be printed), we have to move on to the end of the match before
1958      proceeding. */
1959    
1960      if (multiline && invert && match)
1961        {
1962        int ellength;
1963        char *endmatch = ptr + offsets[1];
1964        t = ptr;
1965        while (t < endmatch)
1966          {
1967          t = end_of_line(t, endptr, &ellength);
1968          if (t <= endmatch) linenumber++; else break;
1969          }
1970        endmatch = end_of_line(endmatch, endptr, &ellength);
1971        linelength = endmatch - ptr - ellength;
1972        }
1973    
1974      /* Advance to after the newline and increment the line number. The file
1975      offset to the current line is maintained in filepos. */
1976    
1977    ptr += linelength + 1;    ptr += linelength + endlinelength;
1978      filepos += (int)(linelength + endlinelength);
1979    linenumber++;    linenumber++;
1980    
1981      /* If input is line buffered, and the buffer is not yet full, read another
1982      line and add it into the buffer. */
1983    
1984      if (input_line_buffered && bufflength < (size_t)bufsize)
1985        {
1986        int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1987        bufflength += add;
1988        endptr += add;
1989        }
1990    
1991    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1992    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1993    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
1994    about to be lost, print them. */    about to be lost, print them. */
1995    
1996    if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)    if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1997      {      {
1998      if (after_context > 0 &&      if (after_context > 0 &&
1999          lastmatchnumber > 0 &&          lastmatchnumber > 0 &&
2000          lastmatchrestart < buffer + MBUFTHIRD)          lastmatchrestart < main_buffer + bufthird)
2001        {        {
2002        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2003        lastmatchnumber = 0;        lastmatchnumber = 0;
# Line 801  while (ptr < endptr) Line 2005  while (ptr < endptr)
2005    
2006      /* Now do the shuffle */      /* Now do the shuffle */
2007    
2008      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
2009      ptr -= MBUFTHIRD;      ptr -= bufthird;
2010      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);  
2011      endptr = buffer + bufflength;  #ifdef SUPPORT_LIBZ
2012        if (frtype == FR_LIBZ)
2013          bufflength = 2*bufthird +
2014            gzread (ingz, main_buffer + 2*bufthird, bufthird);
2015        else
2016    #endif
2017    
2018    #ifdef SUPPORT_LIBBZ2
2019        if (frtype == FR_LIBBZ2)
2020          bufflength = 2*bufthird +
2021            BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
2022        else
2023    #endif
2024    
2025        bufflength = 2*bufthird +
2026          (input_line_buffered?
2027           read_one_line(main_buffer + 2*bufthird, bufthird, in) :
2028           fread(main_buffer + 2*bufthird, 1, bufthird, in));
2029        endptr = main_buffer + bufflength;
2030    
2031      /* Adjust any last match point */      /* Adjust any last match point */
2032    
2033      if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;      if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
2034      }      }
2035    }     /* Loop through the whole file */    }     /* Loop through the whole file */
2036    
2037  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
2038  hyphenpending if it prints something. */  hyphenpending if it prints something. */
2039    
2040  if (!only_matching && !count_only)  if (!show_only_matching && !count_only)
2041    {    {
2042    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2043    hyphenpending |= endhyphenpending;    hyphenpending |= endhyphenpending;
# Line 834  if (filenames == FN_NOMATCH_ONLY) Line 2056  if (filenames == FN_NOMATCH_ONLY)
2056    
2057  if (count_only)  if (count_only)
2058    {    {
2059    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
2060    fprintf(stdout, "%d\n", count);      {
2061        if (printname != NULL && filenames != FN_NONE)
2062          fprintf(stdout, "%s:", printname);
2063        fprintf(stdout, "%d\n", count);
2064        }
2065    }    }
2066    
2067  return rc;  return rc;
# Line 855  Arguments: Line 2081  Arguments:
2081    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
2082    only_one_at_top   TRUE if the path is the only one at toplevel    only_one_at_top   TRUE if the path is the only one at toplevel
2083    
2084  Returns:   0 if there was at least one match  Returns:  -1 the file/directory was skipped
2085               0 if there was at least one match
2086             1 if there were no matches             1 if there were no matches
2087             2 there was some kind of error             2 there was some kind of error
2088    
# Line 866  static int Line 2093  static int
2093  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2094  {  {
2095  int rc = 1;  int rc = 1;
2096  int sep;  int frtype;
2097  FILE *in;  void *handle;
2098    char *lastcomp;
2099    FILE *in = NULL;           /* Ensure initialized */
2100    
2101    #ifdef SUPPORT_LIBZ
2102    gzFile ingz = NULL;
2103    #endif
2104    
2105    #ifdef SUPPORT_LIBBZ2
2106    BZFILE *inbz2 = NULL;
2107    #endif
2108    
2109    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2110    int pathlen;
2111    #endif
2112    
2113    #if defined NATIVE_ZOS
2114    int zos_type;
2115    FILE *zos_test_file;
2116    #endif
2117    
2118  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
2119    
2120  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
2121    {    {
2122    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN, stdin_name,
2123      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
2124        stdin_name : NULL);        stdin_name : NULL);
2125    }    }
2126    
2127    /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2128    directories, whereas --include and --exclude apply to everything else. The test
2129    is against the final component of the path. */
2130    
2131    lastcomp = strrchr(pathname, FILESEP);
2132    lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2133    
2134    /* If the file is a directory, skip if not recursing or if explicitly excluded.
2135    Otherwise, scan the directory and recurse for each path within it. The scanning
2136    code is localized so it can be made system-specific. */
2137    
2138    
2139    /* For z/OS, determine the file type. */
2140    
2141    #if defined NATIVE_ZOS
2142    zos_test_file =  fopen(pathname,"rb");
2143    
2144    if (zos_test_file == NULL)
2145       {
2146       if (!silent) fprintf(stderr, "pcregrep: failed to test next file %s\n",
2147         pathname, strerror(errno));
2148       return -1;
2149       }
2150    zos_type = identifyzosfiletype (zos_test_file);
2151    fclose (zos_test_file);
2152    
2153    /* Handle a PDS in separate code */
2154    
2155    if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
2156       {
2157       return travelonpdsdir (pathname, only_one_at_top);
2158       }
2159    
2160    /* Deal with regular files in the normal way below. These types are:
2161       zos_type == __ZOS_PDS_MEMBER
2162       zos_type == __ZOS_PS
2163       zos_type == __ZOS_VSAM_KSDS
2164       zos_type == __ZOS_VSAM_ESDS
2165       zos_type == __ZOS_VSAM_RRDS
2166    */
2167    
2168    /* Handle a z/OS directory using common code. */
2169    
2170    else if (zos_type == __ZOS_HFS)
2171     {
2172    #endif  /* NATIVE_ZOS */
2173    
 /* If the file is a directory, skip if skipping or if we are recursing, scan  
 each file within it, subject to any include or exclude patterns that were set.  
 The scanning code is localized so it can be made system-specific. */  
2174    
2175  if ((sep = isdirectory(pathname)) != 0)  /* Handle directories: common code for all OS */
2176    
2177    if (isdirectory(pathname))
2178    {    {
2179    if (dee_action == dee_SKIP) return 1;    if (dee_action == dee_SKIP ||
2180          !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2181        return -1;
2182    
2183    if (dee_action == dee_RECURSE)    if (dee_action == dee_RECURSE)
2184      {      {
2185      char buffer[1024];      char buffer[1024];
# Line 902  if ((sep = isdirectory(pathname)) != 0) Line 2196  if ((sep = isdirectory(pathname)) != 0)
2196    
2197      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
2198        {        {
2199        int frc, blen;        int frc;
2200        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
       blen = strlen(buffer);  
   
       if (exclude_compiled != NULL &&  
           pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)  
         continue;  
   
       if (include_compiled != NULL &&  
           pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  
         continue;  
   
2201        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2202        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
2203         else if (frc == 0 && rc == 1) rc = 0;         else if (frc == 0 && rc == 1) rc = 0;
# Line 924  if ((sep = isdirectory(pathname)) != 0) Line 2208  if ((sep = isdirectory(pathname)) != 0)
2208      }      }
2209    }    }
2210    
2211  /* If the file is not a directory and not a regular file, skip it if that's  #if defined NATIVE_ZOS
2212  been requested. */   }
2213    #endif
2214    
2215  else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;  /* If the file is not a directory, check for a regular file, and if it is not,
2216    skip it if that's been requested. Otherwise, check for an explicit inclusion or
2217    exclusion. */
2218    
2219    else if (
2220    #if defined NATIVE_ZOS
2221            (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
2222    #else  /* all other OS */
2223            (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2224    #endif
2225            !test_incexc(lastcomp, include_patterns, exclude_patterns))
2226      return -1;  /* File skipped */
2227    
2228  /* Control reaches here if we have a regular file, or if we have a directory  /* Control reaches here if we have a regular file, or if we have a directory
2229  and recursion or skipping was not requested, or if we have anything else and  and recursion or skipping was not requested, or if we have anything else and
# Line 935  skipping was not requested. The scan pro Line 2231  skipping was not requested. The scan pro
2231  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
2232  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
2233    
2234  in = fopen(pathname, "r");  #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2235  if (in == NULL)  pathlen = (int)(strlen(pathname));
2236    #endif
2237    
2238    /* Open using zlib if it is supported and the file name ends with .gz. */
2239    
2240    #ifdef SUPPORT_LIBZ
2241    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
2242    {    {
2243    if (!silent)    ingz = gzopen(pathname, "rb");
2244      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,    if (ingz == NULL)
2245        strerror(errno));      {
2246    return 2;      if (!silent)
2247          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2248            strerror(errno));
2249        return 2;
2250        }
2251      handle = (void *)ingz;
2252      frtype = FR_LIBZ;
2253    }    }
2254    else
2255    #endif
2256    
2257  rc = pcregrep(in, (filenames > FN_DEFAULT ||  /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);  
   
 fclose(in);  
 return rc;  
 }  
2258    
2259    #ifdef SUPPORT_LIBBZ2
2260    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
2261      {
2262      inbz2 = BZ2_bzopen(pathname, "rb");
2263      handle = (void *)inbz2;
2264      frtype = FR_LIBBZ2;
2265      }
2266    else
2267    #endif
2268    
2269    /* Otherwise use plain fopen(). The label is so that we can come back here if
2270    an attempt to read a .bz2 file indicates that it really is a plain file. */
2271    
2272    #ifdef SUPPORT_LIBBZ2
2273    PLAIN_FILE:
2274    #endif
2275      {
2276      in = fopen(pathname, "rb");
2277      handle = (void *)in;
2278      frtype = FR_PLAIN;
2279      }
2280    
2281  /*************************************************  /* All the opening methods return errno when they fail. */
 *                Usage function                  *  
 *************************************************/  
2282    
2283  static int  if (handle == NULL)
 usage(int rc)  
 {  
 option_item *op;  
 fprintf(stderr, "Usage: pcregrep [-");  
 for (op = optionlist; op->one_char != 0; op++)  
2284    {    {
2285    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);    if (!silent)
2286        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2287          strerror(errno));
2288      return 2;
2289    }    }
 fprintf(stderr, "] [long options] [pattern] [files]\n");  
 fprintf(stderr, "Type `pcregrep --help' for more information.\n");  
 return rc;  
 }  
   
2290    
2291    /* Now grep the file */
2292    
2293    rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2294      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2295    
2296  /*************************************************  /* Close in an appropriate manner. */
 *                Help function                   *  
 *************************************************/  
2297    
2298  static void  #ifdef SUPPORT_LIBZ
2299  help(void)  if (frtype == FR_LIBZ)
2300  {    gzclose(ingz);
2301  option_item *op;  else
2302    #endif
2303    
2304  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  /* If it is a .bz2 file and the result is 3, it means that the first attempt to
2305  printf("Search for PATTERN in each FILE or standard input.\n");  read failed. If the error indicates that the file isn't in fact bzipped, try
2306  printf("PATTERN must be present if neither -e nor -f is used.\n");  again as a normal file. */
2307  printf("\"-\" can be used as a file name to mean STDIN.\n\n");  
2308  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  #ifdef SUPPORT_LIBBZ2
2309    if (frtype == FR_LIBBZ2)
2310      {
2311      if (rc == 3)
2312        {
2313        int errnum;
2314        const char *err = BZ2_bzerror(inbz2, &errnum);
2315        if (errnum == BZ_DATA_ERROR_MAGIC)
2316          {
2317          BZ2_bzclose(inbz2);
2318          goto PLAIN_FILE;
2319          }
2320        else if (!silent)
2321          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
2322            pathname, err);
2323        rc = 2;    /* The normal "something went wrong" code */
2324        }
2325      BZ2_bzclose(inbz2);
2326      }
2327    else
2328    #endif
2329    
2330  printf("Options:\n");  /* Normal file close */
2331    
2332  for (op = optionlist; op->one_char != 0; op++)  fclose(in);
   {  
   int n;  
   char s[4];  
   if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");  
   printf("  %s --%s%n", s, op->long_name, &n);  
   n = 30 - n;  
   if (n < 1) n = 1;  
   printf("%.*s%s\n", n, "                    ", op->help_text);  
   }  
2333    
2334  printf("\nWhen reading patterns from a file instead of using a command line option,\n");  /* Pass back the yield from pcregrep(). */
 printf("trailing white space is removed and blank lines are ignored.\n");  
 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
2335    
2336  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");  return rc;
 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  
2337  }  }
2338    
2339    
2340    
   
2341  /*************************************************  /*************************************************
2342  *    Handle a single-letter, no data option      *  *    Handle a single-letter, no data option      *
2343  *************************************************/  *************************************************/
# Line 1023  handle_option(int letter, int options) Line 2347  handle_option(int letter, int options)
2347  {  {
2348  switch(letter)  switch(letter)
2349    {    {
2350    case N_HELP: help(); exit(0);    case N_FOFFSETS: file_offsets = TRUE; break;
2351      case N_HELP: help(); pcregrep_exit(0);
2352      case N_LBUFFER: line_buffered = TRUE; break;
2353      case N_LOFFSETS: line_offsets = number = TRUE; break;
2354      case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2355      case 'a': binary_files = BIN_TEXT; break;
2356    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
2357    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
2358    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
2359      case 'I': binary_files = BIN_NOMATCH; break;
2360    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
2361    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
2362    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2363    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
2364    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2365    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
2366    case 'o': only_matching = TRUE; break;  
2367      case 'o':
2368      only_matching_last = add_number(0, only_matching_last);
2369      if (only_matching == NULL) only_matching = only_matching_last;
2370      break;
2371    
2372    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
2373    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
2374    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
2375    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
2376    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
2377    case 'w': process_options |= PO_WORD_MATCH; break;    case 'w': process_options |= PO_WORD_MATCH; break;
2378    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
2379    
2380    case 'V':    case 'V':
2381    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stdout, "pcregrep version %s\n", pcre_version());
2382    fprintf(stderr, "PCRE version %s\n", pcre_version());    pcregrep_exit(0);
   exit(0);  
2383    break;    break;
2384    
2385    default:    default:
2386    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2387    exit(usage(2));    pcregrep_exit(usage(2));
2388    }    }
2389    
2390  return options;  return options;
# Line 1088  return buffer; Line 2422  return buffer;
2422  *          Compile a single pattern              *  *          Compile a single pattern              *
2423  *************************************************/  *************************************************/
2424    
2425  /* When the -F option has been used, this is called for each substring.  /* Do nothing if the pattern has already been compiled. This is the case for
2426  Otherwise it's called for each supplied pattern.  include/exclude patterns read from a file.
2427    
2428    When the -F option has been used, each "pattern" may be a list of strings,
2429    separated by line breaks. They will be matched literally. We split such a
2430    string and compile the first substring, inserting an additional block into the
2431    pattern chain.
2432    
2433  Arguments:  Arguments:
2434    pattern        the pattern string    p              points to the pattern block
2435    options        the PCRE options    options        the PCRE options
2436    filename       the file name, or NULL for a command-line pattern    popts          the processing options
2437      fromfile       TRUE if the pattern was read from a file
2438      fromtext       file name or identifying text (e.g. "include")
2439    count          0 if this is the only command line pattern, or    count          0 if this is the only command line pattern, or
2440                   number of the command line pattern, or                   number of the command line pattern, or
2441                   linenumber for a pattern from a file                   linenumber for a pattern from a file
# Line 1103  Returns:         TRUE on success, FALSE Line 2444  Returns:         TRUE on success, FALSE
2444  */  */
2445    
2446  static BOOL  static BOOL
2447  compile_single_pattern(char *pattern, int options, char *filename, int count)  compile_pattern(patstr *p, int options, int popts, int fromfile,
2448      const char *fromtext, int count)
2449  {  {
2450  char buffer[MBUFTHIRD + 16];  char buffer[PATBUFSIZE];
2451  const char *error;  const char *error;
2452    char *ps = p->string;
2453    int patlen = strlen(ps);
2454  int errptr;  int errptr;
2455    
2456  if (pattern_count >= MAX_PATTERN_COUNT)  if (p->compiled != NULL) return TRUE;
2457    
2458    if ((popts & PO_FIXED_STRINGS) != 0)
2459    {    {
2460    fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",    int ellength;
2461      (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);    char *eop = ps + patlen;
2462    return FALSE;    char *pe = end_of_line(ps, eop, &ellength);
2463    
2464      if (ellength != 0)
2465        {
2466        if (add_pattern(pe, p) == NULL) return FALSE;
2467        patlen = (int)(pe - ps - ellength);
2468        }
2469    }    }
2470    
2471  sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,  sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2472    suffix[process_options]);  p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2473  pattern_list[pattern_count] =  if (p->compiled != NULL) return TRUE;
   pcre_compile(buffer, options, &error, &errptr, pcretables);  
 if (pattern_list[pattern_count++] != NULL) return TRUE;  
2474    
2475  /* Handle compile errors */  /* Handle compile errors */
2476    
2477  errptr -= (int)strlen(prefix[process_options]);  errptr -= (int)strlen(prefix[popts]);
2478  if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);  if (errptr > patlen) errptr = patlen;
2479    
2480  if (filename == NULL)  if (fromfile)
2481    {    {
2482    if (count == 0)    fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2483      fprintf(stderr, "pcregrep: Error in command-line regex "      "at offset %d: %s\n", count, fromtext, errptr, error);
       "at offset %d: %s\n", errptr, error);  
   else  
     fprintf(stderr, "pcregrep: Error in %s command-line regex "  
       "at offset %d: %s\n", ordin(count), errptr, error);  
2484    }    }
2485  else  else
2486    {    {
2487    fprintf(stderr, "pcregrep: Error in regex in line %d of %s "    if (count == 0)
2488      "at offset %d: %s\n", count, filename, errptr, error);      fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2489          fromtext, errptr, error);
2490      else
2491        fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2492          ordin(count), fromtext, errptr, error);
2493    }    }
2494    
2495  return FALSE;  return FALSE;
# Line 1148  return FALSE; Line 2498  return FALSE;
2498    
2499    
2500  /*************************************************  /*************************************************
2501  *           Compile one supplied pattern         *  *     Read and compile a file of patterns        *
2502  *************************************************/  *************************************************/
2503    
2504  /* When the -F option has been used, each string may be a list of strings,  /* This is used for --filelist, --include-from, and --exclude-from.
 separated by newlines. They will be matched literally.  
2505    
2506  Arguments:  Arguments:
2507    pattern        the pattern string    name         the name of the file; "-" is stdin
2508    options        the PCRE options    patptr       pointer to the pattern chain anchor
2509    filename       the file name, or NULL for a command-line pattern    patlastptr   pointer to the last pattern pointer
2510    count          0 if this is the only command line pattern, or    popts        the process options to pass to compile_pattern()
                  number of the command line pattern, or  
                  linenumber for a pattern from a file  
2511    
2512  Returns:         TRUE on success, FALSE after an error  Returns:       TRUE if all went well
2513  */  */
2514    
2515  static BOOL  static BOOL
2516  compile_pattern(char *pattern, int options, char *filename, int count)  read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2517  {  {
2518  if ((process_options & PO_FIXED_STRINGS) != 0)  int linenumber = 0;
2519    FILE *f;
2520    char *filename;
2521    char buffer[PATBUFSIZE];
2522    
2523    if (strcmp(name, "-") == 0)
2524      {
2525      f = stdin;
2526      filename = stdin_name;
2527      }
2528    else
2529      {
2530      f = fopen(name, "r");
2531      if (f == NULL)
2532        {
2533        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2534        return FALSE;
2535        }
2536      filename = name;
2537      }
2538    
2539    while (fgets(buffer, PATBUFSIZE, f) != NULL)
2540    {    {
2541    char buffer[MBUFTHIRD];    char *s = buffer + (int)strlen(buffer);
2542      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2543      *s = 0;
2544      linenumber++;
2545      if (buffer[0] == 0) continue;   /* Skip blank lines */
2546    
2547      /* Note: this call to add_pattern() puts a pointer to the local variable
2548      "buffer" into the pattern chain. However, that pointer is used only when
2549      compiling the pattern, which happens immediately below, so we flatten it
2550      afterwards, as a precaution against any later code trying to use it. */
2551    
2552      *patlastptr = add_pattern(buffer, *patlastptr);
2553      if (*patlastptr == NULL)
2554        {
2555        if (f != stdin) fclose(f);
2556        return FALSE;
2557        }
2558      if (*patptr == NULL) *patptr = *patlastptr;
2559    
2560      /* This loop is needed because compiling a "pattern" when -F is set may add
2561      on additional literal patterns if the original contains a newline. In the
2562      common case, it never will, because fgets() stops at a newline. However,
2563      the -N option can be used to give pcregrep a different newline setting. */
2564    
2565    for(;;)    for(;;)
2566      {      {
2567      char *p = strchr(pattern, '\n');      if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2568      if (p == NULL)          linenumber))
2569        return compile_single_pattern(pattern, options, filename, count);        {
2570      sprintf(buffer, "%.*s", p - pattern, pattern);        if (f != stdin) fclose(f);
     pattern = p + 1;  
     if (!compile_single_pattern(buffer, options, filename, count))  
2571        return FALSE;        return FALSE;
2572          }
2573        (*patlastptr)->string = NULL;            /* Insurance */
2574        if ((*patlastptr)->next == NULL) break;
2575        *patlastptr = (*patlastptr)->next;
2576      }      }
2577    }    }
2578  else return compile_single_pattern(pattern, options, filename, count);  
2579    if (f != stdin) fclose(f);
2580    return TRUE;
2581  }  }
2582    
2583    
# Line 1198  main(int argc, char **argv) Line 2593  main(int argc, char **argv)
2593  {  {
2594  int i, j;  int i, j;
2595  int rc = 1;  int rc = 1;
 int pcre_options = 0;  
 int cmd_pattern_count = 0;  
 int errptr;  
2596  BOOL only_one_at_top;  BOOL only_one_at_top;
2597  char *patterns[MAX_PATTERN_COUNT];  patstr *cp;
2598    fnstr *fn;
2599  const char *locale_from = "--locale";  const char *locale_from = "--locale";
2600  const char *error;  const char *error;
2601    
2602    #ifdef SUPPORT_PCREGREP_JIT
2603    pcre_jit_stack *jit_stack = NULL;
2604    #endif
2605    
2606    /* Set the default line ending value from the default in the PCRE library;
2607    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2608    Note that the return values from pcre_config(), though derived from the ASCII
2609    codes, are the same in EBCDIC environments, so we must use the actual values
2610    rather than escapes such as '\r'. */
2611    
2612    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2613    switch(i)
2614      {
2615      default:               newline = (char *)"lf"; break;
2616      case 13:               newline = (char *)"cr"; break;
2617      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2618      case -1:               newline = (char *)"any"; break;
2619      case -2:               newline = (char *)"anycrlf"; break;
2620      }
2621    
2622  /* Process the options */  /* Process the options */
2623    
2624  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
# Line 1222  for (i = 1; i < argc; i++) Line 2635  for (i = 1; i < argc; i++)
2635    
2636    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2637      {      {
2638      if (pattern_filename != NULL || pattern_count > 0) break;      if (pattern_files != NULL || patterns != NULL) break;
2639        else exit(usage(2));        else pcregrep_exit(usage(2));
2640      }      }
2641    
2642    /* Handle a long name option, or -- to terminate the options */    /* Handle a long name option, or -- to terminate the options */
# Line 1245  for (i = 1; i < argc; i++) Line 2658  for (i = 1; i < argc; i++)
2658      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2659      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2660      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2661      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
2662      these categories, fortunately. */      both these categories. */
2663    
2664      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2665        {        {
2666        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2667        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2668        if (opbra == NULL)     /* Not a (p) case */  
2669          /* Handle options with only one spelling of the name */
2670    
2671          if (opbra == NULL)     /* Does not contain '(' */
2672          {          {
2673          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2674            {            {
# Line 1260  for (i = 1; i < argc; i++) Line 2676  for (i = 1; i < argc; i++)
2676            }            }
2677          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2678            {            {
2679            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2680            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2681                (int)strlen(arg) : (int)(argequals - arg);
2682            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2683              {              {
2684              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1274  for (i = 1; i < argc; i++) Line 2691  for (i = 1; i < argc; i++)
2691              }              }
2692            }            }
2693          }          }
2694        else                   /* Special case xxxx(p) */  
2695          /* Handle options with an alternate spelling of the name */
2696    
2697          else
2698          {          {
2699          char buff1[24];          char buff1[24];
2700          char buff2[24];          char buff2[24];
2701          int baselen = opbra - op->long_name;  
2702            int baselen = (int)(opbra - op->long_name);
2703            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2704            int arglen = (argequals == NULL || equals == NULL)?
2705              (int)strlen(arg) : (int)(argequals - arg);
2706    
2707          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2708          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2709            opbra + 1);  
2710          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2711               strncmp(arg, buff2, arglen) == 0)
2712              {
2713              if (equals != NULL && argequals != NULL)
2714                {
2715                option_data = argequals;
2716                if (*option_data == '=')
2717                  {
2718                  option_data++;
2719                  longopwasequals = TRUE;
2720                  }
2721                }
2722            break;            break;
2723              }
2724          }          }
2725        }        }
2726    
2727      if (op->one_char == 0)      if (op->one_char == 0)
2728        {        {
2729        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2730        exit(usage(2));        pcregrep_exit(usage(2));
2731        }        }
2732      }      }
2733    
2734      /* Jeffrey Friedl's debugging harness uses these additional options which
2735      are not in the right form for putting in the option table because they use
2736      only one hyphen, yet are more than one character long. By putting them
2737      separately here, they will not get displayed as part of the help() output,
2738      but I don't think Jeffrey will care about that. */
2739    
2740    #ifdef JFRIEDL_DEBUG
2741      else if (strcmp(argv[i], "-pre") == 0) {
2742              jfriedl_prefix = argv[++i];
2743              continue;
2744      } else if (strcmp(argv[i], "-post") == 0) {
2745              jfriedl_postfix = argv[++i];
2746              continue;
2747      } else if (strcmp(argv[i], "-XT") == 0) {
2748              sscanf(argv[++i], "%d", &jfriedl_XT);
2749              continue;
2750      } else if (strcmp(argv[i], "-XR") == 0) {
2751              sscanf(argv[++i], "%d", &jfriedl_XR);
2752              continue;
2753      }
2754    #endif
2755    
2756    
2757    /* One-char options; many that have no data may be in a single argument; we    /* One-char options; many that have no data may be in a single argument; we
2758    continue till we hit the last one or one that needs data. */    continue till we hit the last one or one that needs data. */
2759    
# Line 1301  for (i = 1; i < argc; i++) Line 2761  for (i = 1; i < argc; i++)
2761      {      {
2762      char *s = argv[i] + 1;      char *s = argv[i] + 1;
2763      longop = FALSE;      longop = FALSE;
2764    
2765      while (*s != 0)      while (*s != 0)
2766        {        {
2767        for (op = optionlist; op->one_char != 0; op++)        for (op = optionlist; op->one_char != 0; op++)
2768          { if (*s == op->one_char) break; }          {
2769            if (*s == op->one_char) break;
2770            }
2771        if (op->one_char == 0)        if (op->one_char == 0)
2772          {          {
2773          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2774            *s, argv[i]);            *s, argv[i]);
2775          exit(usage(2));          pcregrep_exit(usage(2));
2776            }
2777    
2778          option_data = s+1;
2779    
2780          /* Break out if this is the last character in the string; it's handled
2781          below like a single multi-char option. */
2782    
2783          if (*option_data == 0) break;
2784    
2785          /* Check for a single-character option that has data: OP_OP_NUMBER(S)
2786          are used for ones that either have a numerical number or defaults, i.e.
2787          the data is optional. If a digit follows, there is data; if not, carry on
2788          with other single-character options in the same string. */
2789    
2790          if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
2791            {
2792            if (isdigit((unsigned char)s[1])) break;
2793          }          }
2794        if (op->type != OP_NODATA || s[1] == 0)        else   /* Check for an option with data */
2795          {          {
2796          option_data = s+1;          if (op->type != OP_NODATA) break;
         break;  
2797          }          }
2798    
2799          /* Handle a single-character option with no data, then loop for the
2800          next character in the string. */
2801    
2802        pcre_options = handle_option(*s++, pcre_options);        pcre_options = handle_option(*s++, pcre_options);
2803        }        }
2804      }      }
# Line 1330  for (i = 1; i < argc; i++) Line 2813  for (i = 1; i < argc; i++)
2813      continue;      continue;
2814      }      }
2815    
2816    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
2817    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
2818    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r",
2819    Jeffrey Friedl's special debugging option. */    "only-matching", and Jeffrey Friedl's special -S debugging option. */
2820    
2821    if (*option_data == 0 &&    if (*option_data == 0 &&
2822        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
2823           op->type == OP_OP_NUMBERS))
2824      {      {
2825      switch (op->one_char)      switch (op->one_char)
2826        {        {
2827        case N_COLOUR:        case N_COLOUR:
2828        colour_option = (char *)"auto";        colour_option = (char *)"auto";
2829        break;        break;
2830    
2831          case 'o':
2832          only_matching_last = add_number(0, only_matching_last);
2833          if (only_matching == NULL) only_matching = only_matching_last;
2834          break;
2835    
2836  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
2837        case 'S':        case 'S':
2838        S_arg = 0;        S_arg = 0;
# Line 1359  for (i = 1; i < argc; i++) Line 2849  for (i = 1; i < argc; i++)
2849      if (i >= argc - 1 || longopwasequals)      if (i >= argc - 1 || longopwasequals)
2850        {        {
2851        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2852        exit(usage(2));        pcregrep_exit(usage(2));
2853        }        }
2854      option_data = argv[++i];      option_data = argv[++i];
2855      }      }
2856    
2857    /* If the option type is OP_PATLIST, it's the -e option, which can be called    /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
2858    multiple times to create a list of patterns. */    added to a chain of numbers. */
2859    
2860      if (op->type == OP_OP_NUMBERS)
2861        {
2862        unsigned long int n = decode_number(option_data, op, longop);
2863        omdatastr *omd = (omdatastr *)op->dataptr;
2864        *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
2865        if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
2866        }
2867    
2868      /* If the option type is OP_PATLIST, it's the -e option, or one of the
2869      include/exclude options, which can be called multiple times to create lists
2870      of patterns. */
2871    
2872      else if (op->type == OP_PATLIST)
2873        {
2874        patdatastr *pd = (patdatastr *)op->dataptr;
2875        *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2876        if (*(pd->lastptr) == NULL) goto EXIT2;
2877        if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2878        }
2879    
2880      /* If the option type is OP_FILELIST, it's one of the options that names a
2881      file. */
2882    
2883    if (op->type == OP_PATLIST)    else if (op->type == OP_FILELIST)
2884      {      {
2885      if (cmd_pattern_count >= MAX_PATTERN_COUNT)      fndatastr *fd = (fndatastr *)op->dataptr;
2886        fn = (fnstr *)malloc(sizeof(fnstr));
2887        if (fn == NULL)
2888        {        {
2889        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",        fprintf(stderr, "pcregrep: malloc failed\n");
2890          MAX_PATTERN_COUNT);        goto EXIT2;
2891        return 2;        }
2892        fn->next = NULL;
2893        fn->name = option_data;
2894        if (*(fd->anchor) == NULL)
2895          *(fd->anchor) = fn;
2896        else
2897          (*(fd->lastptr))->next = fn;
2898        *(fd->lastptr) = fn;
2899        }
2900    
2901      /* Handle OP_BINFILES (the --binary-files option value) */
2902    
2903      else if (op->type == OP_BINFILES)
2904        {
2905        if (strcmp(option_data, "binary") == 0)
2906          binary_files = BIN_BINARY;
2907        else if (strcmp(option_data, "without-match") == 0)
2908          binary_files = BIN_NOMATCH;
2909        else if (strcmp(option_data, "text") == 0)
2910          binary_files = BIN_TEXT;
2911        else
2912          {
2913          fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2914            option_data);
2915          pcregrep_exit(usage(2));
2916        }        }
     patterns[cmd_pattern_count++] = option_data;  
2917      }      }
2918    
2919    /* Otherwise, deal with single string or numeric data values. */    /* Otherwise, deal with a single string or numeric data value. */
2920    
2921    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)    else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2922               op->type != OP_OP_NUMBER)
2923      {      {
2924      *((char **)op->dataptr) = option_data;      *((char **)op->dataptr) = option_data;
2925      }      }
2926    else    else
2927      {      {
2928      char *endptr;      unsigned long int n = decode_number(option_data, op, longop);
2929      int n = strtoul(option_data, &endptr, 10);      if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
2930      if (*endptr != 0)        else *((int *)op->dataptr) = n;
       {  
       if (longop)  
         {  
         char *equals = strchr(op->long_name, '=');  
         int nlen = (equals == NULL)? (int)strlen(op->long_name) :  
           equals - op->long_name;  
         fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",  
           option_data, nlen, op->long_name);  
         }  
       else  
         fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",  
           option_data, op->one_char);  
       exit(usage(2));  
       }  
     *((int *)op->dataptr) = n;  
2931      }      }
2932    }    }
2933    
# Line 1416  if (both_context > 0) Line 2940  if (both_context > 0)
2940    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2941    }    }
2942    
2943    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2944    However, all three set show_only_matching because they display, each in their
2945    own way, only the data that has matched. */
2946    
2947    if ((only_matching != NULL && (file_offsets || line_offsets)) ||
2948        (file_offsets && line_offsets))
2949      {
2950      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2951        "and/or --line-offsets\n");
2952      pcregrep_exit(usage(2));
2953      }
2954    
2955    if (only_matching != NULL || file_offsets || line_offsets)
2956      show_only_matching = TRUE;
2957    
2958  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2959  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
2960    
# Line 1431  if (locale == NULL) Line 2970  if (locale == NULL)
2970    locale_from = "LC_CTYPE";    locale_from = "LC_CTYPE";
2971    }    }
2972    
2973  /* If a locale has been provided, set it, and generate the tables the PCRE  /* If a locale is set, use it to generate the tables the PCRE needs. Otherwise,
2974  needs. Otherwise, pcretables==NULL, which causes the use of default tables. */  pcretables==NULL, which causes the use of default tables. */
2975    
2976  if (locale != NULL)  if (locale != NULL)
2977    {    {
# Line 1440  if (locale != NULL) Line 2979  if (locale != NULL)
2979      {      {
2980      fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",      fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2981        locale, locale_from);        locale, locale_from);
2982      return 2;      goto EXIT2;
2983      }      }
2984    pcretables = pcre_maketables();    pcretables = pcre_maketables();
2985    }    }
# Line 1455  if (colour_option != NULL && strcmp(colo Line 2994  if (colour_option != NULL && strcmp(colo
2994      {      {
2995      fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",      fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2996        colour_option);        colour_option);
2997      return 2;      goto EXIT2;
2998      }      }
2999    if (do_colour)    if (do_colour)
3000      {      {
# Line 1465  if (colour_option != NULL && strcmp(colo Line 3004  if (colour_option != NULL && strcmp(colo
3004      }      }
3005    }    }
3006    
3007    /* Interpret the newline type; the default settings are Unix-like. */
3008    
3009    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
3010      {
3011      pcre_options |= PCRE_NEWLINE_CR;
3012      endlinetype = EL_CR;
3013      }
3014    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
3015      {
3016      pcre_options |= PCRE_NEWLINE_LF;
3017      endlinetype = EL_LF;
3018      }
3019    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
3020      {
3021      pcre_options |= PCRE_NEWLINE_CRLF;
3022      endlinetype = EL_CRLF;
3023      }
3024    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
3025      {
3026      pcre_options |= PCRE_NEWLINE_ANY;
3027      endlinetype = EL_ANY;
3028      }
3029    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
3030      {
3031      pcre_options |= PCRE_NEWLINE_ANYCRLF;
3032      endlinetype = EL_ANYCRLF;
3033      }
3034    else
3035      {
3036      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
3037      goto EXIT2;
3038      }
3039    
3040  /* Interpret the text values for -d and -D */  /* Interpret the text values for -d and -D */
3041    
3042  if (dee_option != NULL)  if (dee_option != NULL)
# Line 1475  if (dee_option != NULL) Line 3047  if (dee_option != NULL)
3047    else    else
3048      {      {
3049      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
3050      return 2;      goto EXIT2;
3051      }      }
3052    }    }
3053    
# Line 1486  if (DEE_option != NULL) Line 3058  if (DEE_option != NULL)
3058    else    else
3059      {      {
3060      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
3061      return 2;      goto EXIT2;
3062      }      }
3063    }    }
3064    
3065  /* Check the value for Jeff Friedl's debugging option. */  /* Check the values for Jeffrey Friedl's debugging options. */
3066    
3067  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
3068  if (S_arg > 9)  if (S_arg > 9)
# Line 1498  if (S_arg > 9) Line 3070  if (S_arg > 9)
3070    fprintf(stderr, "pcregrep: bad value for -S option\n");    fprintf(stderr, "pcregrep: bad value for -S option\n");
3071    return 2;    return 2;
3072    }    }
3073    if (jfriedl_XT != 0 || jfriedl_XR != 0)
3074      {
3075      if (jfriedl_XT == 0) jfriedl_XT = 1;
3076      if (jfriedl_XR == 0) jfriedl_XR = 1;
3077      }
3078  #endif  #endif
3079    
3080  /* Get memory to store the pattern and hints lists. */  /* Get memory for the main buffer. */
3081    
3082  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  bufsize = 3*bufthird;
3083  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  main_buffer = (char *)malloc(bufsize);
3084    
3085  if (pattern_list == NULL || hints_list == NULL)  if (main_buffer == NULL)
3086    {    {
3087    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
3088    return 2;    goto EXIT2;
3089    }    }
3090    
3091  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there are no files provided by -f,
3092  the first argument is the one and only pattern, and it must exist. */  the first argument is the one and only pattern, and it must exist. */
3093    
3094  if (cmd_pattern_count == 0 && pattern_filename == NULL)  if (patterns == NULL && pattern_files == NULL)
3095    {    {
3096    if (i >= argc) return usage(2);    if (i >= argc) return usage(2);
3097    patterns[cmd_pattern_count++] = argv[i++];    patterns = patterns_last = add_pattern(argv[i++], NULL);
3098      if (patterns == NULL) goto EXIT2;
3099    }    }
3100    
3101  /* Compile the patterns that were provided on the command line, either by  /* Compile the patterns that were provided on the command line, either by
3102  multiple uses of -e or as a single unkeyed pattern. */  multiple uses of -e or as a single unkeyed pattern. We cannot do this until
3103    after all the command-line options are read so that we know which PCRE options
3104    to use. When -F is used, compile_pattern() may add another block into the
3105    chain, so we must not access the next pointer till after the compile. */
3106    
3107  for (j = 0; j < cmd_pattern_count; j++)  for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3108    {    {
3109    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
3110         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 1 && patterns->next == NULL)? 0 : j))
3111      return 2;      goto EXIT2;
3112    }    }
3113    
3114  /* Compile the regular expressions that are provided in a file. */  /* Read and compile the regular expressions that are provided in files. */
3115    
3116  if (pattern_filename != NULL)  for (fn = pattern_files; fn != NULL; fn = fn->next)
3117    {    {
3118    int linenumber = 0;    if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3119    FILE *f;      goto EXIT2;
3120    char *filename;    }
   char buffer[MBUFTHIRD];  
3121    
3122    if (strcmp(pattern_filename, "-") == 0)  /* Study the regular expressions, as we will be running them many times. If an
3123      {  extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
3124      f = stdin;  returned, even if studying produces no data. */
     filename = stdin_name;  
     }  
   else  
     {  
     f = fopen(pattern_filename, "r");  
     if (f == NULL)  
       {  
       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,  
         strerror(errno));  
       return 2;  
       }  
     filename = pattern_filename;  
     }  
3125    
3126    while (fgets(buffer, MBUFTHIRD, f) != NULL)  if (match_limit > 0 || match_limit_recursion > 0)
3127      {    study_options |= PCRE_STUDY_EXTRA_NEEDED;
     char *s = buffer + (int)strlen(buffer);  
     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;  
     *s = 0;  
     linenumber++;  
     if (buffer[0] == 0) continue;   /* Skip blank lines */  
     if (!compile_pattern(buffer, pcre_options, filename, linenumber))  
       return 2;  
     }  
3128    
3129    if (f != stdin) fclose(f);  /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
   }  
3130    
3131  /* Study the regular expressions, as we will be running them many times */  #ifdef SUPPORT_PCREGREP_JIT
3132    if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3133      jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
3134    #endif
3135    
3136  for (j = 0; j < pattern_count; j++)  for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3137    {    {
3138    hints_list[j] = pcre_study(pattern_list[j], 0, &error);    cp->hint = pcre_study(cp->compiled, study_options, &error);
3139    if (error != NULL)    if (error != NULL)
3140      {      {
3141      char s[16];      char s[16];
3142      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
3143      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
3144      return 2;      goto EXIT2;
3145      }      }
3146    #ifdef SUPPORT_PCREGREP_JIT
3147      if (jit_stack != NULL && cp->hint != NULL)
3148        pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
3149    #endif
3150    }    }
3151    
3152  /* If there are include or exclude patterns, compile them. */  /* If --match-limit or --recursion-limit was set, put the value(s) into the
3153    pcre_extra block for each pattern. There will always be an extra block because
3154    of the use of PCRE_STUDY_EXTRA_NEEDED above. */
3155    
3156  if (exclude_pattern != NULL)  for (cp = patterns; cp != NULL; cp = cp->next)
3157    {    {
3158    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,    if (match_limit > 0)
     pcretables);  
   if (exclude_compiled == NULL)  
3159      {      {
3160      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
3161        errptr, error);      cp->hint->match_limit = match_limit;
3162      return 2;      }
3163    
3164      if (match_limit_recursion > 0)
3165        {
3166        cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3167        cp->hint->match_limit_recursion = match_limit_recursion;
3168      }      }
3169    }    }
3170    
3171  if (include_pattern != NULL)  /* If there are include or exclude patterns read from the command line, compile
3172    them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3173    0. */
3174    
3175    for (j = 0; j < 4; j++)
3176    {    {
3177    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,    int k;
3178      pcretables);    for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
   if (include_compiled == NULL)  
3179      {      {
3180      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
3181        errptr, error);           (k == 1 && cp->next == NULL)? 0 : k))
3182      return 2;        goto EXIT2;
3183      }      }
3184    }    }
3185    
3186  /* If there are no further arguments, do the business on stdin and exit. */  /* Read and compile include/exclude patterns from files. */
3187    
3188    for (fn = include_from; fn != NULL; fn = fn->next)
3189      {
3190      if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3191        goto EXIT2;
3192      }
3193    
3194    for (fn = exclude_from; fn != NULL; fn = fn->next)
3195      {
3196      if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3197        goto EXIT2;
3198      }
3199    
3200    /* If there are no files that contain lists of files to search, and there are
3201    no file arguments, search stdin, and then exit. */
3202    
3203    if (file_lists == NULL && i >= argc)
3204      {
3205      rc = pcregrep(stdin, FR_PLAIN, stdin_name,
3206        (filenames > FN_DEFAULT)? stdin_name : NULL);
3207      goto EXIT;
3208      }
3209    
3210    /* If any files that contain a list of files to search have been specified,
3211    read them line by line and search the given files. */
3212    
3213  if (i >= argc)  for (fn = file_lists; fn != NULL; fn = fn->next)
3214    return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    {
3215      char buffer[PATBUFSIZE];
3216      FILE *fl;
3217      if (strcmp(fn->name, "-") == 0) fl = stdin; else
3218        {
3219        fl = fopen(fn->name, "rb");
3220        if (fl == NULL)
3221          {
3222          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
3223            strerror(errno));
3224          goto EXIT2;
3225          }
3226        }
3227      while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3228        {
3229        int frc;
3230        char *end = buffer + (int)strlen(buffer);
3231        while (end > buffer && isspace(end[-1])) end--;
3232        *end = 0;
3233        if (*buffer != 0)
3234          {
3235          frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3236          if (frc > 1) rc = frc;
3237            else if (frc == 0 && rc == 1) rc = 0;
3238          }
3239        }
3240      if (fl != stdin) fclose(fl);
3241      }
3242    
3243  /* Otherwise, work through the remaining arguments as files or directories.  /* After handling file-list, work through remaining arguments. Pass in the fact
3244  Pass in the fact that there is only one argument at top level - this suppresses  that there is only one argument at top level - this suppresses the file name if
3245  the file name if the argument is not a directory and filenames are not  the argument is not a directory and filenames are not otherwise forced. */
 otherwise forced. */  
3246    
3247  only_one_at_top = i == argc - 1;   /* Catch initial value of i */  only_one_at_top = i == argc - 1 && file_lists == NULL;
3248    
3249  for (; i < argc; i++)  for (; i < argc; i++)
3250    {    {
# Line 1630  for (; i < argc; i++) Line 3254  for (; i < argc; i++)
3254      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
3255    }    }
3256    
3257  return rc;  EXIT:
3258    #ifdef SUPPORT_PCREGREP_JIT
3259    if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3260    #endif
3261    
3262    free(main_buffer);
3263    free((void *)pcretables);
3264    
3265    free_pattern_chain(patterns);
3266    free_pattern_chain(include_patterns);
3267    free_pattern_chain(include_dir_patterns);
3268    free_pattern_chain(exclude_patterns);
3269    free_pattern_chain(exclude_dir_patterns);
3270    
3271    free_file_chain(exclude_from);
3272    free_file_chain(include_from);
3273    free_file_chain(pattern_files);
3274    free_file_chain(file_lists);
3275    
3276    while (only_matching != NULL)
3277      {
3278      omstr *this = only_matching;
3279      only_matching = this->next;
3280      free(this);
3281      }
3282    
3283    pcregrep_exit(rc);
3284    
3285    EXIT2:
3286    rc = 2;
3287    goto EXIT;
3288  }  }
3289    
3290  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.87  
changed lines
  Added in v.1492

  ViewVC Help
Powered by ViewVC 1.1.5