/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 296 by ph10, Tue Jan 1 20:09:30 2008 UTC revision 1502 by ph10, Mon Sep 15 13:56:18 2014 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On Unix-like, Windows, and native z/OS systems it can
7  directories.  recurse into directories, and in z/OS it can handle PDS files.
8    
9             Copyright (c) 1997-2007 University of Cambridge  Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
10    additional header is required. That header is not included in the main PCRE
11    distribution because other apparatus is needed to compile pcregrep for z/OS.
12    The header can be found in the special z/OS distribution, which is available
13    from www.zaconsultants.net or from www.cbttape.org.
14    
15               Copyright (c) 1997-2014 University of Cambridge
16    
17  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
18  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 70  POSSIBILITY OF SUCH DAMAGE. Line 76  POSSIBILITY OF SUCH DAMAGE.
76    
77  typedef int BOOL;  typedef int BOOL;
78    
79  #define MAX_PATTERN_COUNT 100  #define OFFSET_SIZE 99
80    
81  #if BUFSIZ > 8192  #if BUFSIZ > 8192
82  #define MBUFTHIRD BUFSIZ  #define MAXPATLEN BUFSIZ
83  #else  #else
84  #define MBUFTHIRD 8192  #define MAXPATLEN 8192
85  #endif  #endif
86    
87    #define PATBUFSIZE (MAXPATLEN + 10)   /* Allows for prefix+suffix */
88    
89  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
90  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
91  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
92    
93  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
94    
95  /* File reading styles */  /* File reading styles */
96    
# Line 103  enum { DEE_READ, DEE_SKIP }; Line 111  enum { DEE_READ, DEE_SKIP };
111    
112  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
113    
114    /* Binary file options */
115    
116    enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
117    
118    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
119    environments), a warning is issued if the value of fwrite() is ignored.
120    Unfortunately, casting to (void) does not suppress the warning. To get round
121    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
122    apply to fprintf(). */
123    
124    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
125    
126    
127    
128  /*************************************************  /*************************************************
# Line 126  static char *colour_string = (char *)"1; Line 146  static char *colour_string = (char *)"1;
146  static char *colour_option = NULL;  static char *colour_option = NULL;
147  static char *dee_option = NULL;  static char *dee_option = NULL;
148  static char *DEE_option = NULL;  static char *DEE_option = NULL;
149    static char *locale = NULL;
150    static char *main_buffer = NULL;
151  static char *newline = NULL;  static char *newline = NULL;
152  static char *pattern_filename = NULL;  static char *om_separator = (char *)"";
153  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
 static char *locale = NULL;  
154    
155  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
156    
 static int  pattern_count = 0;  
 static pcre **pattern_list = NULL;  
 static pcre_extra **hints_list = NULL;  
   
 static char *include_pattern = NULL;  
 static char *exclude_pattern = NULL;  
   
 static pcre *include_compiled = NULL;  
 static pcre *exclude_compiled = NULL;  
   
157  static int after_context = 0;  static int after_context = 0;
158  static int before_context = 0;  static int before_context = 0;
159    static int binary_files = BIN_BINARY;
160  static int both_context = 0;  static int both_context = 0;
161    static int bufthird = PCREGREP_BUFSIZE;
162    static int bufsize = 3*PCREGREP_BUFSIZE;
163    
164    #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
165    static int dee_action = dee_SKIP;
166    #else
167  static int dee_action = dee_READ;  static int dee_action = dee_READ;
168    #endif
169    
170  static int DEE_action = DEE_READ;  static int DEE_action = DEE_READ;
171  static int error_count = 0;  static int error_count = 0;
172  static int filenames = FN_DEFAULT;  static int filenames = FN_DEFAULT;
173    static int pcre_options = 0;
174  static int process_options = 0;  static int process_options = 0;
175    
176    #ifdef SUPPORT_PCREGREP_JIT
177    static int study_options = PCRE_STUDY_JIT_COMPILE;
178    #else
179    static int study_options = 0;
180    #endif
181    
182    static unsigned long int match_limit = 0;
183    static unsigned long int match_limit_recursion = 0;
184    
185  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
186  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
187  static BOOL file_offsets = FALSE;  static BOOL file_offsets = FALSE;
188  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
189  static BOOL invert = FALSE;  static BOOL invert = FALSE;
190    static BOOL line_buffered = FALSE;
191  static BOOL line_offsets = FALSE;  static BOOL line_offsets = FALSE;
192  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
193  static BOOL number = FALSE;  static BOOL number = FALSE;
194  static BOOL only_matching = FALSE;  static BOOL omit_zero_count = FALSE;
195    static BOOL resource_error = FALSE;
196  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
197    static BOOL show_only_matching = FALSE;
198  static BOOL silent = FALSE;  static BOOL silent = FALSE;
199  static BOOL utf8 = FALSE;  static BOOL utf8 = FALSE;
200    
201    /* Structure for list of --only-matching capturing numbers. */
202    
203    typedef struct omstr {
204      struct omstr *next;
205      int groupnum;
206    } omstr;
207    
208    static omstr *only_matching = NULL;
209    static omstr *only_matching_last = NULL;
210    
211    /* Structure for holding the two variables that describe a number chain. */
212    
213    typedef struct omdatastr {
214      omstr **anchor;
215      omstr **lastptr;
216    } omdatastr;
217    
218    static omdatastr only_matching_data = { &only_matching, &only_matching_last };
219    
220    /* Structure for list of file names (for -f and --{in,ex}clude-from) */
221    
222    typedef struct fnstr {
223      struct fnstr *next;
224      char *name;
225    } fnstr;
226    
227    static fnstr *exclude_from = NULL;
228    static fnstr *exclude_from_last = NULL;
229    static fnstr *include_from = NULL;
230    static fnstr *include_from_last = NULL;
231    
232    static fnstr *file_lists = NULL;
233    static fnstr *file_lists_last = NULL;
234    static fnstr *pattern_files = NULL;
235    static fnstr *pattern_files_last = NULL;
236    
237    /* Structure for holding the two variables that describe a file name chain. */
238    
239    typedef struct fndatastr {
240      fnstr **anchor;
241      fnstr **lastptr;
242    } fndatastr;
243    
244    static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
245    static fndatastr include_from_data = { &include_from, &include_from_last };
246    static fndatastr file_lists_data = { &file_lists, &file_lists_last };
247    static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
248    
249    /* Structure for pattern and its compiled form; used for matching patterns and
250    also for include/exclude patterns. */
251    
252    typedef struct patstr {
253      struct patstr *next;
254      char *string;
255      pcre *compiled;
256      pcre_extra *hint;
257    } patstr;
258    
259    static patstr *patterns = NULL;
260    static patstr *patterns_last = NULL;
261    static patstr *include_patterns = NULL;
262    static patstr *include_patterns_last = NULL;
263    static patstr *exclude_patterns = NULL;
264    static patstr *exclude_patterns_last = NULL;
265    static patstr *include_dir_patterns = NULL;
266    static patstr *include_dir_patterns_last = NULL;
267    static patstr *exclude_dir_patterns = NULL;
268    static patstr *exclude_dir_patterns_last = NULL;
269    
270    /* Structure holding the two variables that describe a pattern chain. A pointer
271    to such structures is used for each appropriate option. */
272    
273    typedef struct patdatastr {
274      patstr **anchor;
275      patstr **lastptr;
276    } patdatastr;
277    
278    static patdatastr match_patdata = { &patterns, &patterns_last };
279    static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
280    static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
281    static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
282    static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
283    
284    static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
285                                     &include_dir_patterns, &exclude_dir_patterns };
286    
287    static const char *incexname[4] = { "--include", "--exclude",
288                                        "--include-dir", "--exclude-dir" };
289    
290  /* Structure for options and list of them */  /* Structure for options and list of them */
291    
292  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
293         OP_PATLIST };         OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
294    
295  typedef struct option_item {  typedef struct option_item {
296    int type;    int type;
# Line 181  typedef struct option_item { Line 303  typedef struct option_item {
303  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
304  used to identify them. */  used to identify them. */
305    
306  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
307  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
308  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
309  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
310  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
311  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
312  #define N_NULL      (-7)  #define N_LABEL        (-7)
313  #define N_LOFFSETS  (-8)  #define N_LOCALE       (-8)
314  #define N_FOFFSETS  (-9)  #define N_NULL         (-9)
315    #define N_LOFFSETS     (-10)
316    #define N_FOFFSETS     (-11)
317    #define N_LBUFFER      (-12)
318    #define N_M_LIMIT      (-13)
319    #define N_M_LIMIT_REC  (-14)
320    #define N_BUFSIZE      (-15)
321    #define N_NOJIT        (-16)
322    #define N_FILE_LIST    (-17)
323    #define N_BINARY_FILES (-18)
324    #define N_EXCLUDE_FROM (-19)
325    #define N_INCLUDE_FROM (-20)
326    #define N_OM_SEPARATOR (-21)
327    
328  static option_item optionlist[] = {  static option_item optionlist[] = {
329    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
330    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
331    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
332    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
333    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
334    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },    { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
335    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
336    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
337    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
338    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
339    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
340    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
341    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
342    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },    { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
343    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
344    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
345    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
346    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
347    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
348    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
349    { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },    { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
350    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
351    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },  #ifdef SUPPORT_PCREGREP_JIT
352    { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
353    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },  #else
354    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
355    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },  #endif
356    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
357    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
358    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
359      { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
360      { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
361      { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
362      { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
363      { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
364      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
365      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
366      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
367      { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
368      { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
369      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
370      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
371      { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
372      { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
373      { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
374      { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
375      { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
376      { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
377    
378      /* These two were accidentally implemented with underscores instead of
379      hyphens in the option names. As this was not discovered for several releases,
380      the incorrect versions are left in the table for compatibility. However, the
381      --help function misses out any option that has an underscore in its name. */
382    
383      { OP_PATLIST,   N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
384      { OP_PATLIST,   N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
385    
386  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
387    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
388  #endif  #endif
# Line 237  static option_item optionlist[] = { Line 398  static option_item optionlist[] = {
398  /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F  /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
399  options. These set the 1, 2, and 4 bits in process_options, respectively. Note  options. These set the 1, 2, and 4 bits in process_options, respectively. Note
400  that the combination of -w and -x has the same effect as -x on its own, so we  that the combination of -w and -x has the same effect as -x on its own, so we
401  can treat them as the same. */  can treat them as the same. Note that the PATBUFSIZE macro assumes the longest
402    prefix+suffix is 10 characters; if anything longer is added, it must be
403    adjusted. */
404    
405  static const char *prefix[] = {  static const char *prefix[] = {
406    "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };    "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
# Line 258  const char utf8_table4[] = { Line 421  const char utf8_table4[] = {
421    
422    
423  /*************************************************  /*************************************************
424    *         Exit from the program                  *
425    *************************************************/
426    
427    /* If there has been a resource error, give a suitable message.
428    
429    Argument:  the return code
430    Returns:   does not return
431    */
432    
433    static void
434    pcregrep_exit(int rc)
435    {
436    if (resource_error)
437      {
438      fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
439        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
440        PCRE_ERROR_JIT_STACKLIMIT);
441      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
442      }
443    exit(rc);
444    }
445    
446    
447    /*************************************************
448    *          Add item to chain of patterns         *
449    *************************************************/
450    
451    /* Used to add an item onto a chain, or just return an unconnected item if the
452    "after" argument is NULL.
453    
454    Arguments:
455      s          pattern string to add
456      after      if not NULL points to item to insert after
457    
458    Returns:     new pattern block or NULL on error
459    */
460    
461    static patstr *
462    add_pattern(char *s, patstr *after)
463    {
464    patstr *p = (patstr *)malloc(sizeof(patstr));
465    if (p == NULL)
466      {
467      fprintf(stderr, "pcregrep: malloc failed\n");
468      pcregrep_exit(2);
469      }
470    if (strlen(s) > MAXPATLEN)
471      {
472      fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
473        MAXPATLEN);
474      free(p);
475      return NULL;
476      }
477    p->next = NULL;
478    p->string = s;
479    p->compiled = NULL;
480    p->hint = NULL;
481    
482    if (after != NULL)
483      {
484      p->next = after->next;
485      after->next = p;
486      }
487    return p;
488    }
489    
490    
491    /*************************************************
492    *           Free chain of patterns               *
493    *************************************************/
494    
495    /* Used for several chains of patterns.
496    
497    Argument: pointer to start of chain
498    Returns:  nothing
499    */
500    
501    static void
502    free_pattern_chain(patstr *pc)
503    {
504    while (pc != NULL)
505      {
506      patstr *p = pc;
507      pc = p->next;
508      if (p->hint != NULL) pcre_free_study(p->hint);
509      if (p->compiled != NULL) pcre_free(p->compiled);
510      free(p);
511      }
512    }
513    
514    
515    /*************************************************
516    *           Free chain of file names             *
517    *************************************************/
518    
519    /*
520    Argument: pointer to start of chain
521    Returns:  nothing
522    */
523    
524    static void
525    free_file_chain(fnstr *fn)
526    {
527    while (fn != NULL)
528      {
529      fnstr *f = fn;
530      fn = f->next;
531      free(f);
532      }
533    }
534    
535    
536    /*************************************************
537  *            OS-specific functions               *  *            OS-specific functions               *
538  *************************************************/  *************************************************/
539    
540  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific.
541  although at present the only ones are for Unix, Win32, and for "no support". */  At present there are versions for Unix-style environments, Windows, native
542    z/OS, and "no support". */
543    
544    
545  /************* Directory scanning in Unix ***********/  /************* Directory scanning Unix-style and z/OS ***********/
546    
547  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H  #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
548  #include <sys/types.h>  #include <sys/types.h>
549  #include <sys/stat.h>  #include <sys/stat.h>
550  #include <dirent.h>  #include <dirent.h>
551    
552    #if defined NATIVE_ZOS
553    /************* Directory and PDS/E scanning for z/OS ***********/
554    /************* z/OS looks mostly like Unix with USS ************/
555    /* However, z/OS needs the #include statements in this header */
556    #include "pcrzosfs.h"
557    /* That header is not included in the main PCRE distribution because
558       other apparatus is needed to compile pcregrep for z/OS. The header
559       can be found in the special z/OS distribution, which is available
560       from www.zaconsultants.net or from www.cbttape.org. */
561    #endif
562    
563  typedef DIR directory_type;  typedef DIR directory_type;
564    #define FILESEP '/'
565    
566  static int  static int
567  isdirectory(char *filename)  isdirectory(char *filename)
# Line 280  isdirectory(char *filename) Line 569  isdirectory(char *filename)
569  struct stat statbuf;  struct stat statbuf;
570  if (stat(filename, &statbuf) < 0)  if (stat(filename, &statbuf) < 0)
571    return 0;        /* In the expectation that opening as a file will fail */    return 0;        /* In the expectation that opening as a file will fail */
572  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return (statbuf.st_mode & S_IFMT) == S_IFDIR;
573  }  }
574    
575  static directory_type *  static directory_type *
# Line 309  closedir(dir); Line 598  closedir(dir);
598  }  }
599    
600    
601  /************* Test for regular file in Unix **********/  /************* Test for regular file, Unix-style **********/
602    
603  static int  static int
604  isregfile(char *filename)  isregfile(char *filename)
# Line 321  return (statbuf.st_mode & S_IFMT) == S_I Line 610  return (statbuf.st_mode & S_IFMT) == S_I
610  }  }
611    
612    
613  /************* Test stdout for being a terminal in Unix **********/  #if defined NATIVE_ZOS
614    /************* Test for a terminal in z/OS **********/
615    /* isatty() does not work in a TSO environment, so always give FALSE. */
616    
617  static BOOL  static BOOL
618  is_stdout_tty(void)  is_stdout_tty(void)
619  {  {
620    return FALSE;
621    }
622    
623    static BOOL
624    is_file_tty(FILE *f)
625    {
626    return FALSE;
627    }
628    
629    
630    /************* Test for a terminal, Unix-style **********/
631    
632    #else
633    static BOOL
634    is_stdout_tty(void)
635    {
636  return isatty(fileno(stdout));  return isatty(fileno(stdout));
637  }  }
638    
639    static BOOL
640    is_file_tty(FILE *f)
641    {
642    return isatty(fileno(f));
643    }
644    #endif
645    
646    /* End of Unix-style or native z/OS environment functions. */
647    
648    
649  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Windows ***********/
650    
651  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
652  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
653  when it did not exist. David Byron added a patch that moved the #include of  when it did not exist. David Byron added a patch that moved the #include of
654  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
655  */  The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
656    undefined when it is indeed undefined. */
657    
658  #elif HAVE_WINDOWS_H  #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
659    
660  #ifndef STRICT  #ifndef STRICT
661  # define STRICT  # define STRICT
# Line 360  BOOL first; Line 677  BOOL first;
677  WIN32_FIND_DATA data;  WIN32_FIND_DATA data;
678  } directory_type;  } directory_type;
679    
680    #define FILESEP '/'
681    
682  int  int
683  isdirectory(char *filename)  isdirectory(char *filename)
684  {  {
685  DWORD attr = GetFileAttributes(filename);  DWORD attr = GetFileAttributes(filename);
686  if (attr == INVALID_FILE_ATTRIBUTES)  if (attr == INVALID_FILE_ATTRIBUTES)
687    return 0;    return 0;
688  return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;  return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
689  }  }
690    
691  directory_type *  directory_type *
# Line 377  char *pattern; Line 696  char *pattern;
696  directory_type *dir;  directory_type *dir;
697  DWORD err;  DWORD err;
698  len = strlen(filename);  len = strlen(filename);
699  pattern = (char *) malloc(len + 3);  pattern = (char *)malloc(len + 3);
700  dir = (directory_type *) malloc(sizeof(*dir));  dir = (directory_type *)malloc(sizeof(*dir));
701  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
702    {    {
703    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
704    exit(2);    pcregrep_exit(2);
705    }    }
706  memcpy(pattern, filename, len);  memcpy(pattern, filename, len);
707  memcpy(&(pattern[len]), "\\*", 3);  memcpy(&(pattern[len]), "\\*", 3);
# Line 430  free(dir); Line 749  free(dir);
749  }  }
750    
751    
752  /************* Test for regular file in Win32 **********/  /************* Test for regular file in Windows **********/
753    
754  /* I don't know how to do this, or if it can be done; assume all paths are  /* I don't know how to do this, or if it can be done; assume all paths are
755  regular if they are not directories. */  regular if they are not directories. */
# Line 441  return !isdirectory(filename); Line 760  return !isdirectory(filename);
760  }  }
761    
762    
763  /************* Test stdout for being a terminal in Win32 **********/  /************* Test for a terminal in Windows **********/
764    
765  /* I don't know how to do this; assume never */  /* I don't know how to do this; assume never */
766    
# Line 451  is_stdout_tty(void) Line 770  is_stdout_tty(void)
770  return FALSE;  return FALSE;
771  }  }
772    
773    static BOOL
774    is_file_tty(FILE *f)
775    {
776    return FALSE;
777    }
778    
779    /* End of Windows functions */
780    
781    
782  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
783    
# Line 458  return FALSE; Line 785  return FALSE;
785    
786  #else  #else
787    
788    #define FILESEP 0
789  typedef void directory_type;  typedef void directory_type;
790    
791  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
# Line 466  char *readdirectory(directory_type *dir) Line 794  char *readdirectory(directory_type *dir)
794  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
795    
796    
797  /************* Test for regular when we can't do it **********/  /************* Test for regular file when we can't do it **********/
798    
799  /* Assume all files are regular. */  /* Assume all files are regular. */
800    
801  int isregfile(char *filename) { return 1; }  int isregfile(char *filename) { return 1; }
802    
803    
804  /************* Test stdout for being a terminal when we can't do it **********/  /************* Test for a terminal when we can't do it **********/
805    
806  static BOOL  static BOOL
807  is_stdout_tty(void)  is_stdout_tty(void)
# Line 481  is_stdout_tty(void) Line 809  is_stdout_tty(void)
809  return FALSE;  return FALSE;
810  }  }
811    
812    static BOOL
813    is_file_tty(FILE *f)
814    {
815    return FALSE;
816    }
817    
818  #endif  #endif  /* End of system-specific functions */
819    
820    
821    
# Line 509  return sys_errlist[n]; Line 842  return sys_errlist[n];
842    
843    
844  /*************************************************  /*************************************************
845    *                Usage function                  *
846    *************************************************/
847    
848    static int
849    usage(int rc)
850    {
851    option_item *op;
852    fprintf(stderr, "Usage: pcregrep [-");
853    for (op = optionlist; op->one_char != 0; op++)
854      {
855      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
856      }
857    fprintf(stderr, "] [long options] [pattern] [files]\n");
858    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
859      "options.\n");
860    return rc;
861    }
862    
863    
864    
865    /*************************************************
866    *                Help function                   *
867    *************************************************/
868    
869    static void
870    help(void)
871    {
872    option_item *op;
873    
874    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
875    printf("Search for PATTERN in each FILE or standard input.\n");
876    printf("PATTERN must be present if neither -e nor -f is used.\n");
877    printf("\"-\" can be used as a file name to mean STDIN.\n");
878    
879    #ifdef SUPPORT_LIBZ
880    printf("Files whose names end in .gz are read using zlib.\n");
881    #endif
882    
883    #ifdef SUPPORT_LIBBZ2
884    printf("Files whose names end in .bz2 are read using bzlib2.\n");
885    #endif
886    
887    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
888    printf("Other files and the standard input are read as plain files.\n\n");
889    #else
890    printf("All files are read as plain files, without any interpretation.\n\n");
891    #endif
892    
893    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
894    printf("Options:\n");
895    
896    for (op = optionlist; op->one_char != 0; op++)
897      {
898      int n;
899      char s[4];
900    
901      /* Two options were accidentally implemented and documented with underscores
902      instead of hyphens in their names, something that was not noticed for quite a
903      few releases. When fixing this, I left the underscored versions in the list
904      in case people were using them. However, we don't want to display them in the
905      help data. There are no other options that contain underscores, and we do not
906      expect ever to implement such options. Therefore, just omit any option that
907      contains an underscore. */
908    
909      if (strchr(op->long_name, '_') != NULL) continue;
910    
911      if (op->one_char > 0 && (op->long_name)[0] == 0)
912        n = 31 - printf("  -%c", op->one_char);
913      else
914        {
915        if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
916          else strcpy(s, "   ");
917        n = 31 - printf("  %s --%s", s, op->long_name);
918        }
919    
920      if (n < 1) n = 1;
921      printf("%.*s%s\n", n, "                           ", op->help_text);
922      }
923    
924    printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
925    printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
926    printf("When reading patterns or file names from a file, trailing white\n");
927    printf("space is removed and blank lines are ignored.\n");
928    printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
929    
930    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
931    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
932    }
933    
934    
935    
936    /*************************************************
937    *            Test exclude/includes               *
938    *************************************************/
939    
940    /* If any exclude pattern matches, the path is excluded. Otherwise, unless
941    there are no includes, the path must match an include pattern.
942    
943    Arguments:
944      path      the path to be matched
945      ip        the chain of include patterns
946      ep        the chain of exclude patterns
947    
948    Returns:    TRUE if the path is not excluded
949    */
950    
951    static BOOL
952    test_incexc(char *path, patstr *ip, patstr *ep)
953    {
954    int plen = strlen(path);
955    
956    for (; ep != NULL; ep = ep->next)
957      {
958      if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
959        return FALSE;
960      }
961    
962    if (ip == NULL) return TRUE;
963    
964    for (; ip != NULL; ip = ip->next)
965      {
966      if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
967        return TRUE;
968      }
969    
970    return FALSE;
971    }
972    
973    
974    
975    /*************************************************
976    *         Decode integer argument value          *
977    *************************************************/
978    
979    /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
980    because SunOS4 doesn't have it. This is used only for unpicking arguments, so
981    just keep it simple.
982    
983    Arguments:
984      option_data   the option data string
985      op            the option item (for error messages)
986      longop        TRUE if option given in long form
987    
988    Returns:        a long integer
989    */
990    
991    static long int
992    decode_number(char *option_data, option_item *op, BOOL longop)
993    {
994    unsigned long int n = 0;
995    char *endptr = option_data;
996    while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
997    while (isdigit((unsigned char)(*endptr)))
998      n = n * 10 + (int)(*endptr++ - '0');
999    if (toupper(*endptr) == 'K')
1000      {
1001      n *= 1024;
1002      endptr++;
1003      }
1004    else if (toupper(*endptr) == 'M')
1005      {
1006      n *= 1024*1024;
1007      endptr++;
1008      }
1009    
1010    if (*endptr != 0)   /* Error */
1011      {
1012      if (longop)
1013        {
1014        char *equals = strchr(op->long_name, '=');
1015        int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1016          (int)(equals - op->long_name);
1017        fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1018          option_data, nlen, op->long_name);
1019        }
1020      else
1021        fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1022          option_data, op->one_char);
1023      pcregrep_exit(usage(2));
1024      }
1025    
1026    return n;
1027    }
1028    
1029    
1030    
1031    /*************************************************
1032    *       Add item to a chain of numbers           *
1033    *************************************************/
1034    
1035    /* Used to add an item onto a chain, or just return an unconnected item if the
1036    "after" argument is NULL.
1037    
1038    Arguments:
1039      n          the number to add
1040      after      if not NULL points to item to insert after
1041    
1042    Returns:     new number block
1043    */
1044    
1045    static omstr *
1046    add_number(int n, omstr *after)
1047    {
1048    omstr *om = (omstr *)malloc(sizeof(omstr));
1049    
1050    if (om == NULL)
1051      {
1052      fprintf(stderr, "pcregrep: malloc failed\n");
1053      pcregrep_exit(2);
1054      }
1055    om->next = NULL;
1056    om->groupnum = n;
1057    
1058    if (after != NULL)
1059      {
1060      om->next = after->next;
1061      after->next = om;
1062      }
1063    return om;
1064    }
1065    
1066    
1067    
1068    /*************************************************
1069    *            Read one line of input              *
1070    *************************************************/
1071    
1072    /* Normally, input is read using fread() into a large buffer, so many lines may
1073    be read at once. However, doing this for tty input means that no output appears
1074    until a lot of input has been typed. Instead, tty input is handled line by
1075    line. We cannot use fgets() for this, because it does not stop at a binary
1076    zero, and therefore there is no way of telling how many characters it has read,
1077    because there may be binary zeros embedded in the data.
1078    
1079    Arguments:
1080      buffer     the buffer to read into
1081      length     the maximum number of characters to read
1082      f          the file
1083    
1084    Returns:     the number of characters read, zero at end of file
1085    */
1086    
1087    static unsigned int
1088    read_one_line(char *buffer, int length, FILE *f)
1089    {
1090    int c;
1091    int yield = 0;
1092    while ((c = fgetc(f)) != EOF)
1093      {
1094      buffer[yield++] = c;
1095      if (c == '\n' || yield >= length) break;
1096      }
1097    return yield;
1098    }
1099    
1100    
1101    
1102    /*************************************************
1103  *             Find end of line                   *  *             Find end of line                   *
1104  *************************************************/  *************************************************/
1105    
# Line 520  Arguments: Line 1111  Arguments:
1111    endptr    end of available data    endptr    end of available data
1112    lenptr    where to put the length of the eol sequence    lenptr    where to put the length of the eol sequence
1113    
1114  Returns:    pointer to the last byte of the line  Returns:    pointer after the last byte of the line,
1115                including the newline byte(s)
1116  */  */
1117    
1118  static char *  static char *
# Line 589  switch(endlinetype) Line 1181  switch(endlinetype)
1181    
1182      switch (c)      switch (c)
1183        {        {
1184        case 0x0a:    /* LF */        case '\n':
1185        *lenptr = 1;        *lenptr = 1;
1186        return p;        return p;
1187    
1188        case 0x0d:    /* CR */        case '\r':
1189        if (p < endptr && *p == 0x0a)        if (p < endptr && *p == '\n')
1190          {          {
1191          *lenptr = 2;          *lenptr = 2;
1192          p++;          p++;
# Line 633  switch(endlinetype) Line 1225  switch(endlinetype)
1225    
1226      switch (c)      switch (c)
1227        {        {
1228        case 0x0a:    /* LF */        case '\n':    /* LF */
1229        case 0x0b:    /* VT */        case '\v':    /* VT */
1230        case 0x0c:    /* FF */        case '\f':    /* FF */
1231        *lenptr = 1;        *lenptr = 1;
1232        return p;        return p;
1233    
1234        case 0x0d:    /* CR */        case '\r':    /* CR */
1235        if (p < endptr && *p == 0x0a)        if (p < endptr && *p == '\n')
1236          {          {
1237          *lenptr = 2;          *lenptr = 2;
1238          p++;          p++;
# Line 648  switch(endlinetype) Line 1240  switch(endlinetype)
1240        else *lenptr = 1;        else *lenptr = 1;
1241        return p;        return p;
1242    
1243        case 0x85:    /* NEL */  #ifndef EBCDIC
1244          case 0x85:    /* Unicode NEL */
1245        *lenptr = utf8? 2 : 1;        *lenptr = utf8? 2 : 1;
1246        return p;        return p;
1247    
1248        case 0x2028:  /* LS */        case 0x2028:  /* Unicode LS */
1249        case 0x2029:  /* PS */        case 0x2029:  /* Unicode PS */
1250        *lenptr = 3;        *lenptr = 3;
1251        return p;        return p;
1252    #endif  /* Not EBCDIC */
1253    
1254        default:        default:
1255        break;        break;
# Line 705  switch(endlinetype) Line 1299  switch(endlinetype)
1299      while (p > startptr && p[-1] != '\n') p--;      while (p > startptr && p[-1] != '\n') p--;
1300      if (p <= startptr + 1 || p[-2] == '\r') return p;      if (p <= startptr + 1 || p[-2] == '\r') return p;
1301      }      }
1302    return p;   /* But control should never get here */    /* Control can never get here */
1303    
1304    case EL_ANY:    case EL_ANY:
1305    case EL_ANYCRLF:    case EL_ANYCRLF:
# Line 714  switch(endlinetype) Line 1308  switch(endlinetype)
1308    
1309    while (p > startptr)    while (p > startptr)
1310      {      {
1311      register int c;      register unsigned int c;
1312      char *pp = p - 1;      char *pp = p - 1;
1313    
1314      if (utf8)      if (utf8)
# Line 739  switch(endlinetype) Line 1333  switch(endlinetype)
1333    
1334      if (endlinetype == EL_ANYCRLF) switch (c)      if (endlinetype == EL_ANYCRLF) switch (c)
1335        {        {
1336        case 0x0a:    /* LF */        case '\n':    /* LF */
1337        case 0x0d:    /* CR */        case '\r':    /* CR */
1338        return p;        return p;
1339    
1340        default:        default:
# Line 749  switch(endlinetype) Line 1343  switch(endlinetype)
1343    
1344      else switch (c)      else switch (c)
1345        {        {
1346        case 0x0a:    /* LF */        case '\n':    /* LF */
1347        case 0x0b:    /* VT */        case '\v':    /* VT */
1348        case 0x0c:    /* FF */        case '\f':    /* FF */
1349        case 0x0d:    /* CR */        case '\r':    /* CR */
1350        case 0x85:    /* NEL */  #ifndef EBCDIC
1351        case 0x2028:  /* LS */        case 0x85:    /* Unicode NEL */
1352        case 0x2029:  /* PS */        case 0x2028:  /* Unicode LS */
1353          case 0x2029:  /* Unicode PS */
1354    #endif  /* Not EBCDIC */
1355        return p;        return p;
1356    
1357        default:        default:
# Line 790  Arguments: Line 1386  Arguments:
1386  Returns:            nothing  Returns:            nothing
1387  */  */
1388    
1389  static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,  static void
1390    char *endptr, char *printname)  do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1391      char *printname)
1392  {  {
1393  if (after_context > 0 && lastmatchnumber > 0)  if (after_context > 0 && lastmatchnumber > 0)
1394    {    {
# Line 803  if (after_context > 0 && lastmatchnumber Line 1400  if (after_context > 0 && lastmatchnumber
1400      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
1401      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1402      pp = end_of_line(pp, endptr, &ellength);      pp = end_of_line(pp, endptr, &ellength);
1403      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1404      lastmatchrestart = pp;      lastmatchrestart = pp;
1405      }      }
1406    hyphenpending = TRUE;    hyphenpending = TRUE;
# Line 813  if (after_context > 0 && lastmatchnumber Line 1410  if (after_context > 0 && lastmatchnumber
1410    
1411    
1412  /*************************************************  /*************************************************
1413    *   Apply patterns to subject till one matches   *
1414    *************************************************/
1415    
1416    /* This function is called to run through all patterns, looking for a match. It
1417    is used multiple times for the same subject when colouring is enabled, in order
1418    to find all possible matches.
1419    
1420    Arguments:
1421      matchptr     the start of the subject
1422      length       the length of the subject to match
1423      options      options for pcre_exec
1424      startoffset  where to start matching
1425      offsets      the offsets vector to fill in
1426      mrc          address of where to put the result of pcre_exec()
1427    
1428    Returns:      TRUE if there was a match
1429                  FALSE if there was no match
1430                  invert if there was a non-fatal error
1431    */
1432    
1433    static BOOL
1434    match_patterns(char *matchptr, size_t length, unsigned int options,
1435      int startoffset, int *offsets, int *mrc)
1436    {
1437    int i;
1438    size_t slen = length;
1439    patstr *p = patterns;
1440    const char *msg = "this text:\n\n";
1441    
1442    if (slen > 200)
1443      {
1444      slen = 200;
1445      msg = "text that starts:\n\n";
1446      }
1447    for (i = 1; p != NULL; p = p->next, i++)
1448      {
1449      *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1450        startoffset, options, offsets, OFFSET_SIZE);
1451      if (*mrc >= 0) return TRUE;
1452      if (*mrc == PCRE_ERROR_NOMATCH) continue;
1453      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1454      if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1455      fprintf(stderr, "%s", msg);
1456      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1457      fprintf(stderr, "\n\n");
1458      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1459          *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1460        resource_error = TRUE;
1461      if (error_count++ > 20)
1462        {
1463        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1464        pcregrep_exit(2);
1465        }
1466      return invert;    /* No more matching; don't show the line again */
1467      }
1468    
1469    return FALSE;  /* No match, no errors */
1470    }
1471    
1472    
1473    
1474    /*************************************************
1475  *            Grep an individual file             *  *            Grep an individual file             *
1476  *************************************************/  *************************************************/
1477    
1478  /* This is called from grep_or_recurse() below. It uses a buffer that is three  /* This is called from grep_or_recurse() below. It uses a buffer that is three
1479  times the value of MBUFTHIRD. The matching point is never allowed to stray into  times the value of bufthird. The matching point is never allowed to stray into
1480  the top third of the buffer, thus keeping more of the file available for  the top third of the buffer, thus keeping more of the file available for
1481  context printing or for multiline scanning. For large files, the pointer will  context printing or for multiline scanning. For large files, the pointer will
1482  be in the middle third most of the time, so the bottom third is available for  be in the middle third most of the time, so the bottom third is available for
# Line 828  Arguments: Line 1487  Arguments:
1487                 the gzFile pointer when reading is via libz                 the gzFile pointer when reading is via libz
1488                 the BZFILE pointer when reading is via libbz2                 the BZFILE pointer when reading is via libbz2
1489    frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2    frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1490      filename     the file name or NULL (for errors)
1491    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
1492                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
1493                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
1494    
1495  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
1496                 1 otherwise (no matches)                 1 otherwise (no matches)
1497                 2 if there is a read error on a .bz2 file                 2 if an overlong line is encountered
1498                   3 if there is a read error on a .bz2 file
1499  */  */
1500    
1501  static int  static int
1502  pcregrep(void *handle, int frtype, char *printname)  pcregrep(void *handle, int frtype, char *filename, char *printname)
1503  {  {
1504  int rc = 1;  int rc = 1;
1505  int linenumber = 1;  int linenumber = 1;
1506  int lastmatchnumber = 0;  int lastmatchnumber = 0;
1507  int count = 0;  int count = 0;
1508  int filepos = 0;  int filepos = 0;
1509  int offsets[99];  int offsets[OFFSET_SIZE];
1510  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
1511  char buffer[3*MBUFTHIRD];  char *ptr = main_buffer;
 char *ptr = buffer;  
1512  char *endptr;  char *endptr;
1513  size_t bufflength;  size_t bufflength;
1514    BOOL binary = FALSE;
1515  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
1516    BOOL input_line_buffered = line_buffered;
1517  FILE *in = NULL;                    /* Ensure initialized */  FILE *in = NULL;                    /* Ensure initialized */
1518    
1519  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
# Line 868  of what we have. In the case of libz, a Line 1530  of what we have. In the case of libz, a
1530  plain file. However, if a .bz2 file isn't actually bzipped, the first read will  plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1531  fail. */  fail. */
1532    
1533    (void)frtype;
1534    
1535  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
1536  if (frtype == FR_LIBZ)  if (frtype == FR_LIBZ)
1537    {    {
1538    ingz = (gzFile)handle;    ingz = (gzFile)handle;
1539    bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);    bufflength = gzread (ingz, main_buffer, bufsize);
1540    }    }
1541  else  else
1542  #endif  #endif
# Line 881  else Line 1545  else
1545  if (frtype == FR_LIBBZ2)  if (frtype == FR_LIBBZ2)
1546    {    {
1547    inbz2 = (BZFILE *)handle;    inbz2 = (BZFILE *)handle;
1548    bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);    bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1549    if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */    if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1550    }                                    /* without the cast it is unsigned. */    }                                    /* without the cast it is unsigned. */
1551  else  else
# Line 889  else Line 1553  else
1553    
1554    {    {
1555    in = (FILE *)handle;    in = (FILE *)handle;
1556    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);    if (is_file_tty(in)) input_line_buffered = TRUE;
1557      bufflength = input_line_buffered?
1558        read_one_line(main_buffer, bufsize, in) :
1559        fread(main_buffer, 1, bufsize, in);
1560    }    }
1561    
1562  endptr = buffer + bufflength;  endptr = main_buffer + bufflength;
1563    
1564    /* Unless binary-files=text, see if we have a binary file. This uses the same
1565    rule as GNU grep, namely, a search for a binary zero byte near the start of the
1566    file. */
1567    
1568    if (binary_files != BIN_TEXT)
1569      {
1570      binary =
1571        memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1572      if (binary && binary_files == BIN_NOMATCH) return 1;
1573      }
1574    
1575  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
1576  files, endptr will be at the end of the buffer when we are in the middle of the  files, endptr will be at the end of the buffer when we are in the middle of the
# Line 901  way, the buffer is shifted left and re-f Line 1579  way, the buffer is shifted left and re-f
1579    
1580  while (ptr < endptr)  while (ptr < endptr)
1581    {    {
1582    int i, endlinelength;    int endlinelength;
1583    int mrc = 0;    int mrc = 0;
1584    BOOL match = FALSE;    int startoffset = 0;
1585      unsigned int options = 0;
1586      BOOL match;
1587    char *matchptr = ptr;    char *matchptr = ptr;
1588    char *t = ptr;    char *t = ptr;
1589    size_t length, linelength;    size_t length, linelength;
# Line 911  while (ptr < endptr) Line 1591  while (ptr < endptr)
1591    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
1592    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
1593    length remainder of the data in the buffer. Otherwise, it is the length of    length remainder of the data in the buffer. Otherwise, it is the length of
1594    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
1595    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1596    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
1597      first line. */
1598    
1599    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
1600    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
1601    length = multiline? (size_t)(endptr - ptr) : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
1602    
1603      /* Check to see if the line we are looking at extends right to the very end
1604      of the buffer without a line terminator. This means the line is too long to
1605      handle. */
1606    
1607      if (endlinelength == 0 && t == main_buffer + bufsize)
1608        {
1609        fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1610                        "pcregrep: check the --buffer-size option\n",
1611                        linenumber,
1612                        (filename == NULL)? "" : " of file ",
1613                        (filename == NULL)? "" : filename);
1614        return 2;
1615        }
1616    
1617    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
1618    
1619  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
1620    if (jfriedl_XT || jfriedl_XR)    if (jfriedl_XT || jfriedl_XR)
1621    {    {
1622        #include <sys/time.h>  #     include <sys/time.h>
1623        #include <time.h>  #     include <time.h>
1624        struct timeval start_time, end_time;        struct timeval start_time, end_time;
1625        struct timezone dummy;        struct timezone dummy;
1626          int i;
1627    
1628        if (jfriedl_XT)        if (jfriedl_XT)
1629        {        {
# Line 936  while (ptr < endptr) Line 1632  while (ptr < endptr)
1632            ptr = malloc(newlen + 1);            ptr = malloc(newlen + 1);
1633            if (!ptr) {            if (!ptr) {
1634                    printf("out of memory");                    printf("out of memory");
1635                    exit(2);                    pcregrep_exit(2);
1636            }            }
1637            endptr = ptr;            endptr = ptr;
1638            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
# Line 953  while (ptr < endptr) Line 1649  while (ptr < endptr)
1649    
1650    
1651        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1652            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);            match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1653                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1654    
1655        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
1656                perror("bad gettimeofday");                perror("bad gettimeofday");
# Line 967  while (ptr < endptr) Line 1664  while (ptr < endptr)
1664    }    }
1665  #endif  #endif
1666    
1667    /* We come back here after a match when the -o option (only_matching) is set,    /* We come back here after a match when show_only_matching is set, in order
1668    in order to find any further matches in the same line. */    to find any further matches in the same line. This applies to
1669      --only-matching, --file-offsets, and --line-offsets. */
1670    
1671    ONLY_MATCHING_RESTART:    ONLY_MATCHING_RESTART:
1672    
1673    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches or there is an error other
1674    the final newline in the subject string. */    than NOMATCH. This code is in a subroutine so that it can be re-used for
1675      finding subsequent matches when colouring matched lines. After finding one
1676      match, set PCRE_NOTEMPTY to disable any further matches of null strings in
1677      this line. */
1678    
1679    for (i = 0; i < pattern_count; i++)    match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc);
1680      {    options = PCRE_NOTEMPTY;
     mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,  
       offsets, 99);  
     if (mrc >= 0) { match = TRUE; break; }  
     if (mrc != PCRE_ERROR_NOMATCH)  
       {  
       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);  
       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
       fprintf(stderr, "this line:\n");  
       fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */  
       fprintf(stderr, "\n");  
       if (error_count == 0 &&  
           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))  
         {  
         fprintf(stderr, "pcregrep: error %d means that a resource limit "  
           "was exceeded\n", mrc);  
         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
         }  
       if (error_count++ > 20)  
         {  
         fprintf(stderr, "pcregrep: too many errors - abandoned\n");  
         exit(2);  
         }  
       match = invert;    /* No more matching; don't show the line again */  
       break;  
       }  
     }  
1681    
1682    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1683    
# Line 1018  while (ptr < endptr) Line 1693  while (ptr < endptr)
1693    
1694      if (count_only) count++;      if (count_only) count++;
1695    
1696        /* When handling a binary file and binary-files==binary, the "binary"
1697        variable will be set true (it's false in all other cases). In this
1698        situation we just want to output the file name. No need to scan further. */
1699    
1700        else if (binary)
1701          {
1702          fprintf(stdout, "Binary file %s matches\n", filename);
1703          return 0;
1704          }
1705    
1706      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1707      in the file. */      in the file. */
1708    
1709      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1710        {        {
1711        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1712        return 0;        return 0;
# Line 1031  while (ptr < endptr) Line 1716  while (ptr < endptr)
1716    
1717      else if (quiet) return 0;      else if (quiet) return 0;
1718    
1719      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched,
1720      the --file-offsets and --line-offsets options output offsets for the      and/or one or more captured portions of it, as long as these strings are
1721      matching substring (they both force --only-matching). None of these options      not empty. The --file-offsets and --line-offsets options output offsets for
1722      prints any context. Afterwards, adjust the start and length, and then jump      the matching substring (all three set show_only_matching). None of these
1723      back to look for further matches in the same line. If we are in invert      mutually exclusive options prints any context. Afterwards, adjust the start
1724      mode, however, nothing is printed - this could be still useful because the      and then jump back to look for further matches in the same line. If we are
1725      return code is set. */      in invert mode, however, nothing is printed and we do not restart - this
1726        could still be useful because the return code is set. */
1727    
1728      else if (only_matching)      else if (show_only_matching)
1729        {        {
1730        if (!invert)        if (!invert)
1731          {          {
1732          if (printname != NULL) fprintf(stdout, "%s:", printname);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1733          if (number) fprintf(stdout, "%d:", linenumber);          if (number) fprintf(stdout, "%d:", linenumber);
1734    
1735            /* Handle --line-offsets */
1736    
1737          if (line_offsets)          if (line_offsets)
1738            fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1739              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1740    
1741            /* Handle --file-offsets */
1742    
1743          else if (file_offsets)          else if (file_offsets)
1744            fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d\n",
1745                (int)(filepos + matchptr + offsets[0] - ptr),
1746              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1747    
1748            /* Handle --only-matching, which may occur many times */
1749    
1750          else          else
1751            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);            {
1752          fprintf(stdout, "\n");            BOOL printed = FALSE;
1753          matchptr += offsets[1];            omstr *om;
1754          length -= offsets[1];  
1755              for (om = only_matching; om != NULL; om = om->next)
1756                {
1757                int n = om->groupnum;
1758                if (n < mrc)
1759                  {
1760                  int plen = offsets[2*n + 1] - offsets[2*n];
1761                  if (plen > 0)
1762                    {
1763                    if (printed) fprintf(stdout, "%s", om_separator);
1764                    if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1765                    FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
1766                    if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1767                    printed = TRUE;
1768                    }
1769                  }
1770                }
1771    
1772              if (printed || printname != NULL || number) fprintf(stdout, "\n");
1773              }
1774    
1775            /* Prepare to repeat to find the next match */
1776    
1777          match = FALSE;          match = FALSE;
1778            if (line_buffered) fflush(stdout);
1779            rc = 0;                      /* Had some success */
1780            startoffset = offsets[1];    /* Restart after the match */
1781          goto ONLY_MATCHING_RESTART;          goto ONLY_MATCHING_RESTART;
1782          }          }
1783        }        }
# Line 1092  while (ptr < endptr) Line 1813  while (ptr < endptr)
1813            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1814            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1815            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1816            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1817            lastmatchrestart = pp;            lastmatchrestart = pp;
1818            }            }
1819          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
# Line 1115  while (ptr < endptr) Line 1836  while (ptr < endptr)
1836          int linecount = 0;          int linecount = 0;
1837          char *p = ptr;          char *p = ptr;
1838    
1839          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1840                 linecount < before_context)                 linecount < before_context)
1841            {            {
1842            linecount++;            linecount++;
1843            p = previous_line(p, buffer);            p = previous_line(p, main_buffer);
1844            }            }
1845    
1846          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 1132  while (ptr < endptr) Line 1853  while (ptr < endptr)
1853            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1854            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1855            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1856            fwrite(p, 1, pp - p, stdout);            FWRITE(p, 1, pp - p, stdout);
1857            p = pp;            p = pp;
1858            }            }
1859          }          }
# Line 1152  while (ptr < endptr) Line 1873  while (ptr < endptr)
1873        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1874        the match will always be before the first newline sequence. */        the match will always be before the first newline sequence. */
1875    
1876        if (multiline)        if (multiline & !invert)
1877          {          {
1878          int ellength;          char *endmatch = ptr + offsets[1];
1879          char *endmatch = ptr;          t = ptr;
1880          if (!invert)          while (t <= endmatch)
1881            {            {
1882            endmatch += offsets[1];            t = end_of_line(t, endptr, &endlinelength);
1883            t = ptr;            if (t < endmatch) linenumber++; else break;
           while (t < endmatch)  
             {  
             t = end_of_line(t, endptr, &ellength);  
             if (t <= endmatch) linenumber++; else break;  
             }  
1884            }            }
1885          endmatch = end_of_line(endmatch, endptr, &ellength);          linelength = t - ptr - endlinelength;
         linelength = endmatch - ptr - ellength;  
1886          }          }
1887    
1888        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 1182  while (ptr < endptr) Line 1897  while (ptr < endptr)
1897          {          {
1898          int first = S_arg * 2;          int first = S_arg * 2;
1899          int last  = first + 1;          int last  = first + 1;
1900          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1901          fprintf(stdout, "X");          fprintf(stdout, "X");
1902          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1903          }          }
1904        else        else
1905  #endif  #endif
1906    
1907        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1908          matches, but not of course if the line is a non-match. */
1909    
1910        if (do_colour)        if (do_colour && !invert)
1911          {          {
1912          fwrite(ptr, 1, offsets[0], stdout);          int plength;
1913            FWRITE(ptr, 1, offsets[0], stdout);
1914          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1915          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1916          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1917          fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],          for (;;)
1918            stdout);            {
1919              startoffset = offsets[1];
1920              if (startoffset >= (int)linelength + endlinelength ||
1921                  !match_patterns(matchptr, length, options, startoffset, offsets,
1922                    &mrc))
1923                break;
1924              FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1925              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1926              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1927              fprintf(stdout, "%c[00m", 0x1b);
1928              }
1929    
1930            /* In multiline mode, we may have already printed the complete line
1931            and its line-ending characters (if they matched the pattern), so there
1932            may be no more to print. */
1933    
1934            plength = (int)((linelength + endlinelength) - startoffset);
1935            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1936          }          }
1937        else fwrite(ptr, 1, linelength + endlinelength, stdout);  
1938          /* Not colouring; no need to search for further matches */
1939    
1940          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1941        }        }
1942    
1943      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match. If --line-buffered was
1944        given, flush the output. */
1945    
1946        if (line_buffered) fflush(stdout);
1947      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1948    
1949      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
# Line 1236  while (ptr < endptr) Line 1975  while (ptr < endptr)
1975    offset to the current line is maintained in filepos. */    offset to the current line is maintained in filepos. */
1976    
1977    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
1978    filepos += linelength + endlinelength;    filepos += (int)(linelength + endlinelength);
1979    linenumber++;    linenumber++;
1980    
1981      /* If input is line buffered, and the buffer is not yet full, read another
1982      line and add it into the buffer. */
1983    
1984      if (input_line_buffered && bufflength < (size_t)bufsize)
1985        {
1986        int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1987        bufflength += add;
1988        endptr += add;
1989        }
1990    
1991    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1992    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1993    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
1994    about to be lost, print them. */    about to be lost, print them. */
1995    
1996    if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)    if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1997      {      {
1998      if (after_context > 0 &&      if (after_context > 0 &&
1999          lastmatchnumber > 0 &&          lastmatchnumber > 0 &&
2000          lastmatchrestart < buffer + MBUFTHIRD)          lastmatchrestart < main_buffer + bufthird)
2001        {        {
2002        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2003        lastmatchnumber = 0;        lastmatchnumber = 0;
# Line 1256  while (ptr < endptr) Line 2005  while (ptr < endptr)
2005    
2006      /* Now do the shuffle */      /* Now do the shuffle */
2007    
2008      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
2009      ptr -= MBUFTHIRD;      ptr -= bufthird;
2010    
2011  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
2012      if (frtype == FR_LIBZ)      if (frtype == FR_LIBZ)
2013        bufflength = 2*MBUFTHIRD +        bufflength = 2*bufthird +
2014          gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);          gzread (ingz, main_buffer + 2*bufthird, bufthird);
2015      else      else
2016  #endif  #endif
2017    
2018  #ifdef SUPPORT_LIBBZ2  #ifdef SUPPORT_LIBBZ2
2019      if (frtype == FR_LIBBZ2)      if (frtype == FR_LIBBZ2)
2020        bufflength = 2*MBUFTHIRD +        bufflength = 2*bufthird +
2021          BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);          BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
2022      else      else
2023  #endif  #endif
2024    
2025      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);      bufflength = 2*bufthird +
2026          (input_line_buffered?
2027      endptr = buffer + bufflength;         read_one_line(main_buffer + 2*bufthird, bufthird, in) :
2028           fread(main_buffer + 2*bufthird, 1, bufthird, in));
2029        endptr = main_buffer + bufflength;
2030    
2031      /* Adjust any last match point */      /* Adjust any last match point */
2032    
2033      if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;      if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
2034      }      }
2035    }     /* Loop through the whole file */    }     /* Loop through the whole file */
2036    
2037  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
2038  hyphenpending if it prints something. */  hyphenpending if it prints something. */
2039    
2040  if (!only_matching && !count_only)  if (!show_only_matching && !count_only)
2041    {    {
2042    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2043    hyphenpending |= endhyphenpending;    hyphenpending |= endhyphenpending;
# Line 1305  if (filenames == FN_NOMATCH_ONLY) Line 2056  if (filenames == FN_NOMATCH_ONLY)
2056    
2057  if (count_only)  if (count_only)
2058    {    {
2059    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
2060    fprintf(stdout, "%d\n", count);      {
2061        if (printname != NULL && filenames != FN_NONE)
2062          fprintf(stdout, "%s:", printname);
2063        fprintf(stdout, "%d\n", count);
2064        }
2065    }    }
2066    
2067  return rc;  return rc;
# Line 1326  Arguments: Line 2081  Arguments:
2081    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
2082    only_one_at_top   TRUE if the path is the only one at toplevel    only_one_at_top   TRUE if the path is the only one at toplevel
2083    
2084  Returns:   0 if there was at least one match  Returns:  -1 the file/directory was skipped
2085               0 if there was at least one match
2086             1 if there were no matches             1 if there were no matches
2087             2 there was some kind of error             2 there was some kind of error
2088    
# Line 1337  static int Line 2093  static int
2093  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2094  {  {
2095  int rc = 1;  int rc = 1;
 int sep;  
2096  int frtype;  int frtype;
 int pathlen;  
2097  void *handle;  void *handle;
2098    char *lastcomp;
2099  FILE *in = NULL;           /* Ensure initialized */  FILE *in = NULL;           /* Ensure initialized */
2100    
2101  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
# Line 1351  gzFile ingz = NULL; Line 2106  gzFile ingz = NULL;
2106  BZFILE *inbz2 = NULL;  BZFILE *inbz2 = NULL;
2107  #endif  #endif
2108    
2109    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2110    int pathlen;
2111    #endif
2112    
2113    #if defined NATIVE_ZOS
2114    int zos_type;
2115    FILE *zos_test_file;
2116    #endif
2117    
2118  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
2119    
2120  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
2121    {    {
2122    return pcregrep(stdin, FR_PLAIN,    return pcregrep(stdin, FR_PLAIN, stdin_name,
2123      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
2124        stdin_name : NULL);        stdin_name : NULL);
2125    }    }
2126    
2127  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2128  each file within it, subject to any include or exclude patterns that were set.  directories, whereas --include and --exclude apply to everything else. The test
2129  The scanning code is localized so it can be made system-specific. */  is against the final component of the path. */
2130    
2131    lastcomp = strrchr(pathname, FILESEP);
2132    lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2133    
2134    /* If the file is a directory, skip if not recursing or if explicitly excluded.
2135    Otherwise, scan the directory and recurse for each path within it. The scanning
2136    code is localized so it can be made system-specific. */
2137    
2138    
2139    /* For z/OS, determine the file type. */
2140    
2141    #if defined NATIVE_ZOS
2142    zos_test_file =  fopen(pathname,"rb");
2143    
2144    if (zos_test_file == NULL)
2145       {
2146       if (!silent) fprintf(stderr, "pcregrep: failed to test next file %s\n",
2147         pathname, strerror(errno));
2148       return -1;
2149       }
2150    zos_type = identifyzosfiletype (zos_test_file);
2151    fclose (zos_test_file);
2152    
2153    /* Handle a PDS in separate code */
2154    
2155    if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
2156       {
2157       return travelonpdsdir (pathname, only_one_at_top);
2158       }
2159    
2160    /* Deal with regular files in the normal way below. These types are:
2161       zos_type == __ZOS_PDS_MEMBER
2162       zos_type == __ZOS_PS
2163       zos_type == __ZOS_VSAM_KSDS
2164       zos_type == __ZOS_VSAM_ESDS
2165       zos_type == __ZOS_VSAM_RRDS
2166    */
2167    
2168    /* Handle a z/OS directory using common code. */
2169    
2170    else if (zos_type == __ZOS_HFS)
2171     {
2172    #endif  /* NATIVE_ZOS */
2173    
2174    
2175  if ((sep = isdirectory(pathname)) != 0)  /* Handle directories: common code for all OS */
2176    
2177    if (isdirectory(pathname))
2178    {    {
2179    if (dee_action == dee_SKIP) return 1;    if (dee_action == dee_SKIP ||
2180          !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2181        return -1;
2182    
2183    if (dee_action == dee_RECURSE)    if (dee_action == dee_RECURSE)
2184      {      {
2185      char buffer[1024];      char buffer[1024];
# Line 1383  if ((sep = isdirectory(pathname)) != 0) Line 2196  if ((sep = isdirectory(pathname)) != 0)
2196    
2197      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
2198        {        {
2199        int frc, blen;        int frc;
2200        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
       blen = strlen(buffer);  
   
       if (exclude_compiled != NULL &&  
           pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)  
         continue;  
   
       if (include_compiled != NULL &&  
           pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  
         continue;  
   
2201        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2202        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
2203         else if (frc == 0 && rc == 1) rc = 0;         else if (frc == 0 && rc == 1) rc = 0;
# Line 1405  if ((sep = isdirectory(pathname)) != 0) Line 2208  if ((sep = isdirectory(pathname)) != 0)
2208      }      }
2209    }    }
2210    
2211  /* If the file is not a directory and not a regular file, skip it if that's  #if defined NATIVE_ZOS
2212  been requested. */   }
2213    #endif
2214    
2215  else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;  /* If the file is not a directory, check for a regular file, and if it is not,
2216    skip it if that's been requested. Otherwise, check for an explicit inclusion or
2217    exclusion. */
2218    
2219    else if (
2220    #if defined NATIVE_ZOS
2221            (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
2222    #else  /* all other OS */
2223            (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2224    #endif
2225            !test_incexc(lastcomp, include_patterns, exclude_patterns))
2226      return -1;  /* File skipped */
2227    
2228  /* Control reaches here if we have a regular file, or if we have a directory  /* Control reaches here if we have a regular file, or if we have a directory
2229  and recursion or skipping was not requested, or if we have anything else and  and recursion or skipping was not requested, or if we have anything else and
# Line 1416  skipping was not requested. The scan pro Line 2231  skipping was not requested. The scan pro
2231  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
2232  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
2233    
2234  pathlen = strlen(pathname);  #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2235    pathlen = (int)(strlen(pathname));
2236    #endif
2237    
2238  /* Open using zlib if it is supported and the file name ends with .gz. */  /* Open using zlib if it is supported and the file name ends with .gz. */
2239    
# Line 1456  an attempt to read a .bz2 file indicates Line 2273  an attempt to read a .bz2 file indicates
2273  PLAIN_FILE:  PLAIN_FILE:
2274  #endif  #endif
2275    {    {
2276    in = fopen(pathname, "r");    in = fopen(pathname, "rb");
2277    handle = (void *)in;    handle = (void *)in;
2278    frtype = FR_PLAIN;    frtype = FR_PLAIN;
2279    }    }
# Line 1473  if (handle == NULL) Line 2290  if (handle == NULL)
2290    
2291  /* Now grep the file */  /* Now grep the file */
2292    
2293  rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||  rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2294    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2295    
2296  /* Close in an appropriate manner. */  /* Close in an appropriate manner. */
# Line 1484  if (frtype == FR_LIBZ) Line 2301  if (frtype == FR_LIBZ)
2301  else  else
2302  #endif  #endif
2303    
2304  /* If it is a .bz2 file and the result is 2, it means that the first attempt to  /* If it is a .bz2 file and the result is 3, it means that the first attempt to
2305  read failed. If the error indicates that the file isn't in fact bzipped, try  read failed. If the error indicates that the file isn't in fact bzipped, try
2306  again as a normal file. */  again as a normal file. */
2307    
2308  #ifdef SUPPORT_LIBBZ2  #ifdef SUPPORT_LIBBZ2
2309  if (frtype == FR_LIBBZ2)  if (frtype == FR_LIBBZ2)
2310    {    {
2311    if (rc == 2)    if (rc == 3)
2312      {      {
2313      int errnum;      int errnum;
2314      const char *err = BZ2_bzerror(inbz2, &errnum);      const char *err = BZ2_bzerror(inbz2, &errnum);
# Line 1503  if (frtype == FR_LIBBZ2) Line 2320  if (frtype == FR_LIBBZ2)
2320      else if (!silent)      else if (!silent)
2321        fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",        fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
2322          pathname, err);          pathname, err);
2323        rc = 2;    /* The normal "something went wrong" code */
2324      }      }
2325    BZ2_bzclose(inbz2);    BZ2_bzclose(inbz2);
2326    }    }
# Line 1520  return rc; Line 2338  return rc;
2338    
2339    
2340    
   
 /*************************************************  
 *                Usage function                  *  
 *************************************************/  
   
 static int  
 usage(int rc)  
 {  
 option_item *op;  
 fprintf(stderr, "Usage: pcregrep [-");  
 for (op = optionlist; op->one_char != 0; op++)  
   {  
   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);  
   }  
 fprintf(stderr, "] [long options] [pattern] [files]\n");  
 fprintf(stderr, "Type `pcregrep --help' for more information and the long "  
   "options.\n");  
 return rc;  
 }  
   
   
   
   
 /*************************************************  
 *                Help function                   *  
 *************************************************/  
   
 static void  
 help(void)  
 {  
 option_item *op;  
   
 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  
 printf("Search for PATTERN in each FILE or standard input.\n");  
 printf("PATTERN must be present if neither -e nor -f is used.\n");  
 printf("\"-\" can be used as a file name to mean STDIN.\n");  
   
 #ifdef SUPPORT_LIBZ  
 printf("Files whose names end in .gz are read using zlib.\n");  
 #endif  
   
 #ifdef SUPPORT_LIBBZ2  
 printf("Files whose names end in .bz2 are read using bzlib2.\n");  
 #endif  
   
 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2  
 printf("Other files and the standard input are read as plain files.\n\n");  
 #else  
 printf("All files are read as plain files, without any interpretation.\n\n");  
 #endif  
   
 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
 printf("Options:\n");  
   
 for (op = optionlist; op->one_char != 0; op++)  
   {  
   int n;  
   char s[4];  
   if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");  
   n = 30 - printf("  %s --%s", s, op->long_name);  
   if (n < 1) n = 1;  
   printf("%.*s%s\n", n, "                    ", op->help_text);  
   }  
   
 printf("\nWhen reading patterns from a file instead of using a command line option,\n");  
 printf("trailing white space is removed and blank lines are ignored.\n");  
 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
   
 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");  
 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  
 }  
   
   
   
   
2341  /*************************************************  /*************************************************
2342  *    Handle a single-letter, no data option      *  *    Handle a single-letter, no data option      *
2343  *************************************************/  *************************************************/
# Line 1605  handle_option(int letter, int options) Line 2348  handle_option(int letter, int options)
2348  switch(letter)  switch(letter)
2349    {    {
2350    case N_FOFFSETS: file_offsets = TRUE; break;    case N_FOFFSETS: file_offsets = TRUE; break;
2351    case N_HELP: help(); exit(0);    case N_HELP: help(); pcregrep_exit(0);
2352      case N_LBUFFER: line_buffered = TRUE; break;
2353    case N_LOFFSETS: line_offsets = number = TRUE; break;    case N_LOFFSETS: line_offsets = number = TRUE; break;
2354      case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2355      case 'a': binary_files = BIN_TEXT; break;
2356    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
2357    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
2358    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
2359      case 'I': binary_files = BIN_NOMATCH; break;
2360    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
2361    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
2362    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2363    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
2364    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2365    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
2366    case 'o': only_matching = TRUE; break;  
2367      case 'o':
2368      only_matching_last = add_number(0, only_matching_last);
2369      if (only_matching == NULL) only_matching = only_matching_last;
2370      break;
2371    
2372    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
2373    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
2374    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
# Line 1626  switch(letter) Line 2378  switch(letter)
2378    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
2379    
2380    case 'V':    case 'V':
2381    fprintf(stderr, "pcregrep version %s\n", pcre_version());    fprintf(stdout, "pcregrep version %s\n", pcre_version());
2382    exit(0);    pcregrep_exit(0);
2383    break;    break;
2384    
2385    default:    default:
2386    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2387    exit(usage(2));    pcregrep_exit(usage(2));
2388    }    }
2389    
2390  return options;  return options;
# Line 1670  return buffer; Line 2422  return buffer;
2422  *          Compile a single pattern              *  *          Compile a single pattern              *
2423  *************************************************/  *************************************************/
2424    
2425  /* When the -F option has been used, this is called for each substring.  /* Do nothing if the pattern has already been compiled. This is the case for
2426  Otherwise it's called for each supplied pattern.  include/exclude patterns read from a file.
2427    
2428    When the -F option has been used, each "pattern" may be a list of strings,
2429    separated by line breaks. They will be matched literally. We split such a
2430    string and compile the first substring, inserting an additional block into the
2431    pattern chain.
2432    
2433  Arguments:  Arguments:
2434    pattern        the pattern string    p              points to the pattern block
2435    options        the PCRE options    options        the PCRE options
2436    filename       the file name, or NULL for a command-line pattern    popts          the processing options
2437      fromfile       TRUE if the pattern was read from a file
2438      fromtext       file name or identifying text (e.g. "include")
2439    count          0 if this is the only command line pattern, or    count          0 if this is the only command line pattern, or
2440                   number of the command line pattern, or                   number of the command line pattern, or
2441                   linenumber for a pattern from a file                   linenumber for a pattern from a file
# Line 1685  Returns:         TRUE on success, FALSE Line 2444  Returns:         TRUE on success, FALSE
2444  */  */
2445    
2446  static BOOL  static BOOL
2447  compile_single_pattern(char *pattern, int options, char *filename, int count)  compile_pattern(patstr *p, int options, int popts, int fromfile,
2448      const char *fromtext, int count)
2449  {  {
2450  char buffer[MBUFTHIRD + 16];  char buffer[PATBUFSIZE];
2451  const char *error;  const char *error;
2452    char *ps = p->string;
2453    int patlen = strlen(ps);
2454  int errptr;  int errptr;
2455    
2456  if (pattern_count >= MAX_PATTERN_COUNT)  if (p->compiled != NULL) return TRUE;
   {  
   fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",  
     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);  
   return FALSE;  
   }  
2457    
2458  sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,  if ((popts & PO_FIXED_STRINGS) != 0)
   suffix[process_options]);  
 pattern_list[pattern_count] =  
   pcre_compile(buffer, options, &error, &errptr, pcretables);  
 if (pattern_list[pattern_count] != NULL)  
2459    {    {
2460    pattern_count++;    int ellength;
2461    return TRUE;    char *eop = ps + patlen;
2462      char *pe = end_of_line(ps, eop, &ellength);
2463    
2464      if (ellength != 0)
2465        {
2466        if (add_pattern(pe, p) == NULL) return FALSE;
2467        patlen = (int)(pe - ps - ellength);
2468        }
2469    }    }
2470    
2471    sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2472    p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2473    if (p->compiled != NULL) return TRUE;
2474    
2475  /* Handle compile errors */  /* Handle compile errors */
2476    
2477  errptr -= (int)strlen(prefix[process_options]);  errptr -= (int)strlen(prefix[popts]);
2478  if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);  if (errptr > patlen) errptr = patlen;
2479    
2480  if (filename == NULL)  if (fromfile)
2481    {    {
2482    if (count == 0)    fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2483      fprintf(stderr, "pcregrep: Error in command-line regex "      "at offset %d: %s\n", count, fromtext, errptr, error);
       "at offset %d: %s\n", errptr, error);  
   else  
     fprintf(stderr, "pcregrep: Error in %s command-line regex "  
       "at offset %d: %s\n", ordin(count), errptr, error);  
2484    }    }
2485  else  else
2486    {    {
2487    fprintf(stderr, "pcregrep: Error in regex in line %d of %s "    if (count == 0)
2488      "at offset %d: %s\n", count, filename, errptr, error);      fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2489          fromtext, errptr, error);
2490      else
2491        fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2492          ordin(count), fromtext, errptr, error);
2493    }    }
2494    
2495  return FALSE;  return FALSE;
# Line 1734  return FALSE; Line 2498  return FALSE;
2498    
2499    
2500  /*************************************************  /*************************************************
2501  *           Compile one supplied pattern         *  *     Read and compile a file of patterns        *
2502  *************************************************/  *************************************************/
2503    
2504  /* When the -F option has been used, each string may be a list of strings,  /* This is used for -f, --include-from, and --exclude-from.
 separated by line breaks. They will be matched literally.  
2505    
2506  Arguments:  Arguments:
2507    pattern        the pattern string    name         the name of the file; "-" is stdin
2508    options        the PCRE options    patptr       pointer to the pattern chain anchor
2509    filename       the file name, or NULL for a command-line pattern    patlastptr   pointer to the last pattern pointer
2510    count          0 if this is the only command line pattern, or    popts        the process options to pass to compile_pattern()
                  number of the command line pattern, or  
                  linenumber for a pattern from a file  
2511    
2512  Returns:         TRUE on success, FALSE after an error  Returns:       TRUE if all went well
2513  */  */
2514    
2515  static BOOL  static BOOL
2516  compile_pattern(char *pattern, int options, char *filename, int count)  read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2517  {  {
2518  if ((process_options & PO_FIXED_STRINGS) != 0)  int linenumber = 0;
2519    FILE *f;
2520    char *filename;
2521    char buffer[PATBUFSIZE];
2522    
2523    if (strcmp(name, "-") == 0)
2524      {
2525      f = stdin;
2526      filename = stdin_name;
2527      }
2528    else
2529      {
2530      f = fopen(name, "r");
2531      if (f == NULL)
2532        {
2533        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2534        return FALSE;
2535        }
2536      filename = name;
2537      }
2538    
2539    while (fgets(buffer, PATBUFSIZE, f) != NULL)
2540    {    {
2541    char *eop = pattern + strlen(pattern);    char *s = buffer + (int)strlen(buffer);
2542    char buffer[MBUFTHIRD];    while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2543      *s = 0;
2544      linenumber++;
2545      if (buffer[0] == 0) continue;   /* Skip blank lines */
2546    
2547      /* Note: this call to add_pattern() puts a pointer to the local variable
2548      "buffer" into the pattern chain. However, that pointer is used only when
2549      compiling the pattern, which happens immediately below, so we flatten it
2550      afterwards, as a precaution against any later code trying to use it. */
2551    
2552      *patlastptr = add_pattern(buffer, *patlastptr);
2553      if (*patlastptr == NULL)
2554        {
2555        if (f != stdin) fclose(f);
2556        return FALSE;
2557        }
2558      if (*patptr == NULL) *patptr = *patlastptr;
2559    
2560      /* This loop is needed because compiling a "pattern" when -F is set may add
2561      on additional literal patterns if the original contains a newline. In the
2562      common case, it never will, because fgets() stops at a newline. However,
2563      the -N option can be used to give pcregrep a different newline setting. */
2564    
2565    for(;;)    for(;;)
2566      {      {
2567      int ellength;      if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2568      char *p = end_of_line(pattern, eop, &ellength);          linenumber))
2569      if (ellength == 0)        {
2570        return compile_single_pattern(pattern, options, filename, count);        if (f != stdin) fclose(f);
     sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);  
     pattern = p;  
     if (!compile_single_pattern(buffer, options, filename, count))  
2571        return FALSE;        return FALSE;
2572          }
2573        (*patlastptr)->string = NULL;            /* Insurance */
2574        if ((*patlastptr)->next == NULL) break;
2575        *patlastptr = (*patlastptr)->next;
2576      }      }
2577    }    }
2578  else return compile_single_pattern(pattern, options, filename, count);  
2579    if (f != stdin) fclose(f);
2580    return TRUE;
2581  }  }
2582    
2583    
# Line 1786  main(int argc, char **argv) Line 2593  main(int argc, char **argv)
2593  {  {
2594  int i, j;  int i, j;
2595  int rc = 1;  int rc = 1;
 int pcre_options = 0;  
 int cmd_pattern_count = 0;  
 int hint_count = 0;  
 int errptr;  
2596  BOOL only_one_at_top;  BOOL only_one_at_top;
2597  char *patterns[MAX_PATTERN_COUNT];  patstr *cp;
2598    fnstr *fn;
2599  const char *locale_from = "--locale";  const char *locale_from = "--locale";
2600  const char *error;  const char *error;
2601    
2602    #ifdef SUPPORT_PCREGREP_JIT
2603    pcre_jit_stack *jit_stack = NULL;
2604    #endif
2605    
2606  /* Set the default line ending value from the default in the PCRE library;  /* Set the default line ending value from the default in the PCRE library;
2607  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2608  */  Note that the return values from pcre_config(), though derived from the ASCII
2609    codes, are the same in EBCDIC environments, so we must use the actual values
2610    rather than escapes such as '\r'. */
2611    
2612  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2613  switch(i)  switch(i)
2614    {    {
2615    default:                 newline = (char *)"lf"; break;    default:               newline = (char *)"lf"; break;
2616    case '\r':               newline = (char *)"cr"; break;    case 13:               newline = (char *)"cr"; break;
2617    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2618    case -1:                 newline = (char *)"any"; break;    case -1:               newline = (char *)"any"; break;
2619    case -2:                 newline = (char *)"anycrlf"; break;    case -2:               newline = (char *)"anycrlf"; break;
2620    }    }
2621    
2622  /* Process the options */  /* Process the options */
# Line 1825  for (i = 1; i < argc; i++) Line 2635  for (i = 1; i < argc; i++)
2635    
2636    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2637      {      {
2638      if (pattern_filename != NULL || pattern_count > 0) break;      if (pattern_files != NULL || patterns != NULL) break;
2639        else exit(usage(2));        else pcregrep_exit(usage(2));
2640      }      }
2641    
2642    /* Handle a long name option, or -- to terminate the options */    /* Handle a long name option, or -- to terminate the options */
# Line 1848  for (i = 1; i < argc; i++) Line 2658  for (i = 1; i < argc; i++)
2658      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2659      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2660      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2661      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
2662      these categories, fortunately. */      both these categories. */
2663    
2664      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2665        {        {
2666        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2667        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2668        if (opbra == NULL)     /* Not a (p) case */  
2669          /* Handle options with only one spelling of the name */
2670    
2671          if (opbra == NULL)     /* Does not contain '(' */
2672          {          {
2673          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2674            {            {
# Line 1863  for (i = 1; i < argc; i++) Line 2676  for (i = 1; i < argc; i++)
2676            }            }
2677          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2678            {            {
2679            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2680            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2681                (int)strlen(arg) : (int)(argequals - arg);
2682            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2683              {              {
2684              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1877  for (i = 1; i < argc; i++) Line 2691  for (i = 1; i < argc; i++)
2691              }              }
2692            }            }
2693          }          }
2694        else                   /* Special case xxxx(p) */  
2695          /* Handle options with an alternate spelling of the name */
2696    
2697          else
2698          {          {
2699          char buff1[24];          char buff1[24];
2700          char buff2[24];          char buff2[24];
2701          int baselen = opbra - op->long_name;  
2702            int baselen = (int)(opbra - op->long_name);
2703            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2704            int arglen = (argequals == NULL || equals == NULL)?
2705              (int)strlen(arg) : (int)(argequals - arg);
2706    
2707          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2708          sprintf(buff2, "%s%.*s", buff1,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2709            (int)strlen(op->long_name) - baselen - 2, opbra + 1);  
2710          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2711               strncmp(arg, buff2, arglen) == 0)
2712              {
2713              if (equals != NULL && argequals != NULL)
2714                {
2715                option_data = argequals;
2716                if (*option_data == '=')
2717                  {
2718                  option_data++;
2719                  longopwasequals = TRUE;
2720                  }
2721                }
2722            break;            break;
2723              }
2724          }          }
2725        }        }
2726    
2727      if (op->one_char == 0)      if (op->one_char == 0)
2728        {        {
2729        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2730        exit(usage(2));        pcregrep_exit(usage(2));
2731        }        }
2732      }      }
2733    
   
2734    /* Jeffrey Friedl's debugging harness uses these additional options which    /* Jeffrey Friedl's debugging harness uses these additional options which
2735    are not in the right form for putting in the option table because they use    are not in the right form for putting in the option table because they use
2736    only one hyphen, yet are more than one character long. By putting them    only one hyphen, yet are more than one character long. By putting them
# Line 1928  for (i = 1; i < argc; i++) Line 2761  for (i = 1; i < argc; i++)
2761      {      {
2762      char *s = argv[i] + 1;      char *s = argv[i] + 1;
2763      longop = FALSE;      longop = FALSE;
2764    
2765      while (*s != 0)      while (*s != 0)
2766        {        {
2767        for (op = optionlist; op->one_char != 0; op++)        for (op = optionlist; op->one_char != 0; op++)
2768          { if (*s == op->one_char) break; }          {
2769            if (*s == op->one_char) break;
2770            }
2771        if (op->one_char == 0)        if (op->one_char == 0)
2772          {          {
2773          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2774            *s, argv[i]);            *s, argv[i]);
2775          exit(usage(2));          pcregrep_exit(usage(2));
2776            }
2777    
2778          option_data = s+1;
2779    
2780          /* Break out if this is the last character in the string; it's handled
2781          below like a single multi-char option. */
2782    
2783          if (*option_data == 0) break;
2784    
2785          /* Check for a single-character option that has data: OP_OP_NUMBER(S)
2786          are used for ones that either have a numeric value or a default, i.e.
2787          the data is optional. If a digit follows, there is data; if not, carry on
2788          with other single-character options in the same string. */
2789    
2790          if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
2791            {
2792            if (isdigit((unsigned char)s[1])) break;
2793          }          }
2794        if (op->type != OP_NODATA || s[1] == 0)        else   /* Check for an option with data */
2795          {          {
2796          option_data = s+1;          if (op->type != OP_NODATA) break;
         break;  
2797          }          }
2798    
2799          /* Handle a single-character option with no data, then loop for the
2800          next character in the string. */
2801    
2802        pcre_options = handle_option(*s++, pcre_options);        pcre_options = handle_option(*s++, pcre_options);
2803        }        }
2804      }      }
# Line 1957  for (i = 1; i < argc; i++) Line 2813  for (i = 1; i < argc; i++)
2813      continue;      continue;
2814      }      }
2815    
2816    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
2817    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
2818    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r",
2819    Jeffrey Friedl's special -S debugging option. */    "only-matching", and Jeffrey Friedl's special -S debugging option. */
2820    
2821    if (*option_data == 0 &&    if (*option_data == 0 &&
2822        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
2823           op->type == OP_OP_NUMBERS))
2824      {      {
2825      switch (op->one_char)      switch (op->one_char)
2826        {        {
2827        case N_COLOUR:        case N_COLOUR:
2828        colour_option = (char *)"auto";        colour_option = (char *)"auto";
2829        break;        break;
2830    
2831          case 'o':
2832          only_matching_last = add_number(0, only_matching_last);
2833          if (only_matching == NULL) only_matching = only_matching_last;
2834          break;
2835    
2836  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
2837        case 'S':        case 'S':
2838        S_arg = 0;        S_arg = 0;
# Line 1986  for (i = 1; i < argc; i++) Line 2849  for (i = 1; i < argc; i++)
2849      if (i >= argc - 1 || longopwasequals)      if (i >= argc - 1 || longopwasequals)
2850        {        {
2851        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2852        exit(usage(2));        pcregrep_exit(usage(2));
2853        }        }
2854      option_data = argv[++i];      option_data = argv[++i];
2855      }      }
2856    
2857    /* If the option type is OP_PATLIST, it's the -e option, which can be called    /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
2858    multiple times to create a list of patterns. */    added to a chain of numbers. */
2859    
2860      if (op->type == OP_OP_NUMBERS)
2861        {
2862        unsigned long int n = decode_number(option_data, op, longop);
2863        omdatastr *omd = (omdatastr *)op->dataptr;
2864        *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
2865        if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
2866        }
2867    
2868      /* If the option type is OP_PATLIST, it's the -e option, or one of the
2869      include/exclude options, which can be called multiple times to create lists
2870      of patterns. */
2871    
2872      else if (op->type == OP_PATLIST)
2873        {
2874        patdatastr *pd = (patdatastr *)op->dataptr;
2875        *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2876        if (*(pd->lastptr) == NULL) goto EXIT2;
2877        if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2878        }
2879    
2880      /* If the option type is OP_FILELIST, it's one of the options that names a
2881      file. */
2882    
2883    if (op->type == OP_PATLIST)    else if (op->type == OP_FILELIST)
2884      {      {
2885      if (cmd_pattern_count >= MAX_PATTERN_COUNT)      fndatastr *fd = (fndatastr *)op->dataptr;
2886        fn = (fnstr *)malloc(sizeof(fnstr));
2887        if (fn == NULL)
2888        {        {
2889        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",        fprintf(stderr, "pcregrep: malloc failed\n");
2890          MAX_PATTERN_COUNT);        goto EXIT2;
2891        return 2;        }
2892        fn->next = NULL;
2893        fn->name = option_data;
2894        if (*(fd->anchor) == NULL)
2895          *(fd->anchor) = fn;
2896        else
2897          (*(fd->lastptr))->next = fn;
2898        *(fd->lastptr) = fn;
2899        }
2900    
2901      /* Handle OP_BINFILES */
2902    
2903      else if (op->type == OP_BINFILES)
2904        {
2905        if (strcmp(option_data, "binary") == 0)
2906          binary_files = BIN_BINARY;
2907        else if (strcmp(option_data, "without-match") == 0)
2908          binary_files = BIN_NOMATCH;
2909        else if (strcmp(option_data, "text") == 0)
2910          binary_files = BIN_TEXT;
2911        else
2912          {
2913          fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2914            option_data);
2915          pcregrep_exit(usage(2));
2916        }        }
     patterns[cmd_pattern_count++] = option_data;  
2917      }      }
2918    
2919    /* Otherwise, deal with single string or numeric data values. */    /* Otherwise, deal with a single string or numeric data value. */
2920    
2921    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)    else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2922               op->type != OP_OP_NUMBER)
2923      {      {
2924      *((char **)op->dataptr) = option_data;      *((char **)op->dataptr) = option_data;
2925      }      }
2926    else    else
2927      {      {
2928      char *endptr;      unsigned long int n = decode_number(option_data, op, longop);
2929      int n = strtoul(option_data, &endptr, 10);      if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
2930      if (*endptr != 0)        else *((int *)op->dataptr) = n;
       {  
       if (longop)  
         {  
         char *equals = strchr(op->long_name, '=');  
         int nlen = (equals == NULL)? (int)strlen(op->long_name) :  
           equals - op->long_name;  
         fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",  
           option_data, nlen, op->long_name);  
         }  
       else  
         fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",  
           option_data, op->one_char);  
       exit(usage(2));  
       }  
     *((int *)op->dataptr) = n;  
2931      }      }
2932    }    }
2933    
# Line 2044  if (both_context > 0) Line 2941  if (both_context > 0)
2941    }    }
2942    
2943  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2944  However, the latter two set the only_matching flag. */  However, all three set show_only_matching because they display, each in their
2945    own way, only the data that has matched. */
2946    
2947  if ((only_matching && (file_offsets || line_offsets)) ||  if ((only_matching != NULL && (file_offsets || line_offsets)) ||
2948      (file_offsets && line_offsets))      (file_offsets && line_offsets))
2949    {    {
2950    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2951      "and/or --line-offsets\n");      "and/or --line-offsets\n");
2952    exit(usage(2));    pcregrep_exit(usage(2));
2953    }    }
2954    
2955  if (file_offsets || line_offsets) only_matching = TRUE;  if (only_matching != NULL || file_offsets || line_offsets)
2956      show_only_matching = TRUE;
2957    
2958  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2959  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
# Line 2071  if (locale == NULL) Line 2970  if (locale == NULL)
2970    locale_from = "LC_CTYPE";    locale_from = "LC_CTYPE";
2971    }    }
2972    
2973  /* If a locale has been provided, set it, and generate the tables the PCRE  /* If a locale is set, use it to generate the tables the PCRE needs. Otherwise,
2974  needs. Otherwise, pcretables==NULL, which causes the use of default tables. */  pcretables==NULL, which causes the use of default tables. */
2975    
2976  if (locale != NULL)  if (locale != NULL)
2977    {    {
# Line 2080  if (locale != NULL) Line 2979  if (locale != NULL)
2979      {      {
2980      fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",      fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2981        locale, locale_from);        locale, locale_from);
2982      return 2;      goto EXIT2;
2983      }      }
2984    pcretables = pcre_maketables();    pcretables = pcre_maketables();
2985    }    }
# Line 2095  if (colour_option != NULL && strcmp(colo Line 2994  if (colour_option != NULL && strcmp(colo
2994      {      {
2995      fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",      fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2996        colour_option);        colour_option);
2997      return 2;      goto EXIT2;
2998      }      }
2999    if (do_colour)    if (do_colour)
3000      {      {
# Line 2135  else if (strcmp(newline, "anycrlf") == 0 Line 3034  else if (strcmp(newline, "anycrlf") == 0
3034  else  else
3035    {    {
3036    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
3037    return 2;    goto EXIT2;
3038    }    }
3039    
3040  /* Interpret the text values for -d and -D */  /* Interpret the text values for -d and -D */
# Line 2148  if (dee_option != NULL) Line 3047  if (dee_option != NULL)
3047    else    else
3048      {      {
3049      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
3050      return 2;      goto EXIT2;
3051      }      }
3052    }    }
3053    
# Line 2159  if (DEE_option != NULL) Line 3058  if (DEE_option != NULL)
3058    else    else
3059      {      {
3060      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
3061      return 2;      goto EXIT2;
3062      }      }
3063    }    }
3064    
# Line 2178  if (jfriedl_XT != 0 || jfriedl_XR != 0) Line 3077  if (jfriedl_XT != 0 || jfriedl_XR != 0)
3077    }    }
3078  #endif  #endif
3079    
3080  /* Get memory to store the pattern and hints lists. */  /* Get memory for the main buffer. */
3081    
3082  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  bufsize = 3*bufthird;
3083  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  main_buffer = (char *)malloc(bufsize);
3084    
3085  if (pattern_list == NULL || hints_list == NULL)  if (main_buffer == NULL)
3086    {    {
3087    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
3088    goto EXIT2;    goto EXIT2;
3089    }    }
3090    
3091  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there are no files provided by -f,
3092  the first argument is the one and only pattern, and it must exist. */  the first argument is the one and only pattern, and it must exist. */
3093    
3094  if (cmd_pattern_count == 0 && pattern_filename == NULL)  if (patterns == NULL && pattern_files == NULL)
3095    {    {
3096    if (i >= argc) return usage(2);    if (i >= argc) return usage(2);
3097    patterns[cmd_pattern_count++] = argv[i++];    patterns = patterns_last = add_pattern(argv[i++], NULL);
3098      if (patterns == NULL) goto EXIT2;
3099    }    }
3100    
3101  /* Compile the patterns that were provided on the command line, either by  /* Compile the patterns that were provided on the command line, either by
3102  multiple uses of -e or as a single unkeyed pattern. */  multiple uses of -e or as a single unkeyed pattern. We cannot do this until
3103    after all the command-line options are read so that we know which PCRE options
3104    to use. When -F is used, compile_pattern() may add another block into the
3105    chain, so we must not access the next pointer till after the compile. */
3106    
3107  for (j = 0; j < cmd_pattern_count; j++)  for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3108    {    {
3109    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
3110         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 1 && patterns->next == NULL)? 0 : j))
3111      goto EXIT2;      goto EXIT2;
3112    }    }
3113    
3114  /* Compile the regular expressions that are provided in a file. */  /* Read and compile the regular expressions that are provided in files. */
3115    
3116  if (pattern_filename != NULL)  for (fn = pattern_files; fn != NULL; fn = fn->next)
3117    {    {
3118    int linenumber = 0;    if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3119    FILE *f;      goto EXIT2;
3120    char *filename;    }
   char buffer[MBUFTHIRD];  
3121    
3122    if (strcmp(pattern_filename, "-") == 0)  /* Study the regular expressions, as we will be running them many times. If an
3123      {  extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
3124      f = stdin;  returned, even if studying produces no data. */
     filename = stdin_name;  
     }  
   else  
     {  
     f = fopen(pattern_filename, "r");  
     if (f == NULL)  
       {  
       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,  
         strerror(errno));  
       goto EXIT2;  
       }  
     filename = pattern_filename;  
     }  
3125    
3126    while (fgets(buffer, MBUFTHIRD, f) != NULL)  if (match_limit > 0 || match_limit_recursion > 0)
3127      {    study_options |= PCRE_STUDY_EXTRA_NEEDED;
     char *s = buffer + (int)strlen(buffer);  
     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;  
     *s = 0;  
     linenumber++;  
     if (buffer[0] == 0) continue;   /* Skip blank lines */  
     if (!compile_pattern(buffer, pcre_options, filename, linenumber))  
       goto EXIT2;  
     }  
3128    
3129    if (f != stdin) fclose(f);  /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
   }  
3130    
3131  /* Study the regular expressions, as we will be running them many times */  #ifdef SUPPORT_PCREGREP_JIT
3132    if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3133      jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
3134    #endif
3135    
3136  for (j = 0; j < pattern_count; j++)  for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3137    {    {
3138    hints_list[j] = pcre_study(pattern_list[j], 0, &error);    cp->hint = pcre_study(cp->compiled, study_options, &error);
3139    if (error != NULL)    if (error != NULL)
3140      {      {
3141      char s[16];      char s[16];
3142      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
3143      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
3144      goto EXIT2;      goto EXIT2;
3145      }      }
3146    hint_count++;  #ifdef SUPPORT_PCREGREP_JIT
3147      if (jit_stack != NULL && cp->hint != NULL)
3148        pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
3149    #endif
3150    }    }
3151    
3152  /* If there are include or exclude patterns, compile them. */  /* If --match-limit or --recursion-limit was set, put the value(s) into the
3153    pcre_extra block for each pattern. There will always be an extra block because
3154    of the use of PCRE_STUDY_EXTRA_NEEDED above. */
3155    
3156  if (exclude_pattern != NULL)  for (cp = patterns; cp != NULL; cp = cp->next)
3157    {    {
3158    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,    if (match_limit > 0)
     pcretables);  
   if (exclude_compiled == NULL)  
3159      {      {
3160      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
3161        errptr, error);      cp->hint->match_limit = match_limit;
3162      goto EXIT2;      }
3163    
3164      if (match_limit_recursion > 0)
3165        {
3166        cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3167        cp->hint->match_limit_recursion = match_limit_recursion;
3168      }      }
3169    }    }
3170    
3171  if (include_pattern != NULL)  /* If there are include or exclude patterns read from the command line, compile
3172    them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3173    0. */
3174    
3175    for (j = 0; j < 4; j++)
3176    {    {
3177    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,    int k;
3178      pcretables);    for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
   if (include_compiled == NULL)  
3179      {      {
3180      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
3181        errptr, error);           (k == 1 && cp->next == NULL)? 0 : k))
3182      goto EXIT2;        goto EXIT2;
3183      }      }
3184    }    }
3185    
3186  /* If there are no further arguments, do the business on stdin and exit. */  /* Read and compile include/exclude patterns from files. */
3187    
3188    for (fn = include_from; fn != NULL; fn = fn->next)
3189      {
3190      if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3191        goto EXIT2;
3192      }
3193    
3194    for (fn = exclude_from; fn != NULL; fn = fn->next)
3195      {
3196      if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3197        goto EXIT2;
3198      }
3199    
3200    /* If there are no files that contain lists of files to search, and there are
3201    no file arguments, search stdin, and then exit. */
3202    
3203  if (i >= argc)  if (file_lists == NULL && i >= argc)
3204    {    {
3205    rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);    rc = pcregrep(stdin, FR_PLAIN, stdin_name,
3206        (filenames > FN_DEFAULT)? stdin_name : NULL);
3207    goto EXIT;    goto EXIT;
3208    }    }
3209    
3210  /* Otherwise, work through the remaining arguments as files or directories.  /* If any files that contain lists of files to search have been specified,
3211  Pass in the fact that there is only one argument at top level - this suppresses  read them line by line and search the given files. */
3212  the file name if the argument is not a directory and filenames are not  
3213  otherwise forced. */  for (fn = file_lists; fn != NULL; fn = fn->next)
3214      {
3215      char buffer[PATBUFSIZE];
3216      FILE *fl;
3217      if (strcmp(fn->name, "-") == 0) fl = stdin; else
3218        {
3219        fl = fopen(fn->name, "rb");
3220        if (fl == NULL)
3221          {
3222          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
3223            strerror(errno));
3224          goto EXIT2;
3225          }
3226        }
3227      while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3228        {
3229        int frc;
3230        char *end = buffer + (int)strlen(buffer);
3231        while (end > buffer && isspace(end[-1])) end--;
3232        *end = 0;
3233        if (*buffer != 0)
3234          {
3235          frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3236          if (frc > 1) rc = frc;
3237            else if (frc == 0 && rc == 1) rc = 0;
3238          }
3239        }
3240      if (fl != stdin) fclose(fl);
3241      }
3242    
3243    /* After handling file-list, work through remaining arguments. Pass in the fact
3244    that there is only one argument at top level - this suppresses the file name if
3245    the argument is not a directory and filenames are not otherwise forced. */
3246    
3247  only_one_at_top = i == argc - 1;   /* Catch initial value of i */  only_one_at_top = i == argc - 1 && file_lists == NULL;
3248    
3249  for (; i < argc; i++)  for (; i < argc; i++)
3250    {    {
# Line 2313  for (; i < argc; i++) Line 3255  for (; i < argc; i++)
3255    }    }
3256    
3257  EXIT:  EXIT:
3258  if (pattern_list != NULL)  #ifdef SUPPORT_PCREGREP_JIT
3259    {  if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3260    for (i = 0; i < pattern_count; i++) free(pattern_list[i]);  #endif
3261    free(pattern_list);  
3262    }  free(main_buffer);
3263  if (hints_list != NULL)  free((void *)pcretables);
3264    
3265    free_pattern_chain(patterns);
3266    free_pattern_chain(include_patterns);
3267    free_pattern_chain(include_dir_patterns);
3268    free_pattern_chain(exclude_patterns);
3269    free_pattern_chain(exclude_dir_patterns);
3270    
3271    free_file_chain(exclude_from);
3272    free_file_chain(include_from);
3273    free_file_chain(pattern_files);
3274    free_file_chain(file_lists);
3275    
3276    while (only_matching != NULL)
3277    {    {
3278    for (i = 0; i < hint_count; i++) free(hints_list[i]);    omstr *this = only_matching;
3279    free(hints_list);    only_matching = this->next;
3280      free(this);
3281    }    }
3282  return rc;  
3283    pcregrep_exit(rc);
3284    
3285  EXIT2:  EXIT2:
3286  rc = 2;  rc = 2;

Legend:
Removed from v.296  
changed lines
  Added in v.1502

  ViewVC Help
Powered by ViewVC 1.1.5