/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 296 by ph10, Tue Jan 1 20:09:30 2008 UTC revision 1548 by ph10, Tue Apr 14 17:02:30 2015 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On Unix-like, Windows, and native z/OS systems it can
7  directories.  recurse into directories, and in z/OS it can handle PDS files.
8    
9             Copyright (c) 1997-2007 University of Cambridge  Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
10    additional header is required. That header is not included in the main PCRE
11    distribution because other apparatus is needed to compile pcregrep for z/OS.
12    The header can be found in the special z/OS distribution, which is available
13    from www.zaconsultants.net or from www.cbttape.org.
14    
15               Copyright (c) 1997-2014 University of Cambridge
16    
17  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
18  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 70  POSSIBILITY OF SUCH DAMAGE. Line 76  POSSIBILITY OF SUCH DAMAGE.
76    
77  typedef int BOOL;  typedef int BOOL;
78    
79  #define MAX_PATTERN_COUNT 100  #define OFFSET_SIZE 99
80    
81  #if BUFSIZ > 8192  #if BUFSIZ > 8192
82  #define MBUFTHIRD BUFSIZ  #define MAXPATLEN BUFSIZ
83  #else  #else
84  #define MBUFTHIRD 8192  #define MAXPATLEN 8192
85  #endif  #endif
86    
87    #define PATBUFSIZE (MAXPATLEN + 10)   /* Allows for prefix+suffix */
88    
89  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
90  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
91  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
92    
93  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
94    
95  /* File reading styles */  /* File reading styles */
96    
# Line 103  enum { DEE_READ, DEE_SKIP }; Line 111  enum { DEE_READ, DEE_SKIP };
111    
112  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
113    
114    /* Binary file options */
115    
116    enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
117    
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge: the result is consumed by an empty
"if". Oddly, this does not also seem to apply to fprintf().

The "if" is wrapped in do { } while (0) so that the macro expands to a single
statement. Without the wrapper, a use such as

  if (cond) FWRITE(a,b,c,d); else ...

does not compile, because the semicolon terminates the outer "if" before the
"else" is reached. Every use must be followed by a semicolon. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
125    
126    
127    
128  /*************************************************  /*************************************************
# Line 126  static char *colour_string = (char *)"1; Line 146  static char *colour_string = (char *)"1;
146  static char *colour_option = NULL;  static char *colour_option = NULL;
147  static char *dee_option = NULL;  static char *dee_option = NULL;
148  static char *DEE_option = NULL;  static char *DEE_option = NULL;
149    static char *locale = NULL;
150    static char *main_buffer = NULL;
151  static char *newline = NULL;  static char *newline = NULL;
152  static char *pattern_filename = NULL;  static char *om_separator = (char *)"";
153  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
 static char *locale = NULL;  
154    
155  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
156    
 static int  pattern_count = 0;  
 static pcre **pattern_list = NULL;  
 static pcre_extra **hints_list = NULL;  
   
 static char *include_pattern = NULL;  
 static char *exclude_pattern = NULL;  
   
 static pcre *include_compiled = NULL;  
 static pcre *exclude_compiled = NULL;  
   
157  static int after_context = 0;  static int after_context = 0;
158  static int before_context = 0;  static int before_context = 0;
159    static int binary_files = BIN_BINARY;
160  static int both_context = 0;  static int both_context = 0;
161    static int bufthird = PCREGREP_BUFSIZE;
162    static int bufsize = 3*PCREGREP_BUFSIZE;
163    
164    #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
165    static int dee_action = dee_SKIP;
166    #else
167  static int dee_action = dee_READ;  static int dee_action = dee_READ;
168    #endif
169    
170  static int DEE_action = DEE_READ;  static int DEE_action = DEE_READ;
171  static int error_count = 0;  static int error_count = 0;
172  static int filenames = FN_DEFAULT;  static int filenames = FN_DEFAULT;
173    static int pcre_options = 0;
174  static int process_options = 0;  static int process_options = 0;
175    
176    #ifdef SUPPORT_PCREGREP_JIT
177    static int study_options = PCRE_STUDY_JIT_COMPILE;
178    #else
179    static int study_options = 0;
180    #endif
181    
182    static unsigned long int match_limit = 0;
183    static unsigned long int match_limit_recursion = 0;
184    
185  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
186  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
187  static BOOL file_offsets = FALSE;  static BOOL file_offsets = FALSE;
188  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
189  static BOOL invert = FALSE;  static BOOL invert = FALSE;
190    static BOOL line_buffered = FALSE;
191  static BOOL line_offsets = FALSE;  static BOOL line_offsets = FALSE;
192  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
193  static BOOL number = FALSE;  static BOOL number = FALSE;
194  static BOOL only_matching = FALSE;  static BOOL omit_zero_count = FALSE;
195    static BOOL resource_error = FALSE;
196  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
197    static BOOL show_only_matching = FALSE;
198  static BOOL silent = FALSE;  static BOOL silent = FALSE;
199  static BOOL utf8 = FALSE;  static BOOL utf8 = FALSE;
200    
201    /* Structure for list of --only-matching capturing numbers. */
202    
203    typedef struct omstr {
204      struct omstr *next;
205      int groupnum;
206    } omstr;
207    
208    static omstr *only_matching = NULL;
209    static omstr *only_matching_last = NULL;
210    
211    /* Structure for holding the two variables that describe a number chain. */
212    
213    typedef struct omdatastr {
214      omstr **anchor;
215      omstr **lastptr;
216    } omdatastr;
217    
218    static omdatastr only_matching_data = { &only_matching, &only_matching_last };
219    
220    /* Structure for list of file names (for -f and --{in,ex}clude-from) */
221    
222    typedef struct fnstr {
223      struct fnstr *next;
224      char *name;
225    } fnstr;
226    
227    static fnstr *exclude_from = NULL;
228    static fnstr *exclude_from_last = NULL;
229    static fnstr *include_from = NULL;
230    static fnstr *include_from_last = NULL;
231    
232    static fnstr *file_lists = NULL;
233    static fnstr *file_lists_last = NULL;
234    static fnstr *pattern_files = NULL;
235    static fnstr *pattern_files_last = NULL;
236    
237    /* Structure for holding the two variables that describe a file name chain. */
238    
239    typedef struct fndatastr {
240      fnstr **anchor;
241      fnstr **lastptr;
242    } fndatastr;
243    
244    static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
245    static fndatastr include_from_data = { &include_from, &include_from_last };
246    static fndatastr file_lists_data = { &file_lists, &file_lists_last };
247    static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
248    
249    /* Structure for pattern and its compiled form; used for matching patterns and
250    also for include/exclude patterns. */
251    
252    typedef struct patstr {
253      struct patstr *next;
254      char *string;
255      pcre *compiled;
256      pcre_extra *hint;
257    } patstr;
258    
259    static patstr *patterns = NULL;
260    static patstr *patterns_last = NULL;
261    static patstr *include_patterns = NULL;
262    static patstr *include_patterns_last = NULL;
263    static patstr *exclude_patterns = NULL;
264    static patstr *exclude_patterns_last = NULL;
265    static patstr *include_dir_patterns = NULL;
266    static patstr *include_dir_patterns_last = NULL;
267    static patstr *exclude_dir_patterns = NULL;
268    static patstr *exclude_dir_patterns_last = NULL;
269    
270    /* Structure holding the two variables that describe a pattern chain. A pointer
271    to such structures is used for each appropriate option. */
272    
273    typedef struct patdatastr {
274      patstr **anchor;
275      patstr **lastptr;
276    } patdatastr;
277    
278    static patdatastr match_patdata = { &patterns, &patterns_last };
279    static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
280    static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
281    static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
282    static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
283    
284    static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
285                                     &include_dir_patterns, &exclude_dir_patterns };
286    
287    static const char *incexname[4] = { "--include", "--exclude",
288                                        "--include-dir", "--exclude-dir" };
289    
290  /* Structure for options and list of them */  /* Structure for options and list of them */
291    
292  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
293         OP_PATLIST };         OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
294    
295  typedef struct option_item {  typedef struct option_item {
296    int type;    int type;
# Line 181  typedef struct option_item { Line 303  typedef struct option_item {
303  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
304  used to identify them. */  used to identify them. */
305    
306  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
307  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
308  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
309  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
310  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
311  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
312  #define N_NULL      (-7)  #define N_LABEL        (-7)
313  #define N_LOFFSETS  (-8)  #define N_LOCALE       (-8)
314  #define N_FOFFSETS  (-9)  #define N_NULL         (-9)
315    #define N_LOFFSETS     (-10)
316    #define N_FOFFSETS     (-11)
317    #define N_LBUFFER      (-12)
318    #define N_M_LIMIT      (-13)
319    #define N_M_LIMIT_REC  (-14)
320    #define N_BUFSIZE      (-15)
321    #define N_NOJIT        (-16)
322    #define N_FILE_LIST    (-17)
323    #define N_BINARY_FILES (-18)
324    #define N_EXCLUDE_FROM (-19)
325    #define N_INCLUDE_FROM (-20)
326    #define N_OM_SEPARATOR (-21)
327    
328  static option_item optionlist[] = {  static option_item optionlist[] = {
329    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
330    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
331    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
332    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
333    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
334    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },    { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
335    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
336    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
337    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
338    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
339    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
340    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
341    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
342    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },    { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
343    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
344    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
345    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
346    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
347    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
348    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
349    { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },    { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
350    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
351    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },  #ifdef SUPPORT_PCREGREP_JIT
352    { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
353    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },  #else
354    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
355    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },  #endif
356    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
357    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
358    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
359      { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
360      { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
361      { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
362      { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
363      { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
364      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
365      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
366      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
367      { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
368      { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
369      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
370      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
371      { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
372      { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
373      { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
374      { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
375      { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
376      { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
377    
378      /* These two were accidentally implemented with underscores instead of
379      hyphens in the option names. As this was not discovered for several releases,
380      the incorrect versions are left in the table for compatibility. However, the
381      --help function misses out any option that has an underscore in its name. */
382    
383      { OP_PATLIST,   N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
384      { OP_PATLIST,   N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
385    
386  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
387    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
388  #endif  #endif
# Line 237  static option_item optionlist[] = { Line 398  static option_item optionlist[] = {
398  /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F  /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
399  options. These set the 1, 2, and 4 bits in process_options, respectively. Note  options. These set the 1, 2, and 4 bits in process_options, respectively. Note
400  that the combination of -w and -x has the same effect as -x on its own, so we  that the combination of -w and -x has the same effect as -x on its own, so we
401  can treat them as the same. */  can treat them as the same. Note that the PATBUFSIZE macro assumes the longest
402    prefix+suffix is 10 characters; if anything longer is added, it must be
403    adjusted. */
404    
405  static const char *prefix[] = {  static const char *prefix[] = {
406    "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };    "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
# Line 258  const char utf8_table4[] = { Line 421  const char utf8_table4[] = {
421    
422    
423  /*************************************************  /*************************************************
424    *         Exit from the program                  *
425    *************************************************/
426    
427    /* If there has been a resource error, give a suitable message.
428    
429    Argument:  the return code
430    Returns:   does not return
431    */
432    
433    static void
434    pcregrep_exit(int rc)
435    {
436    if (resource_error)
437      {
438      fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
439        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
440        PCRE_ERROR_JIT_STACKLIMIT);
441      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
442      }
443    exit(rc);
444    }
445    
446    
447    /*************************************************
448    *          Add item to chain of patterns         *
449    *************************************************/
450    
451    /* Used to add an item onto a chain, or just return an unconnected item if the
452    "after" argument is NULL.
453    
454    Arguments:
455      s          pattern string to add
456      after      if not NULL points to item to insert after
457    
458    Returns:     new pattern block or NULL on error
459    */
460    
461    static patstr *
462    add_pattern(char *s, patstr *after)
463    {
464    patstr *p = (patstr *)malloc(sizeof(patstr));
465    if (p == NULL)
466      {
467      fprintf(stderr, "pcregrep: malloc failed\n");
468      pcregrep_exit(2);
469      }
470    if (strlen(s) > MAXPATLEN)
471      {
472      fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
473        MAXPATLEN);
474      free(p);
475      return NULL;
476      }
477    p->next = NULL;
478    p->string = s;
479    p->compiled = NULL;
480    p->hint = NULL;
481    
482    if (after != NULL)
483      {
484      p->next = after->next;
485      after->next = p;
486      }
487    return p;
488    }
489    
490    
491    /*************************************************
492    *           Free chain of patterns               *
493    *************************************************/
494    
495    /* Used for several chains of patterns.
496    
497    Argument: pointer to start of chain
498    Returns:  nothing
499    */
500    
501    static void
502    free_pattern_chain(patstr *pc)
503    {
504    while (pc != NULL)
505      {
506      patstr *p = pc;
507      pc = p->next;
508      if (p->hint != NULL) pcre_free_study(p->hint);
509      if (p->compiled != NULL) pcre_free(p->compiled);
510      free(p);
511      }
512    }
513    
514    
515    /*************************************************
516    *           Free chain of file names             *
517    *************************************************/
518    
519    /*
520    Argument: pointer to start of chain
521    Returns:  nothing
522    */
523    
524    static void
525    free_file_chain(fnstr *fn)
526    {
527    while (fn != NULL)
528      {
529      fnstr *f = fn;
530      fn = f->next;
531      free(f);
532      }
533    }
534    
535    
536    /*************************************************
537  *            OS-specific functions               *  *            OS-specific functions               *
538  *************************************************/  *************************************************/
539    
540  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific.
541  although at present the only ones are for Unix, Win32, and for "no support". */  At present there are versions for Unix-style environments, Windows, native
542    z/OS, and "no support". */
543    
544    
545  /************* Directory scanning in Unix ***********/  /************* Directory scanning Unix-style and z/OS ***********/
546    
547  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H  #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
548  #include <sys/types.h>  #include <sys/types.h>
549  #include <sys/stat.h>  #include <sys/stat.h>
550  #include <dirent.h>  #include <dirent.h>
551    
552    #if defined NATIVE_ZOS
553    /************* Directory and PDS/E scanning for z/OS ***********/
554    /************* z/OS looks mostly like Unix with USS ************/
555    /* However, z/OS needs the #include statements in this header */
556    #include "pcrzosfs.h"
557    /* That header is not included in the main PCRE distribution because
558       other apparatus is needed to compile pcregrep for z/OS. The header
559       can be found in the special z/OS distribution, which is available
560       from www.zaconsultants.net or from www.cbttape.org. */
561    #endif
562    
563  typedef DIR directory_type;  typedef DIR directory_type;
564    #define FILESEP '/'
565    
566  static int  static int
567  isdirectory(char *filename)  isdirectory(char *filename)
# Line 280  isdirectory(char *filename) Line 569  isdirectory(char *filename)
569  struct stat statbuf;  struct stat statbuf;
570  if (stat(filename, &statbuf) < 0)  if (stat(filename, &statbuf) < 0)
571    return 0;        /* In the expectation that opening as a file will fail */    return 0;        /* In the expectation that opening as a file will fail */
572  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return (statbuf.st_mode & S_IFMT) == S_IFDIR;
573  }  }
574    
575  static directory_type *  static directory_type *
# Line 309  closedir(dir); Line 598  closedir(dir);
598  }  }
599    
600    
601  /************* Test for regular file in Unix **********/  /************* Test for regular file, Unix-style **********/
602    
603  static int  static int
604  isregfile(char *filename)  isregfile(char *filename)
# Line 321  return (statbuf.st_mode & S_IFMT) == S_I Line 610  return (statbuf.st_mode & S_IFMT) == S_I
610  }  }
611    
612    
613  /************* Test stdout for being a terminal in Unix **********/  #if defined NATIVE_ZOS
614    /************* Test for a terminal in z/OS **********/
615    /* isatty() does not work in a TSO environment, so always give FALSE.*/
616    
617    static BOOL
618    is_stdout_tty(void)
619    {
620    return FALSE;
621    }
622    
623    static BOOL
624    is_file_tty(FILE *f)
625    {
626    return FALSE;
627    }
628    
629    
630    /************* Test for a terminal, Unix-style **********/
631    
632    #else
633  static BOOL  static BOOL
634  is_stdout_tty(void)  is_stdout_tty(void)
635  {  {
636  return isatty(fileno(stdout));  return isatty(fileno(stdout));
637  }  }
638    
639    static BOOL
640    is_file_tty(FILE *f)
641    {
642    return isatty(fileno(f));
643    }
644    #endif
645    
646  /************* Directory scanning in Win32 ***********/  /* End of Unix-style or native z/OS environment functions. */
647    
648    
649    /************* Directory scanning in Windows ***********/
650    
651  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
652  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
653  when it did not exist. David Byron added a patch that moved the #include of  when it did not exist. David Byron added a patch that moved the #include of
654  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
655  */  The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
656    undefined when it is indeed undefined. */
657    
658  #elif HAVE_WINDOWS_H  #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
659    
660  #ifndef STRICT  #ifndef STRICT
661  # define STRICT  # define STRICT
# Line 360  BOOL first; Line 677  BOOL first;
677  WIN32_FIND_DATA data;  WIN32_FIND_DATA data;
678  } directory_type;  } directory_type;
679    
680    #define FILESEP '/'
681    
682  int  int
683  isdirectory(char *filename)  isdirectory(char *filename)
684  {  {
685  DWORD attr = GetFileAttributes(filename);  DWORD attr = GetFileAttributes(filename);
686  if (attr == INVALID_FILE_ATTRIBUTES)  if (attr == INVALID_FILE_ATTRIBUTES)
687    return 0;    return 0;
688  return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;  return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
689  }  }
690    
691  directory_type *  directory_type *
# Line 377  char *pattern; Line 696  char *pattern;
696  directory_type *dir;  directory_type *dir;
697  DWORD err;  DWORD err;
698  len = strlen(filename);  len = strlen(filename);
699  pattern = (char *) malloc(len + 3);  pattern = (char *)malloc(len + 3);
700  dir = (directory_type *) malloc(sizeof(*dir));  dir = (directory_type *)malloc(sizeof(*dir));
701  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
702    {    {
703    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
704    exit(2);    pcregrep_exit(2);
705    }    }
706  memcpy(pattern, filename, len);  memcpy(pattern, filename, len);
707  memcpy(&(pattern[len]), "\\*", 3);  memcpy(&(pattern[len]), "\\*", 3);
# Line 430  free(dir); Line 749  free(dir);
749  }  }
750    
751    
752  /************* Test for regular file in Win32 **********/  /************* Test for regular file in Windows **********/
753    
754  /* I don't know how to do this, or if it can be done; assume all paths are  /* I don't know how to do this, or if it can be done; assume all paths are
755  regular if they are not directories. */  regular if they are not directories. */
# Line 441  return !isdirectory(filename); Line 760  return !isdirectory(filename);
760  }  }
761    
762    
763  /************* Test stdout for being a terminal in Win32 **********/  /************* Test for a terminal in Windows **********/
764    
765  /* I don't know how to do this; assume never */  /* I don't know how to do this; assume never */
766    
# Line 451  is_stdout_tty(void) Line 770  is_stdout_tty(void)
770  return FALSE;  return FALSE;
771  }  }
772    
773    static BOOL
774    is_file_tty(FILE *f)
775    {
776    return FALSE;
777    }
778    
779    /* End of Windows functions */
780    
781    
782  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
783    
# Line 458  return FALSE; Line 785  return FALSE;
785    
786  #else  #else
787    
788    #define FILESEP 0
789  typedef void directory_type;  typedef void directory_type;
790    
791  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
# Line 466  char *readdirectory(directory_type *dir) Line 794  char *readdirectory(directory_type *dir)
794  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
795    
796    
797  /************* Test for regular when we can't do it **********/  /************* Test for regular file when we can't do it **********/
798    
799  /* Assume all files are regular. */  /* Assume all files are regular. */
800    
801  int isregfile(char *filename) { return 1; }  int isregfile(char *filename) { return 1; }
802    
803    
804  /************* Test stdout for being a terminal when we can't do it **********/  /************* Test for a terminal when we can't do it **********/
805    
806  static BOOL  static BOOL
807  is_stdout_tty(void)  is_stdout_tty(void)
# Line 481  is_stdout_tty(void) Line 809  is_stdout_tty(void)
809  return FALSE;  return FALSE;
810  }  }
811    
812    static BOOL
813    is_file_tty(FILE *f)
814    {
815    return FALSE;
816    }
817    
818  #endif  #endif  /* End of system-specific functions */
819    
820    
821    
# Line 509  return sys_errlist[n]; Line 842  return sys_errlist[n];
842    
843    
844  /*************************************************  /*************************************************
845    *                Usage function                  *
846    *************************************************/
847    
848    static int
849    usage(int rc)
850    {
851    option_item *op;
852    fprintf(stderr, "Usage: pcregrep [-");
853    for (op = optionlist; op->one_char != 0; op++)
854      {
855      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
856      }
857    fprintf(stderr, "] [long options] [pattern] [files]\n");
858    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
859      "options.\n");
860    return rc;
861    }
862    
863    
864    
865    /*************************************************
866    *                Help function                   *
867    *************************************************/
868    
869    static void
870    help(void)
871    {
872    option_item *op;
873    
874    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
875    printf("Search for PATTERN in each FILE or standard input.\n");
876    printf("PATTERN must be present if neither -e nor -f is used.\n");
877    printf("\"-\" can be used as a file name to mean STDIN.\n");
878    
879    #ifdef SUPPORT_LIBZ
880    printf("Files whose names end in .gz are read using zlib.\n");
881    #endif
882    
883    #ifdef SUPPORT_LIBBZ2
884    printf("Files whose names end in .bz2 are read using bzlib2.\n");
885    #endif
886    
887    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
888    printf("Other files and the standard input are read as plain files.\n\n");
889    #else
890    printf("All files are read as plain files, without any interpretation.\n\n");
891    #endif
892    
893    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
894    printf("Options:\n");
895    
896    for (op = optionlist; op->one_char != 0; op++)
897      {
898      int n;
899      char s[4];
900    
901      /* Two options were accidentally implemented and documented with underscores
902      instead of hyphens in their names, something that was not noticed for quite a
903      few releases. When fixing this, I left the underscored versions in the list
904      in case people were using them. However, we don't want to display them in the
905      help data. There are no other options that contain underscores, and we do not
906      expect ever to implement such options. Therefore, just omit any option that
907      contains an underscore. */
908    
909      if (strchr(op->long_name, '_') != NULL) continue;
910    
911      if (op->one_char > 0 && (op->long_name)[0] == 0)
912        n = 31 - printf("  -%c", op->one_char);
913      else
914        {
915        if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
916          else strcpy(s, "   ");
917        n = 31 - printf("  %s --%s", s, op->long_name);
918        }
919    
920      if (n < 1) n = 1;
921      printf("%.*s%s\n", n, "                           ", op->help_text);
922      }
923    
924    printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
925    printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
926    printf("When reading patterns or file names from a file, trailing white\n");
927    printf("space is removed and blank lines are ignored.\n");
928    printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
929    
930    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
931    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
932    }
933    
934    
935    
936    /*************************************************
937    *            Test exclude/includes               *
938    *************************************************/
939    
940    /* If any exclude pattern matches, the path is excluded. Otherwise, unless
941    there are no includes, the path must match an include pattern.
942    
943    Arguments:
944      path      the path to be matched
945      ip        the chain of include patterns
946      ep        the chain of exclude patterns
947    
948    Returns:    TRUE if the path is not excluded
949    */
950    
951    static BOOL
952    test_incexc(char *path, patstr *ip, patstr *ep)
953    {
954    int plen = strlen(path);
955    
956    for (; ep != NULL; ep = ep->next)
957      {
958      if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
959        return FALSE;
960      }
961    
962    if (ip == NULL) return TRUE;
963    
964    for (; ip != NULL; ip = ip->next)
965      {
966      if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
967        return TRUE;
968      }
969    
970    return FALSE;
971    }
972    
973    
974    
975    /*************************************************
976    *         Decode integer argument value          *
977    *************************************************/
978    
979    /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
980    because SunOS4 doesn't have it. This is used only for unpicking arguments, so
981    just keep it simple.
982    
983    Arguments:
984      option_data   the option data string
985      op            the option item (for error messages)
986      longop        TRUE if option given in long form
987    
988    Returns:        a long integer
989    */
990    
991    static long int
992    decode_number(char *option_data, option_item *op, BOOL longop)
993    {
994    unsigned long int n = 0;
995    char *endptr = option_data;
996    while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
997    while (isdigit((unsigned char)(*endptr)))
998      n = n * 10 + (int)(*endptr++ - '0');
999    if (toupper(*endptr) == 'K')
1000      {
1001      n *= 1024;
1002      endptr++;
1003      }
1004    else if (toupper(*endptr) == 'M')
1005      {
1006      n *= 1024*1024;
1007      endptr++;
1008      }
1009    
1010    if (*endptr != 0)   /* Error */
1011      {
1012      if (longop)
1013        {
1014        char *equals = strchr(op->long_name, '=');
1015        int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1016          (int)(equals - op->long_name);
1017        fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1018          option_data, nlen, op->long_name);
1019        }
1020      else
1021        fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1022          option_data, op->one_char);
1023      pcregrep_exit(usage(2));
1024      }
1025    
1026    return n;
1027    }
1028    
1029    
1030    
1031    /*************************************************
1032    *       Add item to a chain of numbers           *
1033    *************************************************/
1034    
1035    /* Used to add an item onto a chain, or just return an unconnected item if the
1036    "after" argument is NULL.
1037    
1038    Arguments:
1039      n          the number to add
1040      after      if not NULL points to item to insert after
1041    
1042    Returns:     new number block
1043    */
1044    
1045    static omstr *
1046    add_number(int n, omstr *after)
1047    {
1048    omstr *om = (omstr *)malloc(sizeof(omstr));
1049    
1050    if (om == NULL)
1051      {
1052      fprintf(stderr, "pcregrep: malloc failed\n");
1053      pcregrep_exit(2);
1054      }
1055    om->next = NULL;
1056    om->groupnum = n;
1057    
1058    if (after != NULL)
1059      {
1060      om->next = after->next;
1061      after->next = om;
1062      }
1063    return om;
1064    }
1065    
1066    
1067    
1068    /*************************************************
1069    *            Read one line of input              *
1070    *************************************************/
1071    
1072    /* Normally, input is read using fread() into a large buffer, so many lines may
1073    be read at once. However, doing this for tty input means that no output appears
1074    until a lot of input has been typed. Instead, tty input is handled line by
1075    line. We cannot use fgets() for this, because it does not stop at a binary
1076    zero, and therefore there is no way of telling how many characters it has read,
1077    because there may be binary zeros embedded in the data.
1078    
1079    Arguments:
1080      buffer     the buffer to read into
1081      length     the maximum number of characters to read
1082      f          the file
1083    
1084    Returns:     the number of characters read, zero at end of file
1085    */
1086    
1087    static unsigned int
1088    read_one_line(char *buffer, int length, FILE *f)
1089    {
1090    int c;
1091    int yield = 0;
1092    while ((c = fgetc(f)) != EOF)
1093      {
1094      buffer[yield++] = c;
1095      if (c == '\n' || yield >= length) break;
1096      }
1097    return yield;
1098    }
1099    
1100    
1101    
1102    /*************************************************
1103  *             Find end of line                   *  *             Find end of line                   *
1104  *************************************************/  *************************************************/
1105    
# Line 520  Arguments: Line 1111  Arguments:
1111    endptr    end of available data    endptr    end of available data
1112    lenptr    where to put the length of the eol sequence    lenptr    where to put the length of the eol sequence
1113    
1114  Returns:    pointer to the last byte of the line  Returns:    pointer after the last byte of the line,
1115                including the newline byte(s)
1116  */  */
1117    
1118  static char *  static char *
# Line 589  switch(endlinetype) Line 1181  switch(endlinetype)
1181    
1182      switch (c)      switch (c)
1183        {        {
1184        case 0x0a:    /* LF */        case '\n':
1185        *lenptr = 1;        *lenptr = 1;
1186        return p;        return p;
1187    
1188        case 0x0d:    /* CR */        case '\r':
1189        if (p < endptr && *p == 0x0a)        if (p < endptr && *p == '\n')
1190          {          {
1191          *lenptr = 2;          *lenptr = 2;
1192          p++;          p++;
# Line 633  switch(endlinetype) Line 1225  switch(endlinetype)
1225    
1226      switch (c)      switch (c)
1227        {        {
1228        case 0x0a:    /* LF */        case '\n':    /* LF */
1229        case 0x0b:    /* VT */        case '\v':    /* VT */
1230        case 0x0c:    /* FF */        case '\f':    /* FF */
1231        *lenptr = 1;        *lenptr = 1;
1232        return p;        return p;
1233    
1234        case 0x0d:    /* CR */        case '\r':    /* CR */
1235        if (p < endptr && *p == 0x0a)        if (p < endptr && *p == '\n')
1236          {          {
1237          *lenptr = 2;          *lenptr = 2;
1238          p++;          p++;
# Line 648  switch(endlinetype) Line 1240  switch(endlinetype)
1240        else *lenptr = 1;        else *lenptr = 1;
1241        return p;        return p;
1242    
1243        case 0x85:    /* NEL */  #ifndef EBCDIC
1244          case 0x85:    /* Unicode NEL */
1245        *lenptr = utf8? 2 : 1;        *lenptr = utf8? 2 : 1;
1246        return p;        return p;
1247    
1248        case 0x2028:  /* LS */        case 0x2028:  /* Unicode LS */
1249        case 0x2029:  /* PS */        case 0x2029:  /* Unicode PS */
1250        *lenptr = 3;        *lenptr = 3;
1251        return p;        return p;
1252    #endif  /* Not EBCDIC */
1253    
1254        default:        default:
1255        break;        break;
# Line 705  switch(endlinetype) Line 1299  switch(endlinetype)
1299      while (p > startptr && p[-1] != '\n') p--;      while (p > startptr && p[-1] != '\n') p--;
1300      if (p <= startptr + 1 || p[-2] == '\r') return p;      if (p <= startptr + 1 || p[-2] == '\r') return p;
1301      }      }
1302    return p;   /* But control should never get here */    /* Control can never get here */
1303    
1304    case EL_ANY:    case EL_ANY:
1305    case EL_ANYCRLF:    case EL_ANYCRLF:
# Line 714  switch(endlinetype) Line 1308  switch(endlinetype)
1308    
1309    while (p > startptr)    while (p > startptr)
1310      {      {
1311      register int c;      register unsigned int c;
1312      char *pp = p - 1;      char *pp = p - 1;
1313    
1314      if (utf8)      if (utf8)
# Line 739  switch(endlinetype) Line 1333  switch(endlinetype)
1333    
1334      if (endlinetype == EL_ANYCRLF) switch (c)      if (endlinetype == EL_ANYCRLF) switch (c)
1335        {        {
1336        case 0x0a:    /* LF */        case '\n':    /* LF */
1337        case 0x0d:    /* CR */        case '\r':    /* CR */
1338        return p;        return p;
1339    
1340        default:        default:
# Line 749  switch(endlinetype) Line 1343  switch(endlinetype)
1343    
1344      else switch (c)      else switch (c)
1345        {        {
1346        case 0x0a:    /* LF */        case '\n':    /* LF */
1347        case 0x0b:    /* VT */        case '\v':    /* VT */
1348        case 0x0c:    /* FF */        case '\f':    /* FF */
1349        case 0x0d:    /* CR */        case '\r':    /* CR */
1350        case 0x85:    /* NEL */  #ifndef EBCDIC
1351        case 0x2028:  /* LS */        case 0x85:    /* Unicode NEL */
1352        case 0x2029:  /* PS */        case 0x2028:  /* Unicode LS */
1353          case 0x2029:  /* Unicode PS */
1354    #endif  /* Not EBCDIC */
1355        return p;        return p;
1356    
1357        default:        default:
# Line 790  Arguments: Line 1386  Arguments:
1386  Returns:            nothing  Returns:            nothing
1387  */  */
1388    
1389  static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,  static void
1390    char *endptr, char *printname)  do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1391      char *printname)
1392  {  {
1393  if (after_context > 0 && lastmatchnumber > 0)  if (after_context > 0 && lastmatchnumber > 0)
1394    {    {
# Line 803  if (after_context > 0 && lastmatchnumber Line 1400  if (after_context > 0 && lastmatchnumber
1400      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
1401      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1402      pp = end_of_line(pp, endptr, &ellength);      pp = end_of_line(pp, endptr, &ellength);
1403      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1404      lastmatchrestart = pp;      lastmatchrestart = pp;
1405      }      }
1406    hyphenpending = TRUE;    hyphenpending = TRUE;
# Line 813  if (after_context > 0 && lastmatchnumber Line 1410  if (after_context > 0 && lastmatchnumber
1410    
1411    
1412  /*************************************************  /*************************************************
1413    *   Apply patterns to subject till one matches   *
1414    *************************************************/
1415    
1416    /* This function is called to run through all patterns, looking for a match. It
1417    is used multiple times for the same subject when colouring is enabled, in order
1418    to find all possible matches.
1419    
1420    Arguments:
1421      matchptr     the start of the subject
1422      length       the length of the subject to match
1423      options      options for pcre_exec
1424      startoffset  where to start matching
1425      offsets      the offsets vector to fill in
1426      mrc          address of where to put the result of pcre_exec()
1427    
1428    Returns:      TRUE if there was a match
1429                  FALSE if there was no match
1430                  invert if there was a non-fatal error
1431    */
1432    
1433    static BOOL
1434    match_patterns(char *matchptr, size_t length, unsigned int options,
1435      int startoffset, int *offsets, int *mrc)
1436    {
1437    int i;
1438    size_t slen = length;
1439    patstr *p = patterns;
1440    const char *msg = "this text:\n\n";
1441    
1442    if (slen > 200)
1443      {
1444      slen = 200;
1445      msg = "text that starts:\n\n";
1446      }
1447    for (i = 1; p != NULL; p = p->next, i++)
1448      {
1449      *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1450        startoffset, options, offsets, OFFSET_SIZE);
1451      if (*mrc >= 0) return TRUE;
1452      if (*mrc == PCRE_ERROR_NOMATCH) continue;
1453      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1454      if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1455      fprintf(stderr, "%s", msg);
1456      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1457      fprintf(stderr, "\n\n");
1458      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1459          *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1460        resource_error = TRUE;
1461      if (error_count++ > 20)
1462        {
1463        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1464        pcregrep_exit(2);
1465        }
1466      return invert;    /* No more matching; don't show the line again */
1467      }
1468    
1469    return FALSE;  /* No match, no errors */
1470    }
1471    
1472    
1473    
1474    /*************************************************
1475  *            Grep an individual file             *  *            Grep an individual file             *
1476  *************************************************/  *************************************************/
1477    
1478  /* This is called from grep_or_recurse() below. It uses a buffer that is three  /* This is called from grep_or_recurse() below. It uses a buffer that is three
1479  times the value of MBUFTHIRD. The matching point is never allowed to stray into  times the value of bufthird. The matching point is never allowed to stray into
1480  the top third of the buffer, thus keeping more of the file available for  the top third of the buffer, thus keeping more of the file available for
1481  context printing or for multiline scanning. For large files, the pointer will  context printing or for multiline scanning. For large files, the pointer will
1482  be in the middle third most of the time, so the bottom third is available for  be in the middle third most of the time, so the bottom third is available for
# Line 828  Arguments: Line 1487  Arguments:
1487                 the gzFile pointer when reading is via libz                 the gzFile pointer when reading is via libz
1488                 the BZFILE pointer when reading is via libbz2                 the BZFILE pointer when reading is via libbz2
1489    frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2    frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1490      filename     the file name or NULL (for errors)
1491    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
1492                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
1493                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
1494    
1495  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
1496                 1 otherwise (no matches)                 1 otherwise (no matches)
1497                 2 if there is a read error on a .bz2 file                 2 if an overlong line is encountered
1498                   3 if there is a read error on a .bz2 file
1499  */  */
1500    
1501  static int  static int
1502  pcregrep(void *handle, int frtype, char *printname)  pcregrep(void *handle, int frtype, char *filename, char *printname)
1503  {  {
1504  int rc = 1;  int rc = 1;
1505  int linenumber = 1;  int linenumber = 1;
1506  int lastmatchnumber = 0;  int lastmatchnumber = 0;
1507  int count = 0;  int count = 0;
1508  int filepos = 0;  int filepos = 0;
1509  int offsets[99];  int offsets[OFFSET_SIZE];
1510  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
1511  char buffer[3*MBUFTHIRD];  char *ptr = main_buffer;
 char *ptr = buffer;  
1512  char *endptr;  char *endptr;
1513  size_t bufflength;  size_t bufflength;
1514    BOOL binary = FALSE;
1515  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
1516    BOOL input_line_buffered = line_buffered;
1517  FILE *in = NULL;                    /* Ensure initialized */  FILE *in = NULL;                    /* Ensure initialized */
1518    
1519  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
# Line 868  of what we have. In the case of libz, a Line 1530  of what we have. In the case of libz, a
1530  plain file. However, if a .bz2 file isn't actually bzipped, the first read will  plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1531  fail. */  fail. */
1532    
1533    (void)frtype;
1534    
1535  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
1536  if (frtype == FR_LIBZ)  if (frtype == FR_LIBZ)
1537    {    {
1538    ingz = (gzFile)handle;    ingz = (gzFile)handle;
1539    bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);    bufflength = gzread (ingz, main_buffer, bufsize);
1540    }    }
1541  else  else
1542  #endif  #endif
# Line 881  else Line 1545  else
1545  if (frtype == FR_LIBBZ2)  if (frtype == FR_LIBBZ2)
1546    {    {
1547    inbz2 = (BZFILE *)handle;    inbz2 = (BZFILE *)handle;
1548    bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);    bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1549    if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */    if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1550    }                                    /* without the cast it is unsigned. */    }                                    /* without the cast it is unsigned. */
1551  else  else
# Line 889  else Line 1553  else
1553    
1554    {    {
1555    in = (FILE *)handle;    in = (FILE *)handle;
1556    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);    if (is_file_tty(in)) input_line_buffered = TRUE;
1557      bufflength = input_line_buffered?
1558        read_one_line(main_buffer, bufsize, in) :
1559        fread(main_buffer, 1, bufsize, in);
1560    }    }
1561    
1562  endptr = buffer + bufflength;  endptr = main_buffer + bufflength;
1563    
1564    /* Unless binary-files=text, see if we have a binary file. This uses the same
1565    rule as GNU grep, namely, a search for a binary zero byte near the start of the
1566    file. */
1567    
1568    if (binary_files != BIN_TEXT)
1569      {
1570      binary =
1571        memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1572      if (binary && binary_files == BIN_NOMATCH) return 1;
1573      }
1574    
1575  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
1576  files, endptr will be at the end of the buffer when we are in the middle of the  files, endptr will be at the end of the buffer when we are in the middle of the
# Line 901  way, the buffer is shifted left and re-f Line 1579  way, the buffer is shifted left and re-f
1579    
1580  while (ptr < endptr)  while (ptr < endptr)
1581    {    {
1582    int i, endlinelength;    int endlinelength;
1583    int mrc = 0;    int mrc = 0;
1584    BOOL match = FALSE;    int startoffset = 0;
1585      int prevoffsets[2];
1586      unsigned int options = 0;
1587      BOOL match;
1588    char *matchptr = ptr;    char *matchptr = ptr;
1589    char *t = ptr;    char *t = ptr;
1590    size_t length, linelength;    size_t length, linelength;
1591    
1592      prevoffsets[0] = prevoffsets[1] = -1;
1593    
1594    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
1595    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
1596    length of the remainder of the data in the buffer. Otherwise, it is the length of    length of the remainder of the data in the buffer. Otherwise, it is the length of
1597    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
1598    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1599    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
1600      first line. */
1601    
1602    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
1603    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
1604    length = multiline? (size_t)(endptr - ptr) : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
1605    
1606      /* Check to see if the line we are looking at extends right to the very end
1607      of the buffer without a line terminator. This means the line is too long to
1608      handle. */
1609    
1610      if (endlinelength == 0 && t == main_buffer + bufsize)
1611        {
1612        fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1613                        "pcregrep: check the --buffer-size option\n",
1614                        linenumber,
1615                        (filename == NULL)? "" : " of file ",
1616                        (filename == NULL)? "" : filename);
1617        return 2;
1618        }
1619    
1620    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
1621    
1622  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
1623    if (jfriedl_XT || jfriedl_XR)    if (jfriedl_XT || jfriedl_XR)
1624    {    {
1625        #include <sys/time.h>  #     include <sys/time.h>
1626        #include <time.h>  #     include <time.h>
1627        struct timeval start_time, end_time;        struct timeval start_time, end_time;
1628        struct timezone dummy;        struct timezone dummy;
1629          int i;
1630    
1631        if (jfriedl_XT)        if (jfriedl_XT)
1632        {        {
# Line 936  while (ptr < endptr) Line 1635  while (ptr < endptr)
1635            ptr = malloc(newlen + 1);            ptr = malloc(newlen + 1);
1636            if (!ptr) {            if (!ptr) {
1637                    printf("out of memory");                    printf("out of memory");
1638                    exit(2);                    pcregrep_exit(2);
1639            }            }
1640            endptr = ptr;            endptr = ptr;
1641            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
# Line 953  while (ptr < endptr) Line 1652  while (ptr < endptr)
1652    
1653    
1654        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1655            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);            match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1656                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1657    
1658        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
1659                perror("bad gettimeofday");                perror("bad gettimeofday");
# Line 967  while (ptr < endptr) Line 1667  while (ptr < endptr)
1667    }    }
1668  #endif  #endif
1669    
1670    /* We come back here after a match when the -o option (only_matching) is set,    /* We come back here after a match when show_only_matching is set, in order
1671    in order to find any further matches in the same line. */    to find any further matches in the same line. This applies to
1672      --only-matching, --file-offsets, and --line-offsets. */
1673    
1674    ONLY_MATCHING_RESTART:    ONLY_MATCHING_RESTART:
1675    
1676    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches or there is an error other
1677    the final newline in the subject string. */    than NOMATCH. This code is in a subroutine so that it can be re-used for
1678      finding subsequent matches when colouring matched lines. After finding one
1679      match, set PCRE_NOTEMPTY to disable any further matches of null strings in
1680      this line. */
1681    
1682    for (i = 0; i < pattern_count; i++)    match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc);
1683      {    options = PCRE_NOTEMPTY;
     mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,  
       offsets, 99);  
     if (mrc >= 0) { match = TRUE; break; }  
     if (mrc != PCRE_ERROR_NOMATCH)  
       {  
       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);  
       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
       fprintf(stderr, "this line:\n");  
       fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */  
       fprintf(stderr, "\n");  
       if (error_count == 0 &&  
           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))  
         {  
         fprintf(stderr, "pcregrep: error %d means that a resource limit "  
           "was exceeded\n", mrc);  
         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
         }  
       if (error_count++ > 20)  
         {  
         fprintf(stderr, "pcregrep: too many errors - abandoned\n");  
         exit(2);  
         }  
       match = invert;    /* No more matching; don't show the line again */  
       break;  
       }  
     }  
1684    
1685    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1686    
# Line 1018  while (ptr < endptr) Line 1696  while (ptr < endptr)
1696    
1697      if (count_only) count++;      if (count_only) count++;
1698    
1699        /* When handling a binary file and binary-files==binary, the "binary"
1700        variable will be set true (it's false in all other cases). In this
1701        situation we just want to output the file name. No need to scan further. */
1702    
1703        else if (binary)
1704          {
1705          fprintf(stdout, "Binary file %s matches\n", filename);
1706          return 0;
1707          }
1708    
1709      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1710      in the file. */      in the file. */
1711    
1712      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1713        {        {
1714        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1715        return 0;        return 0;
# Line 1031  while (ptr < endptr) Line 1719  while (ptr < endptr)
1719    
1720      else if (quiet) return 0;      else if (quiet) return 0;
1721    
1722      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched,
1723      the --file-offsets and --line-offsets options output offsets for the      and/or one or more captured portions of it, as long as these strings are
1724      matching substring (they both force --only-matching). None of these options      not empty. The --file-offsets and --line-offsets options output offsets for
1725      prints any context. Afterwards, adjust the start and length, and then jump      the matching substring (all three set show_only_matching). None of these
1726      back to look for further matches in the same line. If we are in invert      mutually exclusive options prints any context. Afterwards, adjust the start
1727      mode, however, nothing is printed - this could be still useful because the      and then jump back to look for further matches in the same line. If we are
1728      return code is set. */      in invert mode, however, nothing is printed and we do not restart - this
1729        could still be useful because the return code is set. */
1730    
1731      else if (only_matching)      else if (show_only_matching)
1732        {        {
1733        if (!invert)        if (!invert)
1734          {          {
1735          if (printname != NULL) fprintf(stdout, "%s:", printname);          int oldstartoffset = startoffset;
1736          if (number) fprintf(stdout, "%d:", linenumber);  
1737          if (line_offsets)          /* It is possible, when a lookbehind assertion contains \K, for the
1738            fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,          same string to be found again. The code below advances startoffset, but
1739              offsets[1] - offsets[0]);          until it is past the "bumpalong" offset that gave the match, the same
1740          else if (file_offsets)          substring will be returned. The PCRE1 library does not return the
1741            fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,          bumpalong offset, so all we can do is ignore repeated strings. (PCRE2
1742              offsets[1] - offsets[0]);          does this better.) */
1743          else  
1744            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          if (prevoffsets[0] != offsets[0] || prevoffsets[1] != offsets[1])
1745          fprintf(stdout, "\n");            {
1746          matchptr += offsets[1];            prevoffsets[0] = offsets[0];
1747          length -= offsets[1];            prevoffsets[1] = offsets[1];
1748    
1749              if (printname != NULL) fprintf(stdout, "%s:", printname);
1750              if (number) fprintf(stdout, "%d:", linenumber);
1751    
1752              /* Handle --line-offsets */
1753    
1754              if (line_offsets)
1755                fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1756                  offsets[1] - offsets[0]);
1757    
1758              /* Handle --file-offsets */
1759    
1760              else if (file_offsets)
1761                fprintf(stdout, "%d,%d\n",
1762                  (int)(filepos + matchptr + offsets[0] - ptr),
1763                  offsets[1] - offsets[0]);
1764    
1765              /* Handle --only-matching, which may occur many times */
1766    
1767              else
1768                {
1769                BOOL printed = FALSE;
1770                omstr *om;
1771    
1772                for (om = only_matching; om != NULL; om = om->next)
1773                  {
1774                  int n = om->groupnum;
1775                  if (n < mrc)
1776                    {
1777                    int plen = offsets[2*n + 1] - offsets[2*n];
1778                    if (plen > 0)
1779                      {
1780                      if (printed) fprintf(stdout, "%s", om_separator);
1781                      if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1782                      FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
1783                      if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1784                      printed = TRUE;
1785                      }
1786                    }
1787                  }
1788    
1789                if (printed || printname != NULL || number) fprintf(stdout, "\n");
1790                }
1791              }
1792    
1793            /* Prepare to repeat to find the next match. If the pattern contained
1794            a lookbehind that included \K, it is possible that the end of the match
1795            might be at or before the actual starting offset we have just used. We
1796            need to start one character further on. Unfortunately, for unanchored
1797            patterns, the actual start offset can be greater than the one that was
1798            set as a result of "bumpalong". PCRE1 does not return the actual start
1799            offset, so we have to check against the original start offset. This may
1800            lead to duplicates - so we need the fudge above to avoid printing them.
1801            (PCRE2 does this better.) */
1802    
1803          match = FALSE;          match = FALSE;
1804            if (line_buffered) fflush(stdout);
1805            rc = 0;                      /* Had some success */
1806            startoffset = offsets[1];    /* Restart after the match */
1807            if (startoffset <= oldstartoffset)
1808              {
1809              if ((size_t)startoffset >= length)
1810                goto END_ONE_MATCH;              /* We were at the end */
1811              startoffset = oldstartoffset + 1;
1812              if (utf8)
1813                while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++;
1814              }
1815          goto ONLY_MATCHING_RESTART;          goto ONLY_MATCHING_RESTART;
1816          }          }
1817        }        }
# Line 1092  while (ptr < endptr) Line 1847  while (ptr < endptr)
1847            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1848            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1849            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1850            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1851            lastmatchrestart = pp;            lastmatchrestart = pp;
1852            }            }
1853          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
# Line 1115  while (ptr < endptr) Line 1870  while (ptr < endptr)
1870          int linecount = 0;          int linecount = 0;
1871          char *p = ptr;          char *p = ptr;
1872    
1873          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1874                 linecount < before_context)                 linecount < before_context)
1875            {            {
1876            linecount++;            linecount++;
1877            p = previous_line(p, buffer);            p = previous_line(p, main_buffer);
1878            }            }
1879    
1880          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 1132  while (ptr < endptr) Line 1887  while (ptr < endptr)
1887            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1888            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1889            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1890            fwrite(p, 1, pp - p, stdout);            FWRITE(p, 1, pp - p, stdout);
1891            p = pp;            p = pp;
1892            }            }
1893          }          }
# Line 1152  while (ptr < endptr) Line 1907  while (ptr < endptr)
1907        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1908        the match will always be before the first newline sequence. */        the match will always be before the first newline sequence. */
1909    
1910        if (multiline)        if (multiline & !invert)
1911          {          {
1912          int ellength;          char *endmatch = ptr + offsets[1];
1913          char *endmatch = ptr;          t = ptr;
1914          if (!invert)          while (t <= endmatch)
1915            {            {
1916            endmatch += offsets[1];            t = end_of_line(t, endptr, &endlinelength);
1917            t = ptr;            if (t < endmatch) linenumber++; else break;
           while (t < endmatch)  
             {  
             t = end_of_line(t, endptr, &ellength);  
             if (t <= endmatch) linenumber++; else break;  
             }  
1918            }            }
1919          endmatch = end_of_line(endmatch, endptr, &ellength);          linelength = t - ptr - endlinelength;
         linelength = endmatch - ptr - ellength;  
1920          }          }
1921    
1922        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 1182  while (ptr < endptr) Line 1931  while (ptr < endptr)
1931          {          {
1932          int first = S_arg * 2;          int first = S_arg * 2;
1933          int last  = first + 1;          int last  = first + 1;
1934          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1935          fprintf(stdout, "X");          fprintf(stdout, "X");
1936          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1937          }          }
1938        else        else
1939  #endif  #endif
1940    
1941        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1942          matches, but not of course if the line is a non-match. */
1943    
1944        if (do_colour)        if (do_colour && !invert)
1945          {          {
1946          fwrite(ptr, 1, offsets[0], stdout);          int plength;
1947            FWRITE(ptr, 1, offsets[0], stdout);
1948          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1949          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1950          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1951          fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],          for (;;)
1952            stdout);            {
1953              startoffset = offsets[1];
1954              if (startoffset >= (int)linelength + endlinelength ||
1955                  !match_patterns(matchptr, length, options, startoffset, offsets,
1956                    &mrc))
1957                break;
1958              FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1959              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1960              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1961              fprintf(stdout, "%c[00m", 0x1b);
1962              }
1963    
1964            /* In multiline mode, we may have already printed the complete line
1965            and its line-ending characters (if they matched the pattern), so there
1966            may be no more to print. */
1967    
1968            plength = (int)((linelength + endlinelength) - startoffset);
1969            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1970          }          }
1971        else fwrite(ptr, 1, linelength + endlinelength, stdout);  
1972          /* Not colouring; no need to search for further matches */
1973    
1974          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1975        }        }
1976    
1977      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match. If --line-buffered was
1978        given, flush the output. */
1979    
1980        if (line_buffered) fflush(stdout);
1981      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1982    
1983      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
# Line 1235  while (ptr < endptr) Line 2008  while (ptr < endptr)
2008    /* Advance to after the newline and increment the line number. The file    /* Advance to after the newline and increment the line number. The file
2009    offset to the current line is maintained in filepos. */    offset to the current line is maintained in filepos. */
2010    
2011      END_ONE_MATCH:
2012    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
2013    filepos += linelength + endlinelength;    filepos += (int)(linelength + endlinelength);
2014    linenumber++;    linenumber++;
2015    
2016      /* If input is line buffered, and the buffer is not yet full, read another
2017      line and add it into the buffer. */
2018    
2019      if (input_line_buffered && bufflength < (size_t)bufsize)
2020        {
2021        int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
2022        bufflength += add;
2023        endptr += add;
2024        }
2025    
2026    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
2027    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
2028    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
2029    about to be lost, print them. */    about to be lost, print them. */
2030    
2031    if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)    if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
2032      {      {
2033      if (after_context > 0 &&      if (after_context > 0 &&
2034          lastmatchnumber > 0 &&          lastmatchnumber > 0 &&
2035          lastmatchrestart < buffer + MBUFTHIRD)          lastmatchrestart < main_buffer + bufthird)
2036        {        {
2037        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2038        lastmatchnumber = 0;        lastmatchnumber = 0;
# Line 1256  while (ptr < endptr) Line 2040  while (ptr < endptr)
2040    
2041      /* Now do the shuffle */      /* Now do the shuffle */
2042    
2043      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
2044      ptr -= MBUFTHIRD;      ptr -= bufthird;
2045    
2046  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
2047      if (frtype == FR_LIBZ)      if (frtype == FR_LIBZ)
2048        bufflength = 2*MBUFTHIRD +        bufflength = 2*bufthird +
2049          gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);          gzread (ingz, main_buffer + 2*bufthird, bufthird);
2050      else      else
2051  #endif  #endif
2052    
2053  #ifdef SUPPORT_LIBBZ2  #ifdef SUPPORT_LIBBZ2
2054      if (frtype == FR_LIBBZ2)      if (frtype == FR_LIBBZ2)
2055        bufflength = 2*MBUFTHIRD +        bufflength = 2*bufthird +
2056          BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);          BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
2057      else      else
2058  #endif  #endif
2059    
2060      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);      bufflength = 2*bufthird +
2061          (input_line_buffered?
2062      endptr = buffer + bufflength;         read_one_line(main_buffer + 2*bufthird, bufthird, in) :
2063           fread(main_buffer + 2*bufthird, 1, bufthird, in));
2064        endptr = main_buffer + bufflength;
2065    
2066      /* Adjust any last match point */      /* Adjust any last match point */
2067    
2068      if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;      if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
2069      }      }
2070    }     /* Loop through the whole file */    }     /* Loop through the whole file */
2071    
2072  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
2073  hyphenpending if it prints something. */  hyphenpending if it prints something. */
2074    
2075  if (!only_matching && !count_only)  if (!show_only_matching && !count_only)
2076    {    {
2077    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2078    hyphenpending |= endhyphenpending;    hyphenpending |= endhyphenpending;
# Line 1305  if (filenames == FN_NOMATCH_ONLY) Line 2091  if (filenames == FN_NOMATCH_ONLY)
2091    
2092  if (count_only)  if (count_only)
2093    {    {
2094    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
2095    fprintf(stdout, "%d\n", count);      {
2096        if (printname != NULL && filenames != FN_NONE)
2097          fprintf(stdout, "%s:", printname);
2098        fprintf(stdout, "%d\n", count);
2099        }
2100    }    }
2101    
2102  return rc;  return rc;
# Line 1326  Arguments: Line 2116  Arguments:
2116    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
2117    only_one_at_top   TRUE if the path is the only one at toplevel    only_one_at_top   TRUE if the path is the only one at toplevel
2118    
2119  Returns:   0 if there was at least one match  Returns:  -1 the file/directory was skipped
2120               0 if there was at least one match
2121             1 if there were no matches             1 if there were no matches
2122             2 there was some kind of error             2 there was some kind of error
2123    
# Line 1337  static int Line 2128  static int
2128  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2129  {  {
2130  int rc = 1;  int rc = 1;
 int sep;  
2131  int frtype;  int frtype;
 int pathlen;  
2132  void *handle;  void *handle;
2133    char *lastcomp;
2134  FILE *in = NULL;           /* Ensure initialized */  FILE *in = NULL;           /* Ensure initialized */
2135    
2136  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
# Line 1351  gzFile ingz = NULL; Line 2141  gzFile ingz = NULL;
2141  BZFILE *inbz2 = NULL;  BZFILE *inbz2 = NULL;
2142  #endif  #endif
2143    
2144    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2145    int pathlen;
2146    #endif
2147    
2148    #if defined NATIVE_ZOS
2149    int zos_type;
2150    FILE *zos_test_file;
2151    #endif
2152    
2153  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
2154    
2155  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
2156    {    {
2157    return pcregrep(stdin, FR_PLAIN,    return pcregrep(stdin, FR_PLAIN, stdin_name,
2158      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
2159        stdin_name : NULL);        stdin_name : NULL);
2160    }    }
2161    
2162  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2163  each file within it, subject to any include or exclude patterns that were set.  directories, whereas --include and --exclude apply to everything else. The test
2164  The scanning code is localized so it can be made system-specific. */  is against the final component of the path. */
2165    
2166    lastcomp = strrchr(pathname, FILESEP);
2167    lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2168    
2169    /* If the file is a directory, skip if not recursing or if explicitly excluded.
2170    Otherwise, scan the directory and recurse for each path within it. The scanning
2171    code is localized so it can be made system-specific. */
2172    
2173    
2174    /* For z/OS, determine the file type. */
2175    
2176    #if defined NATIVE_ZOS
2177    zos_test_file =  fopen(pathname,"rb");
2178    
2179    if (zos_test_file == NULL)
2180       {
2181       if (!silent) fprintf(stderr, "pcregrep: failed to test next file %s\n",
2182         pathname, strerror(errno));
2183       return -1;
2184       }
2185    zos_type = identifyzosfiletype (zos_test_file);
2186    fclose (zos_test_file);
2187    
2188    /* Handle a PDS in separate code */
2189    
2190    if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
2191       {
2192       return travelonpdsdir (pathname, only_one_at_top);
2193       }
2194    
2195    /* Deal with regular files in the normal way below. These types are:
2196       zos_type == __ZOS_PDS_MEMBER
2197       zos_type == __ZOS_PS
2198       zos_type == __ZOS_VSAM_KSDS
2199       zos_type == __ZOS_VSAM_ESDS
2200       zos_type == __ZOS_VSAM_RRDS
2201    */
2202    
2203    /* Handle a z/OS directory using common code. */
2204    
2205    else if (zos_type == __ZOS_HFS)
2206     {
2207    #endif  /* NATIVE_ZOS */
2208    
2209    
2210  if ((sep = isdirectory(pathname)) != 0)  /* Handle directories: common code for all OS */
2211    
2212    if (isdirectory(pathname))
2213    {    {
2214    if (dee_action == dee_SKIP) return 1;    if (dee_action == dee_SKIP ||
2215          !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2216        return -1;
2217    
2218    if (dee_action == dee_RECURSE)    if (dee_action == dee_RECURSE)
2219      {      {
2220      char buffer[1024];      char buffer[1024];
# Line 1383  if ((sep = isdirectory(pathname)) != 0) Line 2231  if ((sep = isdirectory(pathname)) != 0)
2231    
2232      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
2233        {        {
2234        int frc, blen;        int frc;
2235        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
       blen = strlen(buffer);  
   
       if (exclude_compiled != NULL &&  
           pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)  
         continue;  
   
       if (include_compiled != NULL &&  
           pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  
         continue;  
   
2236        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2237        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
2238         else if (frc == 0 && rc == 1) rc = 0;         else if (frc == 0 && rc == 1) rc = 0;
# Line 1405  if ((sep = isdirectory(pathname)) != 0) Line 2243  if ((sep = isdirectory(pathname)) != 0)
2243      }      }
2244    }    }
2245    
2246  /* If the file is not a directory and not a regular file, skip it if that's  #if defined NATIVE_ZOS
2247  been requested. */   }
2248    #endif
2249    
2250  else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;  /* If the file is not a directory, check for a regular file, and if it is not,
2251    skip it if that's been requested. Otherwise, check for an explicit inclusion or
2252    exclusion. */
2253    
2254    else if (
2255    #if defined NATIVE_ZOS
2256            (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
2257    #else  /* all other OS */
2258            (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2259    #endif
2260            !test_incexc(lastcomp, include_patterns, exclude_patterns))
2261      return -1;  /* File skipped */
2262    
2263  /* Control reaches here if we have a regular file, or if we have a directory  /* Control reaches here if we have a regular file, or if we have a directory
2264  and recursion or skipping was not requested, or if we have anything else and  and recursion or skipping was not requested, or if we have anything else and
# Line 1416  skipping was not requested. The scan pro Line 2266  skipping was not requested. The scan pro
2266  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
2267  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
2268    
2269  pathlen = strlen(pathname);  #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2270    pathlen = (int)(strlen(pathname));
2271    #endif
2272    
2273  /* Open using zlib if it is supported and the file name ends with .gz. */  /* Open using zlib if it is supported and the file name ends with .gz. */
2274    
# Line 1456  an attempt to read a .bz2 file indicates Line 2308  an attempt to read a .bz2 file indicates
2308  PLAIN_FILE:  PLAIN_FILE:
2309  #endif  #endif
2310    {    {
2311    in = fopen(pathname, "r");    in = fopen(pathname, "rb");
2312    handle = (void *)in;    handle = (void *)in;
2313    frtype = FR_PLAIN;    frtype = FR_PLAIN;
2314    }    }
# Line 1473  if (handle == NULL) Line 2325  if (handle == NULL)
2325    
2326  /* Now grep the file */  /* Now grep the file */
2327    
2328  rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||  rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2329    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2330    
2331  /* Close in an appropriate manner. */  /* Close in an appropriate manner. */
# Line 1484  if (frtype == FR_LIBZ) Line 2336  if (frtype == FR_LIBZ)
2336  else  else
2337  #endif  #endif
2338    
2339  /* If it is a .bz2 file and the result is 2, it means that the first attempt to  /* If it is a .bz2 file and the result is 3, it means that the first attempt to
2340  read failed. If the error indicates that the file isn't in fact bzipped, try  read failed. If the error indicates that the file isn't in fact bzipped, try
2341  again as a normal file. */  again as a normal file. */
2342    
2343  #ifdef SUPPORT_LIBBZ2  #ifdef SUPPORT_LIBBZ2
2344  if (frtype == FR_LIBBZ2)  if (frtype == FR_LIBBZ2)
2345    {    {
2346    if (rc == 2)    if (rc == 3)
2347      {      {
2348      int errnum;      int errnum;
2349      const char *err = BZ2_bzerror(inbz2, &errnum);      const char *err = BZ2_bzerror(inbz2, &errnum);
# Line 1503  if (frtype == FR_LIBBZ2) Line 2355  if (frtype == FR_LIBBZ2)
2355      else if (!silent)      else if (!silent)
2356        fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",        fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
2357          pathname, err);          pathname, err);
2358        rc = 2;    /* The normal "something went wrong" code */
2359      }      }
2360    BZ2_bzclose(inbz2);    BZ2_bzclose(inbz2);
2361    }    }
# Line 1520  return rc; Line 2373  return rc;
2373    
2374    
2375    
   
 /*************************************************  
 *                Usage function                  *  
 *************************************************/  
   
 static int  
 usage(int rc)  
 {  
 option_item *op;  
 fprintf(stderr, "Usage: pcregrep [-");  
 for (op = optionlist; op->one_char != 0; op++)  
   {  
   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);  
   }  
 fprintf(stderr, "] [long options] [pattern] [files]\n");  
 fprintf(stderr, "Type `pcregrep --help' for more information and the long "  
   "options.\n");  
 return rc;  
 }  
   
   
   
   
 /*************************************************  
 *                Help function                   *  
 *************************************************/  
   
 static void  
 help(void)  
 {  
 option_item *op;  
   
 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  
 printf("Search for PATTERN in each FILE or standard input.\n");  
 printf("PATTERN must be present if neither -e nor -f is used.\n");  
 printf("\"-\" can be used as a file name to mean STDIN.\n");  
   
 #ifdef SUPPORT_LIBZ  
 printf("Files whose names end in .gz are read using zlib.\n");  
 #endif  
   
 #ifdef SUPPORT_LIBBZ2  
 printf("Files whose names end in .bz2 are read using bzlib2.\n");  
 #endif  
   
 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2  
 printf("Other files and the standard input are read as plain files.\n\n");  
 #else  
 printf("All files are read as plain files, without any interpretation.\n\n");  
 #endif  
   
 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
 printf("Options:\n");  
   
 for (op = optionlist; op->one_char != 0; op++)  
   {  
   int n;  
   char s[4];  
   if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");  
   n = 30 - printf("  %s --%s", s, op->long_name);  
   if (n < 1) n = 1;  
   printf("%.*s%s\n", n, "                    ", op->help_text);  
   }  
   
 printf("\nWhen reading patterns from a file instead of using a command line option,\n");  
 printf("trailing white space is removed and blank lines are ignored.\n");  
 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
   
 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");  
 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  
 }  
   
   
   
   
2376  /*************************************************  /*************************************************
2377  *    Handle a single-letter, no data option      *  *    Handle a single-letter, no data option      *
2378  *************************************************/  *************************************************/
# Line 1605  handle_option(int letter, int options) Line 2383  handle_option(int letter, int options)
2383  switch(letter)  switch(letter)
2384    {    {
2385    case N_FOFFSETS: file_offsets = TRUE; break;    case N_FOFFSETS: file_offsets = TRUE; break;
2386    case N_HELP: help(); exit(0);    case N_HELP: help(); pcregrep_exit(0);
2387      case N_LBUFFER: line_buffered = TRUE; break;
2388    case N_LOFFSETS: line_offsets = number = TRUE; break;    case N_LOFFSETS: line_offsets = number = TRUE; break;
2389      case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2390      case 'a': binary_files = BIN_TEXT; break;
2391    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
2392    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
2393    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
2394      case 'I': binary_files = BIN_NOMATCH; break;
2395    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
2396    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
2397    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2398    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
2399    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2400    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
2401    case 'o': only_matching = TRUE; break;  
2402      case 'o':
2403      only_matching_last = add_number(0, only_matching_last);
2404      if (only_matching == NULL) only_matching = only_matching_last;
2405      break;
2406    
2407    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
2408    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
2409    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
# Line 1626  switch(letter) Line 2413  switch(letter)
2413    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
2414    
2415    case 'V':    case 'V':
2416    fprintf(stderr, "pcregrep version %s\n", pcre_version());    fprintf(stdout, "pcregrep version %s\n", pcre_version());
2417    exit(0);    pcregrep_exit(0);
2418    break;    break;
2419    
2420    default:    default:
2421    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2422    exit(usage(2));    pcregrep_exit(usage(2));
2423    }    }
2424    
2425  return options;  return options;
# Line 1670  return buffer; Line 2457  return buffer;
2457  *          Compile a single pattern              *  *          Compile a single pattern              *
2458  *************************************************/  *************************************************/
2459    
2460  /* When the -F option has been used, this is called for each substring.  /* Do nothing if the pattern has already been compiled. This is the case for
2461  Otherwise it's called for each supplied pattern.  include/exclude patterns read from a file.
2462    
2463    When the -F option has been used, each "pattern" may be a list of strings,
2464    separated by line breaks. They will be matched literally. We split such a
2465    string and compile the first substring, inserting an additional block into the
2466    pattern chain.
2467    
2468  Arguments:  Arguments:
2469    pattern        the pattern string    p              points to the pattern block
2470    options        the PCRE options    options        the PCRE options
2471    filename       the file name, or NULL for a command-line pattern    popts          the processing options
2472      fromfile       TRUE if the pattern was read from a file
2473      fromtext       file name or identifying text (e.g. "include")
2474    count          0 if this is the only command line pattern, or    count          0 if this is the only command line pattern, or
2475                   number of the command line pattern, or                   number of the command line pattern, or
2476                   linenumber for a pattern from a file                   linenumber for a pattern from a file
# Line 1685  Returns:         TRUE on success, FALSE Line 2479  Returns:         TRUE on success, FALSE
2479  */  */
2480    
2481  static BOOL  static BOOL
2482  compile_single_pattern(char *pattern, int options, char *filename, int count)  compile_pattern(patstr *p, int options, int popts, int fromfile,
2483      const char *fromtext, int count)
2484  {  {
2485  char buffer[MBUFTHIRD + 16];  char buffer[PATBUFSIZE];
2486  const char *error;  const char *error;
2487    char *ps = p->string;
2488    int patlen = strlen(ps);
2489  int errptr;  int errptr;
2490    
2491  if (pattern_count >= MAX_PATTERN_COUNT)  if (p->compiled != NULL) return TRUE;
   {  
   fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",  
     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);  
   return FALSE;  
   }  
2492    
2493  sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,  if ((popts & PO_FIXED_STRINGS) != 0)
   suffix[process_options]);  
 pattern_list[pattern_count] =  
   pcre_compile(buffer, options, &error, &errptr, pcretables);  
 if (pattern_list[pattern_count] != NULL)  
2494    {    {
2495    pattern_count++;    int ellength;
2496    return TRUE;    char *eop = ps + patlen;
2497      char *pe = end_of_line(ps, eop, &ellength);
2498    
2499      if (ellength != 0)
2500        {
2501        if (add_pattern(pe, p) == NULL) return FALSE;
2502        patlen = (int)(pe - ps - ellength);
2503        }
2504    }    }
2505    
2506    sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2507    p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2508    if (p->compiled != NULL) return TRUE;
2509    
2510  /* Handle compile errors */  /* Handle compile errors */
2511    
2512  errptr -= (int)strlen(prefix[process_options]);  errptr -= (int)strlen(prefix[popts]);
2513  if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);  if (errptr > patlen) errptr = patlen;
2514    
2515  if (filename == NULL)  if (fromfile)
2516    {    {
2517    if (count == 0)    fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2518      fprintf(stderr, "pcregrep: Error in command-line regex "      "at offset %d: %s\n", count, fromtext, errptr, error);
       "at offset %d: %s\n", errptr, error);  
   else  
     fprintf(stderr, "pcregrep: Error in %s command-line regex "  
       "at offset %d: %s\n", ordin(count), errptr, error);  
2519    }    }
2520  else  else
2521    {    {
2522    fprintf(stderr, "pcregrep: Error in regex in line %d of %s "    if (count == 0)
2523      "at offset %d: %s\n", count, filename, errptr, error);      fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2524          fromtext, errptr, error);
2525      else
2526        fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2527          ordin(count), fromtext, errptr, error);
2528    }    }
2529    
2530  return FALSE;  return FALSE;
# Line 1734  return FALSE; Line 2533  return FALSE;
2533    
2534    
2535  /*************************************************  /*************************************************
2536  *           Compile one supplied pattern         *  *     Read and compile a file of patterns        *
2537  *************************************************/  *************************************************/
2538    
2539  /* When the -F option has been used, each string may be a list of strings,  /* This is used for --filelist, --include-from, and --exclude-from.
 separated by line breaks. They will be matched literally.  
2540    
2541  Arguments:  Arguments:
2542    pattern        the pattern string    name         the name of the file; "-" is stdin
2543    options        the PCRE options    patptr       pointer to the pattern chain anchor
2544    filename       the file name, or NULL for a command-line pattern    patlastptr   pointer to the last pattern pointer
2545    count          0 if this is the only command line pattern, or    popts        the process options to pass to compile_pattern()
                  number of the command line pattern, or  
                  linenumber for a pattern from a file  
2546    
2547  Returns:         TRUE on success, FALSE after an error  Returns:       TRUE if all went well
2548  */  */
2549    
2550  static BOOL  static BOOL
2551  compile_pattern(char *pattern, int options, char *filename, int count)  read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2552  {  {
2553  if ((process_options & PO_FIXED_STRINGS) != 0)  int linenumber = 0;
2554    FILE *f;
2555    char *filename;
2556    char buffer[PATBUFSIZE];
2557    
2558    if (strcmp(name, "-") == 0)
2559      {
2560      f = stdin;
2561      filename = stdin_name;
2562      }
2563    else
2564      {
2565      f = fopen(name, "r");
2566      if (f == NULL)
2567        {
2568        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2569        return FALSE;
2570        }
2571      filename = name;
2572      }
2573    
2574    while (fgets(buffer, PATBUFSIZE, f) != NULL)
2575    {    {
2576    char *eop = pattern + strlen(pattern);    char *s = buffer + (int)strlen(buffer);
2577    char buffer[MBUFTHIRD];    while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2578      *s = 0;
2579      linenumber++;
2580      if (buffer[0] == 0) continue;   /* Skip blank lines */
2581    
2582      /* Note: this call to add_pattern() puts a pointer to the local variable
2583      "buffer" into the pattern chain. However, that pointer is used only when
2584      compiling the pattern, which happens immediately below, so we flatten it
2585      afterwards, as a precaution against any later code trying to use it. */
2586    
2587      *patlastptr = add_pattern(buffer, *patlastptr);
2588      if (*patlastptr == NULL)
2589        {
2590        if (f != stdin) fclose(f);
2591        return FALSE;
2592        }
2593      if (*patptr == NULL) *patptr = *patlastptr;
2594    
2595      /* This loop is needed because compiling a "pattern" when -F is set may add
2596      on additional literal patterns if the original contains a newline. In the
2597      common case, it never will, because fgets() stops at a newline. However,
2598      the -N option can be used to give pcregrep a different newline setting. */
2599    
2600    for(;;)    for(;;)
2601      {      {
2602      int ellength;      if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2603      char *p = end_of_line(pattern, eop, &ellength);          linenumber))
2604      if (ellength == 0)        {
2605        return compile_single_pattern(pattern, options, filename, count);        if (f != stdin) fclose(f);
     sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);  
     pattern = p;  
     if (!compile_single_pattern(buffer, options, filename, count))  
2606        return FALSE;        return FALSE;
2607          }
2608        (*patlastptr)->string = NULL;            /* Insurance */
2609        if ((*patlastptr)->next == NULL) break;
2610        *patlastptr = (*patlastptr)->next;
2611      }      }
2612    }    }
2613  else return compile_single_pattern(pattern, options, filename, count);  
2614    if (f != stdin) fclose(f);
2615    return TRUE;
2616  }  }
2617    
2618    
# Line 1786  main(int argc, char **argv) Line 2628  main(int argc, char **argv)
2628  {  {
2629  int i, j;  int i, j;
2630  int rc = 1;  int rc = 1;
 int pcre_options = 0;  
 int cmd_pattern_count = 0;  
 int hint_count = 0;  
 int errptr;  
2631  BOOL only_one_at_top;  BOOL only_one_at_top;
2632  char *patterns[MAX_PATTERN_COUNT];  patstr *cp;
2633    fnstr *fn;
2634  const char *locale_from = "--locale";  const char *locale_from = "--locale";
2635  const char *error;  const char *error;
2636    
2637    #ifdef SUPPORT_PCREGREP_JIT
2638    pcre_jit_stack *jit_stack = NULL;
2639    #endif
2640    
2641  /* Set the default line ending value from the default in the PCRE library;  /* Set the default line ending value from the default in the PCRE library;
2642  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2643  */  Note that the return values from pcre_config(), though derived from the ASCII
2644    codes, are the same in EBCDIC environments, so we must use the actual values
2645    rather than escapes such as '\r'. */
2646    
2647  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2648  switch(i)  switch(i)
2649    {    {
2650    default:                 newline = (char *)"lf"; break;    default:               newline = (char *)"lf"; break;
2651    case '\r':               newline = (char *)"cr"; break;    case 13:               newline = (char *)"cr"; break;
2652    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2653    case -1:                 newline = (char *)"any"; break;    case -1:               newline = (char *)"any"; break;
2654    case -2:                 newline = (char *)"anycrlf"; break;    case -2:               newline = (char *)"anycrlf"; break;
2655    }    }
2656    
2657  /* Process the options */  /* Process the options */
# Line 1825  for (i = 1; i < argc; i++) Line 2670  for (i = 1; i < argc; i++)
2670    
2671    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2672      {      {
2673      if (pattern_filename != NULL || pattern_count > 0) break;      if (pattern_files != NULL || patterns != NULL) break;
2674        else exit(usage(2));        else pcregrep_exit(usage(2));
2675      }      }
2676    
2677    /* Handle a long name option, or -- to terminate the options */    /* Handle a long name option, or -- to terminate the options */
# Line 1848  for (i = 1; i < argc; i++) Line 2693  for (i = 1; i < argc; i++)
2693      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2694      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2695      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2696      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
2697      these categories, fortunately. */      both these categories. */
2698    
2699      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2700        {        {
2701        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2702        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2703        if (opbra == NULL)     /* Not a (p) case */  
2704          /* Handle options with only one spelling of the name */
2705    
2706          if (opbra == NULL)     /* Does not contain '(' */
2707          {          {
2708          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2709            {            {
# Line 1863  for (i = 1; i < argc; i++) Line 2711  for (i = 1; i < argc; i++)
2711            }            }
2712          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2713            {            {
2714            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2715            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2716                (int)strlen(arg) : (int)(argequals - arg);
2717            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2718              {              {
2719              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1877  for (i = 1; i < argc; i++) Line 2726  for (i = 1; i < argc; i++)
2726              }              }
2727            }            }
2728          }          }
2729        else                   /* Special case xxxx(p) */  
2730          /* Handle options with an alternate spelling of the name */
2731    
2732          else
2733          {          {
2734          char buff1[24];          char buff1[24];
2735          char buff2[24];          char buff2[24];
2736          int baselen = opbra - op->long_name;  
2737            int baselen = (int)(opbra - op->long_name);
2738            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2739            int arglen = (argequals == NULL || equals == NULL)?
2740              (int)strlen(arg) : (int)(argequals - arg);
2741    
2742          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2743          sprintf(buff2, "%s%.*s", buff1,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2744            (int)strlen(op->long_name) - baselen - 2, opbra + 1);  
2745          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2746               strncmp(arg, buff2, arglen) == 0)
2747              {
2748              if (equals != NULL && argequals != NULL)
2749                {
2750                option_data = argequals;
2751                if (*option_data == '=')
2752                  {
2753                  option_data++;
2754                  longopwasequals = TRUE;
2755                  }
2756                }
2757            break;            break;
2758              }
2759          }          }
2760        }        }
2761    
2762      if (op->one_char == 0)      if (op->one_char == 0)
2763        {        {
2764        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2765        exit(usage(2));        pcregrep_exit(usage(2));
2766        }        }
2767      }      }
2768    
   
2769    /* Jeffrey Friedl's debugging harness uses these additional options which    /* Jeffrey Friedl's debugging harness uses these additional options which
2770    are not in the right form for putting in the option table because they use    are not in the right form for putting in the option table because they use
2771    only one hyphen, yet are more than one character long. By putting them    only one hyphen, yet are more than one character long. By putting them
# Line 1928  for (i = 1; i < argc; i++) Line 2796  for (i = 1; i < argc; i++)
2796      {      {
2797      char *s = argv[i] + 1;      char *s = argv[i] + 1;
2798      longop = FALSE;      longop = FALSE;
2799    
2800      while (*s != 0)      while (*s != 0)
2801        {        {
2802        for (op = optionlist; op->one_char != 0; op++)        for (op = optionlist; op->one_char != 0; op++)
2803          { if (*s == op->one_char) break; }          {
2804            if (*s == op->one_char) break;
2805            }
2806        if (op->one_char == 0)        if (op->one_char == 0)
2807          {          {
2808          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2809            *s, argv[i]);            *s, argv[i]);
2810          exit(usage(2));          pcregrep_exit(usage(2));
2811            }
2812    
2813          option_data = s+1;
2814    
2815          /* Break out if this is the last character in the string; it's handled
2816          below like a single multi-char option. */
2817    
2818          if (*option_data == 0) break;
2819    
2820          /* Check for a single-character option that has data: OP_OP_NUMBER(S)
2821          are used for ones that either have a numerical number or defaults, i.e.
2822          the data is optional. If a digit follows, there is data; if not, carry on
2823          with other single-character options in the same string. */
2824    
2825          if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
2826            {
2827            if (isdigit((unsigned char)s[1])) break;
2828          }          }
2829        if (op->type != OP_NODATA || s[1] == 0)        else   /* Check for an option with data */
2830          {          {
2831          option_data = s+1;          if (op->type != OP_NODATA) break;
         break;  
2832          }          }
2833    
2834          /* Handle a single-character option with no data, then loop for the
2835          next character in the string. */
2836    
2837        pcre_options = handle_option(*s++, pcre_options);        pcre_options = handle_option(*s++, pcre_options);
2838        }        }
2839      }      }
# Line 1957  for (i = 1; i < argc; i++) Line 2848  for (i = 1; i < argc; i++)
2848      continue;      continue;
2849      }      }
2850    
2851    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
2852    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
2853    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r",
2854    Jeffrey Friedl's special -S debugging option. */    "only-matching", and Jeffrey Friedl's special -S debugging option. */
2855    
2856    if (*option_data == 0 &&    if (*option_data == 0 &&
2857        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
2858           op->type == OP_OP_NUMBERS))
2859      {      {
2860      switch (op->one_char)      switch (op->one_char)
2861        {        {
2862        case N_COLOUR:        case N_COLOUR:
2863        colour_option = (char *)"auto";        colour_option = (char *)"auto";
2864        break;        break;
2865    
2866          case 'o':
2867          only_matching_last = add_number(0, only_matching_last);
2868          if (only_matching == NULL) only_matching = only_matching_last;
2869          break;
2870    
2871  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
2872        case 'S':        case 'S':
2873        S_arg = 0;        S_arg = 0;
# Line 1986  for (i = 1; i < argc; i++) Line 2884  for (i = 1; i < argc; i++)
2884      if (i >= argc - 1 || longopwasequals)      if (i >= argc - 1 || longopwasequals)
2885        {        {
2886        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2887        exit(usage(2));        pcregrep_exit(usage(2));
2888        }        }
2889      option_data = argv[++i];      option_data = argv[++i];
2890      }      }
2891    
2892    /* If the option type is OP_PATLIST, it's the -e option, which can be called    /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
2893    multiple times to create a list of patterns. */    added to a chain of numbers. */
2894    
2895      if (op->type == OP_OP_NUMBERS)
2896        {
2897        unsigned long int n = decode_number(option_data, op, longop);
2898        omdatastr *omd = (omdatastr *)op->dataptr;
2899        *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
2900        if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
2901        }
2902    
2903      /* If the option type is OP_PATLIST, it's the -e option, or one of the
2904      include/exclude options, which can be called multiple times to create lists
2905      of patterns. */
2906    
2907      else if (op->type == OP_PATLIST)
2908        {
2909        patdatastr *pd = (patdatastr *)op->dataptr;
2910        *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2911        if (*(pd->lastptr) == NULL) goto EXIT2;
2912        if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2913        }
2914    
2915      /* If the option type is OP_FILELIST, it's one of the options that names a
2916      file. */
2917    
2918    if (op->type == OP_PATLIST)    else if (op->type == OP_FILELIST)
2919      {      {
2920      if (cmd_pattern_count >= MAX_PATTERN_COUNT)      fndatastr *fd = (fndatastr *)op->dataptr;
2921        fn = (fnstr *)malloc(sizeof(fnstr));
2922        if (fn == NULL)
2923        {        {
2924        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",        fprintf(stderr, "pcregrep: malloc failed\n");
2925          MAX_PATTERN_COUNT);        goto EXIT2;
2926        return 2;        }
2927        fn->next = NULL;
2928        fn->name = option_data;
2929        if (*(fd->anchor) == NULL)
2930          *(fd->anchor) = fn;
2931        else
2932          (*(fd->lastptr))->next = fn;
2933        *(fd->lastptr) = fn;
2934        }
2935    
2936      /* Handle OP_BINFILES */
2937    
2938      else if (op->type == OP_BINFILES)
2939        {
2940        if (strcmp(option_data, "binary") == 0)
2941          binary_files = BIN_BINARY;
2942        else if (strcmp(option_data, "without-match") == 0)
2943          binary_files = BIN_NOMATCH;
2944        else if (strcmp(option_data, "text") == 0)
2945          binary_files = BIN_TEXT;
2946        else
2947          {
2948          fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2949            option_data);
2950          pcregrep_exit(usage(2));
2951        }        }
     patterns[cmd_pattern_count++] = option_data;  
2952      }      }
2953    
2954    /* Otherwise, deal with single string or numeric data values. */    /* Otherwise, deal with a single string or numeric data value. */
2955    
2956    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)    else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2957               op->type != OP_OP_NUMBER)
2958      {      {
2959      *((char **)op->dataptr) = option_data;      *((char **)op->dataptr) = option_data;
2960      }      }
2961    else    else
2962      {      {
2963      char *endptr;      unsigned long int n = decode_number(option_data, op, longop);
2964      int n = strtoul(option_data, &endptr, 10);      if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
2965      if (*endptr != 0)        else *((int *)op->dataptr) = n;
       {  
       if (longop)  
         {  
         char *equals = strchr(op->long_name, '=');  
         int nlen = (equals == NULL)? (int)strlen(op->long_name) :  
           equals - op->long_name;  
         fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",  
           option_data, nlen, op->long_name);  
         }  
       else  
         fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",  
           option_data, op->one_char);  
       exit(usage(2));  
       }  
     *((int *)op->dataptr) = n;  
2966      }      }
2967    }    }
2968    
# Line 2044  if (both_context > 0) Line 2976  if (both_context > 0)
2976    }    }
2977    
2978  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2979  However, the latter two set the only_matching flag. */  However, all three set show_only_matching because they display, each in their
2980    own way, only the data that has matched. */
2981    
2982  if ((only_matching && (file_offsets || line_offsets)) ||  if ((only_matching != NULL && (file_offsets || line_offsets)) ||
2983      (file_offsets && line_offsets))      (file_offsets && line_offsets))
2984    {    {
2985    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2986      "and/or --line-offsets\n");      "and/or --line-offsets\n");
2987    exit(usage(2));    pcregrep_exit(usage(2));
2988    }    }
2989    
2990  if (file_offsets || line_offsets) only_matching = TRUE;  if (only_matching != NULL || file_offsets || line_offsets)
2991      show_only_matching = TRUE;
2992    
2993  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2994  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
# Line 2071  if (locale == NULL) Line 3005  if (locale == NULL)
3005    locale_from = "LC_CTYPE";    locale_from = "LC_CTYPE";
3006    }    }
3007    
3008  /* If a locale has been provided, set it, and generate the tables the PCRE  /* If a locale is set, use it to generate the tables the PCRE needs. Otherwise,
3009  needs. Otherwise, pcretables==NULL, which causes the use of default tables. */  pcretables==NULL, which causes the use of default tables. */
3010    
3011  if (locale != NULL)  if (locale != NULL)
3012    {    {
# Line 2080  if (locale != NULL) Line 3014  if (locale != NULL)
3014      {      {
3015      fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",      fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
3016        locale, locale_from);        locale, locale_from);
3017      return 2;      goto EXIT2;
3018      }      }
3019    pcretables = pcre_maketables();    pcretables = pcre_maketables();
3020    }    }
# Line 2095  if (colour_option != NULL && strcmp(colo Line 3029  if (colour_option != NULL && strcmp(colo
3029      {      {
3030      fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",      fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
3031        colour_option);        colour_option);
3032      return 2;      goto EXIT2;
3033      }      }
3034    if (do_colour)    if (do_colour)
3035      {      {
# Line 2135  else if (strcmp(newline, "anycrlf") == 0 Line 3069  else if (strcmp(newline, "anycrlf") == 0
3069  else  else
3070    {    {
3071    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
3072    return 2;    goto EXIT2;
3073    }    }
3074    
3075  /* Interpret the text values for -d and -D */  /* Interpret the text values for -d and -D */
# Line 2148  if (dee_option != NULL) Line 3082  if (dee_option != NULL)
3082    else    else
3083      {      {
3084      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
3085      return 2;      goto EXIT2;
3086      }      }
3087    }    }
3088    
# Line 2159  if (DEE_option != NULL) Line 3093  if (DEE_option != NULL)
3093    else    else
3094      {      {
3095      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
3096      return 2;      goto EXIT2;
3097      }      }
3098    }    }
3099    
# Line 2178  if (jfriedl_XT != 0 || jfriedl_XR != 0) Line 3112  if (jfriedl_XT != 0 || jfriedl_XR != 0)
3112    }    }
3113  #endif  #endif
3114    
3115  /* Get memory to store the pattern and hints lists. */  /* Get memory for the main buffer. */
3116    
3117  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  bufsize = 3*bufthird;
3118  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  main_buffer = (char *)malloc(bufsize);
3119    
3120  if (pattern_list == NULL || hints_list == NULL)  if (main_buffer == NULL)
3121    {    {
3122    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
3123    goto EXIT2;    goto EXIT2;
3124    }    }
3125    
3126  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there are no files provided by -f,
3127  the first argument is the one and only pattern, and it must exist. */  the first argument is the one and only pattern, and it must exist. */
3128    
3129  if (cmd_pattern_count == 0 && pattern_filename == NULL)  if (patterns == NULL && pattern_files == NULL)
3130    {    {
3131    if (i >= argc) return usage(2);    if (i >= argc) return usage(2);
3132    patterns[cmd_pattern_count++] = argv[i++];    patterns = patterns_last = add_pattern(argv[i++], NULL);
3133      if (patterns == NULL) goto EXIT2;
3134    }    }
3135    
3136  /* Compile the patterns that were provided on the command line, either by  /* Compile the patterns that were provided on the command line, either by
3137  multiple uses of -e or as a single unkeyed pattern. */  multiple uses of -e or as a single unkeyed pattern. We cannot do this until
3138    after all the command-line options are read so that we know which PCRE options
3139    to use. When -F is used, compile_pattern() may add another block into the
3140    chain, so we must not access the next pointer till after the compile. */
3141    
3142  for (j = 0; j < cmd_pattern_count; j++)  for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3143    {    {
3144    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
3145         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 1 && patterns->next == NULL)? 0 : j))
3146      goto EXIT2;      goto EXIT2;
3147    }    }
3148    
3149  /* Compile the regular expressions that are provided in a file. */  /* Read and compile the regular expressions that are provided in files. */
3150    
3151  if (pattern_filename != NULL)  for (fn = pattern_files; fn != NULL; fn = fn->next)
3152    {    {
3153    int linenumber = 0;    if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3154    FILE *f;      goto EXIT2;
3155    char *filename;    }
   char buffer[MBUFTHIRD];  
3156    
3157    if (strcmp(pattern_filename, "-") == 0)  /* Study the regular expressions, as we will be running them many times. If an
3158      {  extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
3159      f = stdin;  returned, even if studying produces no data. */
     filename = stdin_name;  
     }  
   else  
     {  
     f = fopen(pattern_filename, "r");  
     if (f == NULL)  
       {  
       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,  
         strerror(errno));  
       goto EXIT2;  
       }  
     filename = pattern_filename;  
     }  
3160    
3161    while (fgets(buffer, MBUFTHIRD, f) != NULL)  if (match_limit > 0 || match_limit_recursion > 0)
3162      {    study_options |= PCRE_STUDY_EXTRA_NEEDED;
     char *s = buffer + (int)strlen(buffer);  
     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;  
     *s = 0;  
     linenumber++;  
     if (buffer[0] == 0) continue;   /* Skip blank lines */  
     if (!compile_pattern(buffer, pcre_options, filename, linenumber))  
       goto EXIT2;  
     }  
3163    
3164    if (f != stdin) fclose(f);  /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
   }  
3165    
3166  /* Study the regular expressions, as we will be running them many times */  #ifdef SUPPORT_PCREGREP_JIT
3167    if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3168      jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
3169    #endif
3170    
3171  for (j = 0; j < pattern_count; j++)  for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3172    {    {
3173    hints_list[j] = pcre_study(pattern_list[j], 0, &error);    cp->hint = pcre_study(cp->compiled, study_options, &error);
3174    if (error != NULL)    if (error != NULL)
3175      {      {
3176      char s[16];      char s[16];
3177      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
3178      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
3179      goto EXIT2;      goto EXIT2;
3180      }      }
3181    hint_count++;  #ifdef SUPPORT_PCREGREP_JIT
3182      if (jit_stack != NULL && cp->hint != NULL)
3183        pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
3184    #endif
3185    }    }
3186    
3187  /* If there are include or exclude patterns, compile them. */  /* If --match-limit or --recursion-limit was set, put the value(s) into the
3188    pcre_extra block for each pattern. There will always be an extra block because
3189    of the use of PCRE_STUDY_EXTRA_NEEDED above. */
3190    
3191  if (exclude_pattern != NULL)  for (cp = patterns; cp != NULL; cp = cp->next)
3192    {    {
3193    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,    if (match_limit > 0)
     pcretables);  
   if (exclude_compiled == NULL)  
3194      {      {
3195      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
3196        errptr, error);      cp->hint->match_limit = match_limit;
3197      goto EXIT2;      }
3198    
3199      if (match_limit_recursion > 0)
3200        {
3201        cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3202        cp->hint->match_limit_recursion = match_limit_recursion;
3203      }      }
3204    }    }
3205    
3206  if (include_pattern != NULL)  /* If there are include or exclude patterns read from the command line, compile
3207    them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3208    0. */
3209    
3210    for (j = 0; j < 4; j++)
3211    {    {
3212    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,    int k;
3213      pcretables);    for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
   if (include_compiled == NULL)  
3214      {      {
3215      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
3216        errptr, error);           (k == 1 && cp->next == NULL)? 0 : k))
3217      goto EXIT2;        goto EXIT2;
3218      }      }
3219    }    }
3220    
3221  /* If there are no further arguments, do the business on stdin and exit. */  /* Read and compile include/exclude patterns from files. */
3222    
3223    for (fn = include_from; fn != NULL; fn = fn->next)
3224      {
3225      if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3226        goto EXIT2;
3227      }
3228    
3229    for (fn = exclude_from; fn != NULL; fn = fn->next)
3230      {
3231      if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3232        goto EXIT2;
3233      }
3234    
3235    /* If there are no files that contain lists of files to search, and there are
3236    no file arguments, search stdin, and then exit. */
3237    
3238  if (i >= argc)  if (file_lists == NULL && i >= argc)
3239    {    {
3240    rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);    rc = pcregrep(stdin, FR_PLAIN, stdin_name,
3241        (filenames > FN_DEFAULT)? stdin_name : NULL);
3242    goto EXIT;    goto EXIT;
3243    }    }
3244    
3245  /* Otherwise, work through the remaining arguments as files or directories.  /* If any files that contain lists of files to search have been specified,
3246  Pass in the fact that there is only one argument at top level - this suppresses  read them line by line and search the given files. */
3247  the file name if the argument is not a directory and filenames are not  
3248  otherwise forced. */  for (fn = file_lists; fn != NULL; fn = fn->next)
3249      {
3250      char buffer[PATBUFSIZE];
3251      FILE *fl;
3252      if (strcmp(fn->name, "-") == 0) fl = stdin; else
3253        {
3254        fl = fopen(fn->name, "rb");
3255        if (fl == NULL)
3256          {
3257          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
3258            strerror(errno));
3259          goto EXIT2;
3260          }
3261        }
3262      while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3263        {
3264        int frc;
3265        char *end = buffer + (int)strlen(buffer);
3266        while (end > buffer && isspace(end[-1])) end--;
3267        *end = 0;
3268        if (*buffer != 0)
3269          {
3270          frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3271          if (frc > 1) rc = frc;
3272            else if (frc == 0 && rc == 1) rc = 0;
3273          }
3274        }
3275      if (fl != stdin) fclose(fl);
3276      }
3277    
3278    /* After handling file-list, work through remaining arguments. Pass in the fact
3279    that there is only one argument at top level - this suppresses the file name if
3280    the argument is not a directory and filenames are not otherwise forced. */
3281    
3282  only_one_at_top = i == argc - 1;   /* Catch initial value of i */  only_one_at_top = i == argc - 1 && file_lists == NULL;
3283    
3284  for (; i < argc; i++)  for (; i < argc; i++)
3285    {    {
# Line 2313  for (; i < argc; i++) Line 3290  for (; i < argc; i++)
3290    }    }
3291    
3292  EXIT:  EXIT:
3293  if (pattern_list != NULL)  #ifdef SUPPORT_PCREGREP_JIT
3294    {  if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3295    for (i = 0; i < pattern_count; i++) free(pattern_list[i]);  #endif
3296    free(pattern_list);  
3297    }  free(main_buffer);
3298  if (hints_list != NULL)  free((void *)pcretables);
3299    
3300    free_pattern_chain(patterns);
3301    free_pattern_chain(include_patterns);
3302    free_pattern_chain(include_dir_patterns);
3303    free_pattern_chain(exclude_patterns);
3304    free_pattern_chain(exclude_dir_patterns);
3305    
3306    free_file_chain(exclude_from);
3307    free_file_chain(include_from);
3308    free_file_chain(pattern_files);
3309    free_file_chain(file_lists);
3310    
3311    while (only_matching != NULL)
3312    {    {
3313    for (i = 0; i < hint_count; i++) free(hints_list[i]);    omstr *this = only_matching;
3314    free(hints_list);    only_matching = this->next;
3315      free(this);
3316    }    }
3317  return rc;  
3318    pcregrep_exit(rc);
3319    
3320  EXIT2:  EXIT2:
3321  rc = 2;  rc = 2;

Legend:
Removed from v.296  
changed lines
  Added in v.1548

  ViewVC Help
Powered by ViewVC 1.1.5