/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 296 by ph10, Tue Jan 1 20:09:30 2008 UTC revision 1324 by ph10, Fri May 10 11:40:06 2013 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 70  POSSIBILITY OF SUCH DAMAGE. Line 70  POSSIBILITY OF SUCH DAMAGE.
70    
71  typedef int BOOL;  typedef int BOOL;
72    
73  #define MAX_PATTERN_COUNT 100  #define OFFSET_SIZE 99
74    
75  #if BUFSIZ > 8192  #if BUFSIZ > 8192
76  #define MBUFTHIRD BUFSIZ  #define MAXPATLEN BUFSIZ
77  #else  #else
78  #define MBUFTHIRD 8192  #define MAXPATLEN 8192
79  #endif  #endif
80    
81    #define PATBUFSIZE (MAXPATLEN + 10)   /* Allows for prefix+suffix */
82    
83  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
84  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
85  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
86    
87  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
88    
89  /* File reading styles */  /* File reading styles */
90    
# Line 103  enum { DEE_READ, DEE_SKIP }; Line 105  enum { DEE_READ, DEE_SKIP };
105    
106  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
107    
108    /* Binary file options */
109    
110    enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
111    
112    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
113    environments), a warning is issued if the value of fwrite() is ignored.
114    Unfortunately, casting to (void) does not suppress the warning. To get round
115    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
116    apply to fprintf(). */
117    
/* The do { } while (0) wrapper makes FWRITE(...); a single statement, so it
can be used as the unbraced body of an if that has an else. The inner "if"
still consumes fwrite()'s result, which is what suppresses the warning. */
#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
119    
120    
121    
122  /*************************************************  /*************************************************
# Line 126  static char *colour_string = (char *)"1; Line 140  static char *colour_string = (char *)"1;
140  static char *colour_option = NULL;  static char *colour_option = NULL;
141  static char *dee_option = NULL;  static char *dee_option = NULL;
142  static char *DEE_option = NULL;  static char *DEE_option = NULL;
143    static char *locale = NULL;
144    static char *main_buffer = NULL;
145  static char *newline = NULL;  static char *newline = NULL;
146  static char *pattern_filename = NULL;  static char *om_separator = (char *)"";
147  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
 static char *locale = NULL;  
148    
149  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
150    
 static int  pattern_count = 0;  
 static pcre **pattern_list = NULL;  
 static pcre_extra **hints_list = NULL;  
   
 static char *include_pattern = NULL;  
 static char *exclude_pattern = NULL;  
   
 static pcre *include_compiled = NULL;  
 static pcre *exclude_compiled = NULL;  
   
151  static int after_context = 0;  static int after_context = 0;
152  static int before_context = 0;  static int before_context = 0;
153    static int binary_files = BIN_BINARY;
154  static int both_context = 0;  static int both_context = 0;
155    static int bufthird = PCREGREP_BUFSIZE;
156    static int bufsize = 3*PCREGREP_BUFSIZE;
157    
158    #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
159    static int dee_action = dee_SKIP;
160    #else
161  static int dee_action = dee_READ;  static int dee_action = dee_READ;
162    #endif
163    
164  static int DEE_action = DEE_READ;  static int DEE_action = DEE_READ;
165  static int error_count = 0;  static int error_count = 0;
166  static int filenames = FN_DEFAULT;  static int filenames = FN_DEFAULT;
167    static int pcre_options = 0;
168  static int process_options = 0;  static int process_options = 0;
169    
170    #ifdef SUPPORT_PCREGREP_JIT
171    static int study_options = PCRE_STUDY_JIT_COMPILE;
172    #else
173    static int study_options = 0;
174    #endif
175    
176    static unsigned long int match_limit = 0;
177    static unsigned long int match_limit_recursion = 0;
178    
179  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
180  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
181  static BOOL file_offsets = FALSE;  static BOOL file_offsets = FALSE;
182  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
183  static BOOL invert = FALSE;  static BOOL invert = FALSE;
184    static BOOL line_buffered = FALSE;
185  static BOOL line_offsets = FALSE;  static BOOL line_offsets = FALSE;
186  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
187  static BOOL number = FALSE;  static BOOL number = FALSE;
188  static BOOL only_matching = FALSE;  static BOOL omit_zero_count = FALSE;
189    static BOOL resource_error = FALSE;
190  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
191    static BOOL show_only_matching = FALSE;
192  static BOOL silent = FALSE;  static BOOL silent = FALSE;
193  static BOOL utf8 = FALSE;  static BOOL utf8 = FALSE;
194    
195    /* Structure for list of --only-matching capturing numbers. */
196    
197    typedef struct omstr {
198      struct omstr *next;
199      int groupnum;
200    } omstr;
201    
202    static omstr *only_matching = NULL;
203    static omstr *only_matching_last = NULL;
204    
205    /* Structure for holding the two variables that describe a number chain. */
206    
207    typedef struct omdatastr {
208      omstr **anchor;
209      omstr **lastptr;
210    } omdatastr;
211    
212    static omdatastr only_matching_data = { &only_matching, &only_matching_last };
213    
214    /* Structure for list of file names (for -f and --{in,ex}clude-from) */
215    
216    typedef struct fnstr {
217      struct fnstr *next;
218      char *name;
219    } fnstr;
220    
221    static fnstr *exclude_from = NULL;
222    static fnstr *exclude_from_last = NULL;
223    static fnstr *include_from = NULL;
224    static fnstr *include_from_last = NULL;
225    
226    static fnstr *file_lists = NULL;
227    static fnstr *file_lists_last = NULL;
228    static fnstr *pattern_files = NULL;
229    static fnstr *pattern_files_last = NULL;
230    
231    /* Structure for holding the two variables that describe a file name chain. */
232    
233    typedef struct fndatastr {
234      fnstr **anchor;
235      fnstr **lastptr;
236    } fndatastr;
237    
238    static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
239    static fndatastr include_from_data = { &include_from, &include_from_last };
240    static fndatastr file_lists_data = { &file_lists, &file_lists_last };
241    static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
242    
243    /* Structure for pattern and its compiled form; used for matching patterns and
244    also for include/exclude patterns. */
245    
246    typedef struct patstr {
247      struct patstr *next;
248      char *string;
249      pcre *compiled;
250      pcre_extra *hint;
251    } patstr;
252    
253    static patstr *patterns = NULL;
254    static patstr *patterns_last = NULL;
255    static patstr *include_patterns = NULL;
256    static patstr *include_patterns_last = NULL;
257    static patstr *exclude_patterns = NULL;
258    static patstr *exclude_patterns_last = NULL;
259    static patstr *include_dir_patterns = NULL;
260    static patstr *include_dir_patterns_last = NULL;
261    static patstr *exclude_dir_patterns = NULL;
262    static patstr *exclude_dir_patterns_last = NULL;
263    
264    /* Structure holding the two variables that describe a pattern chain. A pointer
265    to such structures is used for each appropriate option. */
266    
267    typedef struct patdatastr {
268      patstr **anchor;
269      patstr **lastptr;
270    } patdatastr;
271    
272    static patdatastr match_patdata = { &patterns, &patterns_last };
273    static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
274    static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
275    static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
276    static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
277    
278    static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
279                                     &include_dir_patterns, &exclude_dir_patterns };
280    
281    static const char *incexname[4] = { "--include", "--exclude",
282                                        "--include-dir", "--exclude-dir" };
283    
284  /* Structure for options and list of them */  /* Structure for options and list of them */
285    
286  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
287         OP_PATLIST };         OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
288    
289  typedef struct option_item {  typedef struct option_item {
290    int type;    int type;
# Line 181  typedef struct option_item { Line 297  typedef struct option_item {
297  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
298  used to identify them. */  used to identify them. */
299    
300  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
301  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
302  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
303  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
304  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
305  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
306  #define N_NULL      (-7)  #define N_LABEL        (-7)
307  #define N_LOFFSETS  (-8)  #define N_LOCALE       (-8)
308  #define N_FOFFSETS  (-9)  #define N_NULL         (-9)
309    #define N_LOFFSETS     (-10)
310    #define N_FOFFSETS     (-11)
311    #define N_LBUFFER      (-12)
312    #define N_M_LIMIT      (-13)
313    #define N_M_LIMIT_REC  (-14)
314    #define N_BUFSIZE      (-15)
315    #define N_NOJIT        (-16)
316    #define N_FILE_LIST    (-17)
317    #define N_BINARY_FILES (-18)
318    #define N_EXCLUDE_FROM (-19)
319    #define N_INCLUDE_FROM (-20)
320    #define N_OM_SEPARATOR (-21)
321    
322  static option_item optionlist[] = {  static option_item optionlist[] = {
323    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
324    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
325    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
326    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
327    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
328    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },    { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
329    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
330    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
331    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
332    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
333    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
334    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
335    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
336    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },    { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
337    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
338    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
339    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
340    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
341    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
342    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
343    { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },    { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
344    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
345    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },  #ifdef SUPPORT_PCREGREP_JIT
346    { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
347    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },  #else
348    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
349    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },  #endif
350    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
351    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
352    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
353      { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
354      { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
355      { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
356      { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
357      { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
358      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
359      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
360      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
361      { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
362      { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
363      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
364      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
365      { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
366      { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
367      { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
368      { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
369      { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
370      { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
371    
372      /* These two were accidentally implemented with underscores instead of
373      hyphens in the option names. As this was not discovered for several releases,
374      the incorrect versions are left in the table for compatibility. However, the
375      --help function misses out any option that has an underscore in its name. */
376    
377      { OP_PATLIST,   N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
378      { OP_PATLIST,   N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
379    
380  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
381    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
382  #endif  #endif
# Line 237  static option_item optionlist[] = { Line 392  static option_item optionlist[] = {
392  /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F  /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
393  options. These set the 1, 2, and 4 bits in process_options, respectively. Note  options. These set the 1, 2, and 4 bits in process_options, respectively. Note
394  that the combination of -w and -x has the same effect as -x on its own, so we  that the combination of -w and -x has the same effect as -x on its own, so we
395  can treat them as the same. */  can treat them as the same. Note that the MAXPATLEN macro assumes the longest
396    prefix+suffix is 10 characters; if anything longer is added, it must be
397    adjusted. */
398    
399  static const char *prefix[] = {  static const char *prefix[] = {
400    "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };    "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
# Line 258  const char utf8_table4[] = { Line 415  const char utf8_table4[] = {
415    
416    
417  /*************************************************  /*************************************************
418    *         Exit from the program                  *
419    *************************************************/
420    
421    /* If there has been a resource error, give a suitable message.
422    
423    Argument:  the return code
424    Returns:   does not return
425    */
426    
427    static void
428    pcregrep_exit(int rc)
429    {
430    if (resource_error)
431      {
432      fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
433        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
434        PCRE_ERROR_JIT_STACKLIMIT);
435      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
436      }
437    exit(rc);
438    }
439    
440    
441    /*************************************************
442    *          Add item to chain of patterns         *
443    *************************************************/
444    
445    /* Used to add an item onto a chain, or just return an unconnected item if the
446    "after" argument is NULL.
447    
448    Arguments:
449      s          pattern string to add
450      after      if not NULL points to item to insert after
451    
452    Returns:     new pattern block
453    */
454    
455    static patstr *
456    add_pattern(char *s, patstr *after)
457    {
458    patstr *p = (patstr *)malloc(sizeof(patstr));
459    if (p == NULL)
460      {
461      fprintf(stderr, "pcregrep: malloc failed\n");
462      pcregrep_exit(2);
463      }
464    if (strlen(s) > MAXPATLEN)
465      {
466      fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
467        MAXPATLEN);
468      return NULL;
469      }
470    p->next = NULL;
471    p->string = s;
472    p->compiled = NULL;
473    p->hint = NULL;
474    
475    if (after != NULL)
476      {
477      p->next = after->next;
478      after->next = p;
479      }
480    return p;
481    }
482    
483    
484    /*************************************************
485    *           Free chain of patterns               *
486    *************************************************/
487    
488    /* Used for several chains of patterns.
489    
490    Argument: pointer to start of chain
491    Returns:  nothing
492    */
493    
494    static void
495    free_pattern_chain(patstr *pc)
496    {
497    while (pc != NULL)
498      {
499      patstr *p = pc;
500      pc = p->next;
501      if (p->hint != NULL) pcre_free_study(p->hint);
502      if (p->compiled != NULL) pcre_free(p->compiled);
503      free(p);
504      }
505    }
506    
507    
508    /*************************************************
509    *           Free chain of file names             *
510    *************************************************/
511    
512    /*
513    Argument: pointer to start of chain
514    Returns:  nothing
515    */
516    
517    static void
518    free_file_chain(fnstr *fn)
519    {
520    while (fn != NULL)
521      {
522      fnstr *f = fn;
523      fn = f->next;
524      free(f);
525      }
526    }
527    
528    
529    /*************************************************
530  *            OS-specific functions               *  *            OS-specific functions               *
531  *************************************************/  *************************************************/
532    
# Line 273  although at present the only ones are fo Line 542  although at present the only ones are fo
542  #include <dirent.h>  #include <dirent.h>
543    
544  typedef DIR directory_type;  typedef DIR directory_type;
545    #define FILESEP '/'
546    
547  static int  static int
548  isdirectory(char *filename)  isdirectory(char *filename)
# Line 280  isdirectory(char *filename) Line 550  isdirectory(char *filename)
550  struct stat statbuf;  struct stat statbuf;
551  if (stat(filename, &statbuf) < 0)  if (stat(filename, &statbuf) < 0)
552    return 0;        /* In the expectation that opening as a file will fail */    return 0;        /* In the expectation that opening as a file will fail */
553  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return (statbuf.st_mode & S_IFMT) == S_IFDIR;
554  }  }
555    
556  static directory_type *  static directory_type *
# Line 321  return (statbuf.st_mode & S_IFMT) == S_I Line 591  return (statbuf.st_mode & S_IFMT) == S_I
591  }  }
592    
593    
594  /************* Test stdout for being a terminal in Unix **********/  /************* Test for a terminal in Unix **********/
595    
596  static BOOL  static BOOL
597  is_stdout_tty(void)  is_stdout_tty(void)
# Line 329  is_stdout_tty(void) Line 599  is_stdout_tty(void)
599  return isatty(fileno(stdout));  return isatty(fileno(stdout));
600  }  }
601    
602    static BOOL
603    is_file_tty(FILE *f)
604    {
605    return isatty(fileno(f));
606    }
607    
608    
609  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
610    
# Line 336  return isatty(fileno(stdout)); Line 612  return isatty(fileno(stdout));
612  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
613  when it did not exist. David Byron added a patch that moved the #include of  when it did not exist. David Byron added a patch that moved the #include of
614  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
615  */  The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
616    undefined when it is indeed undefined. */
617    
618  #elif HAVE_WINDOWS_H  #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
619    
620  #ifndef STRICT  #ifndef STRICT
621  # define STRICT  # define STRICT
# Line 360  BOOL first; Line 637  BOOL first;
637  WIN32_FIND_DATA data;  WIN32_FIND_DATA data;
638  } directory_type;  } directory_type;
639    
640    #define FILESEP '/'
641    
642  int  int
643  isdirectory(char *filename)  isdirectory(char *filename)
644  {  {
645  DWORD attr = GetFileAttributes(filename);  DWORD attr = GetFileAttributes(filename);
646  if (attr == INVALID_FILE_ATTRIBUTES)  if (attr == INVALID_FILE_ATTRIBUTES)
647    return 0;    return 0;
648  return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;  return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
649  }  }
650    
651  directory_type *  directory_type *
# Line 377  char *pattern; Line 656  char *pattern;
656  directory_type *dir;  directory_type *dir;
657  DWORD err;  DWORD err;
658  len = strlen(filename);  len = strlen(filename);
659  pattern = (char *) malloc(len + 3);  pattern = (char *)malloc(len + 3);
660  dir = (directory_type *) malloc(sizeof(*dir));  dir = (directory_type *)malloc(sizeof(*dir));
661  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
662    {    {
663    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
664    exit(2);    pcregrep_exit(2);
665    }    }
666  memcpy(pattern, filename, len);  memcpy(pattern, filename, len);
667  memcpy(&(pattern[len]), "\\*", 3);  memcpy(&(pattern[len]), "\\*", 3);
# Line 441  return !isdirectory(filename); Line 720  return !isdirectory(filename);
720  }  }
721    
722    
723  /************* Test stdout for being a terminal in Win32 **********/  /************* Test for a terminal in Win32 **********/
724    
725  /* I don't know how to do this; assume never */  /* I don't know how to do this; assume never */
726    
# Line 451  is_stdout_tty(void) Line 730  is_stdout_tty(void)
730  return FALSE;  return FALSE;
731  }  }
732    
733    static BOOL
734    is_file_tty(FILE *f)
735    {
736    return FALSE;
737    }
738    
739    
740  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
741    
# Line 458  return FALSE; Line 743  return FALSE;
743    
744  #else  #else
745    
746    #define FILESEP 0
747  typedef void directory_type;  typedef void directory_type;
748    
749  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
# Line 473  void closedirectory(directory_type *dir) Line 759  void closedirectory(directory_type *dir)
759  int isregfile(char *filename) { return 1; }  int isregfile(char *filename) { return 1; }
760    
761    
762  /************* Test stdout for being a terminal when we can't do it **********/  /************* Test for a terminal when we can't do it **********/
763    
764  static BOOL  static BOOL
765  is_stdout_tty(void)  is_stdout_tty(void)
# Line 481  is_stdout_tty(void) Line 767  is_stdout_tty(void)
767  return FALSE;  return FALSE;
768  }  }
769    
770    static BOOL
771    is_file_tty(FILE *f)
772    {
773    return FALSE;
774    }
775    
776  #endif  #endif
777    
# Line 509  return sys_errlist[n]; Line 800  return sys_errlist[n];
800    
801    
802  /*************************************************  /*************************************************
803    *                Usage function                  *
804    *************************************************/
805    
806    static int
807    usage(int rc)
808    {
809    option_item *op;
810    fprintf(stderr, "Usage: pcregrep [-");
811    for (op = optionlist; op->one_char != 0; op++)
812      {
813      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
814      }
815    fprintf(stderr, "] [long options] [pattern] [files]\n");
816    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
817      "options.\n");
818    return rc;
819    }
820    
821    
822    
823    /*************************************************
824    *                Help function                   *
825    *************************************************/
826    
827    static void
828    help(void)
829    {
830    option_item *op;
831    
832    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
833    printf("Search for PATTERN in each FILE or standard input.\n");
834    printf("PATTERN must be present if neither -e nor -f is used.\n");
835    printf("\"-\" can be used as a file name to mean STDIN.\n");
836    
837    #ifdef SUPPORT_LIBZ
838    printf("Files whose names end in .gz are read using zlib.\n");
839    #endif
840    
841    #ifdef SUPPORT_LIBBZ2
842    printf("Files whose names end in .bz2 are read using bzlib2.\n");
843    #endif
844    
845    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
846    printf("Other files and the standard input are read as plain files.\n\n");
847    #else
848    printf("All files are read as plain files, without any interpretation.\n\n");
849    #endif
850    
851    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
852    printf("Options:\n");
853    
854    for (op = optionlist; op->one_char != 0; op++)
855      {
856      int n;
857      char s[4];
858    
859      /* Two options were accidentally implemented and documented with underscores
860      instead of hyphens in their names, something that was not noticed for quite a
861      few releases. When fixing this, I left the underscored versions in the list
862      in case people were using them. However, we don't want to display them in the
863      help data. There are no other options that contain underscores, and we do not
864      expect ever to implement such options. Therefore, just omit any option that
865      contains an underscore. */
866    
867      if (strchr(op->long_name, '_') != NULL) continue;
868    
869      if (op->one_char > 0 && (op->long_name)[0] == 0)
870        n = 31 - printf("  -%c", op->one_char);
871      else
872        {
873        if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
874          else strcpy(s, "   ");
875        n = 31 - printf("  %s --%s", s, op->long_name);
876        }
877    
878      if (n < 1) n = 1;
879      printf("%.*s%s\n", n, "                           ", op->help_text);
880      }
881    
882    printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
883    printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
884    printf("When reading patterns or file names from a file, trailing white\n");
885    printf("space is removed and blank lines are ignored.\n");
886    printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
887    
888    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
889    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
890    }
891    
892    
893    
894    /*************************************************
895    *            Test exclude/includes               *
896    *************************************************/
897    
898    /* If any exclude pattern matches, the path is excluded. Otherwise, unless
899    there are no includes, the path must match an include pattern.
900    
901    Arguments:
902      path      the path to be matched
903      ip        the chain of include patterns
904      ep        the chain of exclude patterns
905    
906    Returns:    TRUE if the path is not excluded
907    */
908    
909    static BOOL
910    test_incexc(char *path, patstr *ip, patstr *ep)
911    {
912    int plen = strlen(path);
913    
914    for (; ep != NULL; ep = ep->next)
915      {
916      if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
917        return FALSE;
918      }
919    
920    if (ip == NULL) return TRUE;
921    
922    for (; ip != NULL; ip = ip->next)
923      {
924      if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
925        return TRUE;
926      }
927    
928    return FALSE;
929    }
930    
931    
932    
933    /*************************************************
934    *         Decode integer argument value          *
935    *************************************************/
936    
937    /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
938    because SunOS4 doesn't have it. This is used only for unpicking arguments, so
939    just keep it simple.
940    
941    Arguments:
942      option_data   the option data string
943      op            the option item (for error messages)
944      longop        TRUE if option given in long form
945    
946    Returns:        a long integer
947    */
948    
949    static long int
950    decode_number(char *option_data, option_item *op, BOOL longop)
951    {
952    unsigned long int n = 0;
953    char *endptr = option_data;
954    while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
955    while (isdigit((unsigned char)(*endptr)))
956      n = n * 10 + (int)(*endptr++ - '0');
957    if (toupper(*endptr) == 'K')
958      {
959      n *= 1024;
960      endptr++;
961      }
962    else if (toupper(*endptr) == 'M')
963      {
964      n *= 1024*1024;
965      endptr++;
966      }
967    
968    if (*endptr != 0)   /* Error */
969      {
970      if (longop)
971        {
972        char *equals = strchr(op->long_name, '=');
973        int nlen = (equals == NULL)? (int)strlen(op->long_name) :
974          (int)(equals - op->long_name);
975        fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
976          option_data, nlen, op->long_name);
977        }
978      else
979        fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
980          option_data, op->one_char);
981      pcregrep_exit(usage(2));
982      }
983    
984    return n;
985    }
986    
987    
988    
989    /*************************************************
990    *       Add item to a chain of numbers           *
991    *************************************************/
992    
993    /* Used to add an item onto a chain, or just return an unconnected item if the
994    "after" argument is NULL.
995    
996    Arguments:
997      n          the number to add
998      after      if not NULL points to item to insert after
999    
1000    Returns:     new number block
1001    */
1002    
1003    static omstr *
1004    add_number(int n, omstr *after)
1005    {
1006    omstr *om = (omstr *)malloc(sizeof(omstr));
1007    
1008    if (om == NULL)
1009      {
1010      fprintf(stderr, "pcregrep: malloc failed\n");
1011      pcregrep_exit(2);
1012      }
1013    om->next = NULL;
1014    om->groupnum = n;
1015    
1016    if (after != NULL)
1017      {
1018      om->next = after->next;
1019      after->next = om;
1020      }
1021    return om;
1022    }
1023    
1024    
1025    
1026    /*************************************************
1027    *            Read one line of input              *
1028    *************************************************/
1029    
1030    /* Normally, input is read using fread() into a large buffer, so many lines may
1031    be read at once. However, doing this for tty input means that no output appears
1032    until a lot of input has been typed. Instead, tty input is handled line by
1033    line. We cannot use fgets() for this, because it does not stop at a binary
1034    zero, and therefore there is no way of telling how many characters it has read,
1035    because there may be binary zeros embedded in the data.
1036    
1037    Arguments:
1038      buffer     the buffer to read into
1039      length     the maximum number of characters to read
1040      f          the file
1041    
1042    Returns:     the number of characters read, zero at end of file
1043    */
1044    
1045    static unsigned int
1046    read_one_line(char *buffer, int length, FILE *f)
1047    {
1048    int c;
1049    int yield = 0;
1050    while ((c = fgetc(f)) != EOF)
1051      {
1052      buffer[yield++] = c;
1053      if (c == '\n' || yield >= length) break;
1054      }
1055    return yield;
1056    }
1057    
1058    
1059    
1060    /*************************************************
1061  *             Find end of line                   *  *             Find end of line                   *
1062  *************************************************/  *************************************************/
1063    
# Line 520  Arguments: Line 1069  Arguments:
1069    endptr    end of available data    endptr    end of available data
1070    lenptr    where to put the length of the eol sequence    lenptr    where to put the length of the eol sequence
1071    
1072  Returns:    pointer to the last byte of the line  Returns:    pointer after the last byte of the line,
1073                including the newline byte(s)
1074  */  */
1075    
1076  static char *  static char *
# Line 589  switch(endlinetype) Line 1139  switch(endlinetype)
1139    
1140      switch (c)      switch (c)
1141        {        {
1142        case 0x0a:    /* LF */        case '\n':
1143        *lenptr = 1;        *lenptr = 1;
1144        return p;        return p;
1145    
1146        case 0x0d:    /* CR */        case '\r':
1147        if (p < endptr && *p == 0x0a)        if (p < endptr && *p == '\n')
1148          {          {
1149          *lenptr = 2;          *lenptr = 2;
1150          p++;          p++;
# Line 633  switch(endlinetype) Line 1183  switch(endlinetype)
1183    
1184      switch (c)      switch (c)
1185        {        {
1186        case 0x0a:    /* LF */        case '\n':    /* LF */
1187        case 0x0b:    /* VT */        case '\v':    /* VT */
1188        case 0x0c:    /* FF */        case '\f':    /* FF */
1189        *lenptr = 1;        *lenptr = 1;
1190        return p;        return p;
1191    
1192        case 0x0d:    /* CR */        case '\r':    /* CR */
1193        if (p < endptr && *p == 0x0a)        if (p < endptr && *p == '\n')
1194          {          {
1195          *lenptr = 2;          *lenptr = 2;
1196          p++;          p++;
# Line 648  switch(endlinetype) Line 1198  switch(endlinetype)
1198        else *lenptr = 1;        else *lenptr = 1;
1199        return p;        return p;
1200    
1201        case 0x85:    /* NEL */  #ifndef EBCDIC
1202          case 0x85:    /* Unicode NEL */
1203        *lenptr = utf8? 2 : 1;        *lenptr = utf8? 2 : 1;
1204        return p;        return p;
1205    
1206        case 0x2028:  /* LS */        case 0x2028:  /* Unicode LS */
1207        case 0x2029:  /* PS */        case 0x2029:  /* Unicode PS */
1208        *lenptr = 3;        *lenptr = 3;
1209        return p;        return p;
1210    #endif  /* Not EBCDIC */
1211    
1212        default:        default:
1213        break;        break;
# Line 714  switch(endlinetype) Line 1266  switch(endlinetype)
1266    
1267    while (p > startptr)    while (p > startptr)
1268      {      {
1269      register int c;      register unsigned int c;
1270      char *pp = p - 1;      char *pp = p - 1;
1271    
1272      if (utf8)      if (utf8)
# Line 739  switch(endlinetype) Line 1291  switch(endlinetype)
1291    
1292      if (endlinetype == EL_ANYCRLF) switch (c)      if (endlinetype == EL_ANYCRLF) switch (c)
1293        {        {
1294        case 0x0a:    /* LF */        case '\n':    /* LF */
1295        case 0x0d:    /* CR */        case '\r':    /* CR */
1296        return p;        return p;
1297    
1298        default:        default:
# Line 749  switch(endlinetype) Line 1301  switch(endlinetype)
1301    
1302      else switch (c)      else switch (c)
1303        {        {
1304        case 0x0a:    /* LF */        case '\n':    /* LF */
1305        case 0x0b:    /* VT */        case '\v':    /* VT */
1306        case 0x0c:    /* FF */        case '\f':    /* FF */
1307        case 0x0d:    /* CR */        case '\r':    /* CR */
1308        case 0x85:    /* NEL */  #ifndef EBCDIC
1309        case 0x2028:  /* LS */        case 0x85:    /* Unicode NEL */
1310        case 0x2029:  /* PS */        case 0x2028:  /* Unicode LS */
1311          case 0x2029:  /* Unicode PS */
1312    #endif  /* Not EBCDIC */
1313        return p;        return p;
1314    
1315        default:        default:
# Line 790  Arguments: Line 1344  Arguments:
1344  Returns:            nothing  Returns:            nothing
1345  */  */
1346    
1347  static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,  static void
1348    char *endptr, char *printname)  do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1349      char *printname)
1350  {  {
1351  if (after_context > 0 && lastmatchnumber > 0)  if (after_context > 0 && lastmatchnumber > 0)
1352    {    {
# Line 803  if (after_context > 0 && lastmatchnumber Line 1358  if (after_context > 0 && lastmatchnumber
1358      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
1359      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1360      pp = end_of_line(pp, endptr, &ellength);      pp = end_of_line(pp, endptr, &ellength);
1361      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1362      lastmatchrestart = pp;      lastmatchrestart = pp;
1363      }      }
1364    hyphenpending = TRUE;    hyphenpending = TRUE;
# Line 813  if (after_context > 0 && lastmatchnumber Line 1368  if (after_context > 0 && lastmatchnumber
1368    
1369    
1370  /*************************************************  /*************************************************
1371    *   Apply patterns to subject till one matches   *
1372    *************************************************/
1373    
1374    /* This function is called to run through all patterns, looking for a match. It
1375    is used multiple times for the same subject when colouring is enabled, in order
1376    to find all possible matches.
1377    
1378    Arguments:
1379      matchptr     the start of the subject
1380      length       the length of the subject to match
1381      options      options for pcre_exec
1382      startoffset  where to start matching
1383      offsets      the offsets vector to fill in
1384      mrc          address of where to put the result of pcre_exec()
1385    
1386    Returns:      TRUE if there was a match
1387                  FALSE if there was no match
1388                  invert if there was a non-fatal error
1389    */
1390    
1391    static BOOL
1392    match_patterns(char *matchptr, size_t length, unsigned int options,
1393      int startoffset, int *offsets, int *mrc)
1394    {
1395    int i;
1396    size_t slen = length;
1397    patstr *p = patterns;
1398    const char *msg = "this text:\n\n";
1399    
1400    if (slen > 200)
1401      {
1402      slen = 200;
1403      msg = "text that starts:\n\n";
1404      }
1405    for (i = 1; p != NULL; p = p->next, i++)
1406      {
1407      *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1408        startoffset, options, offsets, OFFSET_SIZE);
1409      if (*mrc >= 0) return TRUE;
1410      if (*mrc == PCRE_ERROR_NOMATCH) continue;
1411      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1412      if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1413      fprintf(stderr, "%s", msg);
1414      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1415      fprintf(stderr, "\n\n");
1416      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1417          *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1418        resource_error = TRUE;
1419      if (error_count++ > 20)
1420        {
1421        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1422        pcregrep_exit(2);
1423        }
1424      return invert;    /* No more matching; don't show the line again */
1425      }
1426    
1427    return FALSE;  /* No match, no errors */
1428    }
1429    
1430    
1431    
1432    /*************************************************
1433  *            Grep an individual file             *  *            Grep an individual file             *
1434  *************************************************/  *************************************************/
1435    
1436  /* This is called from grep_or_recurse() below. It uses a buffer that is three  /* This is called from grep_or_recurse() below. It uses a buffer that is three
1437  times the value of MBUFTHIRD. The matching point is never allowed to stray into  times the value of bufthird. The matching point is never allowed to stray into
1438  the top third of the buffer, thus keeping more of the file available for  the top third of the buffer, thus keeping more of the file available for
1439  context printing or for multiline scanning. For large files, the pointer will  context printing or for multiline scanning. For large files, the pointer will
1440  be in the middle third most of the time, so the bottom third is available for  be in the middle third most of the time, so the bottom third is available for
# Line 828  Arguments: Line 1445  Arguments:
1445                 the gzFile pointer when reading is via libz                 the gzFile pointer when reading is via libz
1446                 the BZFILE pointer when reading is via libbz2                 the BZFILE pointer when reading is via libbz2
1447    frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2    frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1448      filename     the file name or NULL (for errors)
1449    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
1450                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
1451                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
1452    
1453  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
1454                 1 otherwise (no matches)                 1 otherwise (no matches)
1455                 2 if there is a read error on a .bz2 file                 2 if an overlong line is encountered
1456                   3 if there is a read error on a .bz2 file
1457  */  */
1458    
1459  static int  static int
1460  pcregrep(void *handle, int frtype, char *printname)  pcregrep(void *handle, int frtype, char *filename, char *printname)
1461  {  {
1462  int rc = 1;  int rc = 1;
1463  int linenumber = 1;  int linenumber = 1;
1464  int lastmatchnumber = 0;  int lastmatchnumber = 0;
1465  int count = 0;  int count = 0;
1466  int filepos = 0;  int filepos = 0;
1467  int offsets[99];  int offsets[OFFSET_SIZE];
1468  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
1469  char buffer[3*MBUFTHIRD];  char *ptr = main_buffer;
 char *ptr = buffer;  
1470  char *endptr;  char *endptr;
1471  size_t bufflength;  size_t bufflength;
1472    BOOL binary = FALSE;
1473  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
1474    BOOL input_line_buffered = line_buffered;
1475  FILE *in = NULL;                    /* Ensure initialized */  FILE *in = NULL;                    /* Ensure initialized */
1476    
1477  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
# Line 868  of what we have. In the case of libz, a Line 1488  of what we have. In the case of libz, a
1488  plain file. However, if a .bz2 file isn't actually bzipped, the first read will  plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1489  fail. */  fail. */
1490    
1491    (void)frtype;
1492    
1493  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
1494  if (frtype == FR_LIBZ)  if (frtype == FR_LIBZ)
1495    {    {
1496    ingz = (gzFile)handle;    ingz = (gzFile)handle;
1497    bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);    bufflength = gzread (ingz, main_buffer, bufsize);
1498    }    }
1499  else  else
1500  #endif  #endif
# Line 881  else Line 1503  else
1503  if (frtype == FR_LIBBZ2)  if (frtype == FR_LIBBZ2)
1504    {    {
1505    inbz2 = (BZFILE *)handle;    inbz2 = (BZFILE *)handle;
1506    bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);    bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1507    if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */    if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1508    }                                    /* without the cast it is unsigned. */    }                                    /* without the cast it is unsigned. */
1509  else  else
# Line 889  else Line 1511  else
1511    
1512    {    {
1513    in = (FILE *)handle;    in = (FILE *)handle;
1514    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);    if (is_file_tty(in)) input_line_buffered = TRUE;
1515      bufflength = input_line_buffered?
1516        read_one_line(main_buffer, bufsize, in) :
1517        fread(main_buffer, 1, bufsize, in);
1518    }    }
1519    
1520  endptr = buffer + bufflength;  endptr = main_buffer + bufflength;
1521    
1522    /* Unless binary-files=text, see if we have a binary file. This uses the same
1523    rule as GNU grep, namely, a search for a binary zero byte near the start of the
1524    file. */
1525    
1526    if (binary_files != BIN_TEXT)
1527      {
1528      binary =
1529        memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1530      if (binary && binary_files == BIN_NOMATCH) return 1;
1531      }
1532    
1533  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
1534  files, endptr will be at the end of the buffer when we are in the middle of the  files, endptr will be at the end of the buffer when we are in the middle of the
# Line 901  way, the buffer is shifted left and re-f Line 1537  way, the buffer is shifted left and re-f
1537    
1538  while (ptr < endptr)  while (ptr < endptr)
1539    {    {
1540    int i, endlinelength;    int endlinelength;
1541    int mrc = 0;    int mrc = 0;
1542    BOOL match = FALSE;    int startoffset = 0;
1543      unsigned int options = 0;
1544      BOOL match;
1545    char *matchptr = ptr;    char *matchptr = ptr;
1546    char *t = ptr;    char *t = ptr;
1547    size_t length, linelength;    size_t length, linelength;
# Line 911  while (ptr < endptr) Line 1549  while (ptr < endptr)
1549    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
1550    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
1551    length remainder of the data in the buffer. Otherwise, it is the length of    length remainder of the data in the buffer. Otherwise, it is the length of
1552    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
1553    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1554    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
1555      first line. */
1556    
1557    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
1558    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
1559    length = multiline? (size_t)(endptr - ptr) : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
1560    
1561      /* Check to see if the line we are looking at extends right to the very end
1562      of the buffer without a line terminator. This means the line is too long to
1563      handle. */
1564    
1565      if (endlinelength == 0 && t == main_buffer + bufsize)
1566        {
1567        fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1568                        "pcregrep: check the --buffer-size option\n",
1569                        linenumber,
1570                        (filename == NULL)? "" : " of file ",
1571                        (filename == NULL)? "" : filename);
1572        return 2;
1573        }
1574    
1575    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
1576    
1577  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
1578    if (jfriedl_XT || jfriedl_XR)    if (jfriedl_XT || jfriedl_XR)
1579    {    {
1580        #include <sys/time.h>  #     include <sys/time.h>
1581        #include <time.h>  #     include <time.h>
1582        struct timeval start_time, end_time;        struct timeval start_time, end_time;
1583        struct timezone dummy;        struct timezone dummy;
1584          int i;
1585    
1586        if (jfriedl_XT)        if (jfriedl_XT)
1587        {        {
# Line 936  while (ptr < endptr) Line 1590  while (ptr < endptr)
1590            ptr = malloc(newlen + 1);            ptr = malloc(newlen + 1);
1591            if (!ptr) {            if (!ptr) {
1592                    printf("out of memory");                    printf("out of memory");
1593                    exit(2);                    pcregrep_exit(2);
1594            }            }
1595            endptr = ptr;            endptr = ptr;
1596            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
# Line 953  while (ptr < endptr) Line 1607  while (ptr < endptr)
1607    
1608    
1609        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1610            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);            match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1611                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1612    
1613        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
1614                perror("bad gettimeofday");                perror("bad gettimeofday");
# Line 967  while (ptr < endptr) Line 1622  while (ptr < endptr)
1622    }    }
1623  #endif  #endif
1624    
1625    /* We come back here after a match when the -o option (only_matching) is set,    /* We come back here after a match when show_only_matching is set, in order
1626    in order to find any further matches in the same line. */    to find any further matches in the same line. This applies to
1627      --only-matching, --file-offsets, and --line-offsets. */
1628    
1629    ONLY_MATCHING_RESTART:    ONLY_MATCHING_RESTART:
1630    
1631    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches or there is an error other
1632    the final newline in the subject string. */    than NOMATCH. This code is in a subroutine so that it can be re-used for
1633      finding subsequent matches when colouring matched lines. After finding one
1634      match, set PCRE_NOTEMPTY to disable any further matches of null strings in
1635      this line. */
1636    
1637    for (i = 0; i < pattern_count; i++)    match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc);
1638      {    options = PCRE_NOTEMPTY;
     mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,  
       offsets, 99);  
     if (mrc >= 0) { match = TRUE; break; }  
     if (mrc != PCRE_ERROR_NOMATCH)  
       {  
       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);  
       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
       fprintf(stderr, "this line:\n");  
       fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */  
       fprintf(stderr, "\n");  
       if (error_count == 0 &&  
           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))  
         {  
         fprintf(stderr, "pcregrep: error %d means that a resource limit "  
           "was exceeded\n", mrc);  
         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
         }  
       if (error_count++ > 20)  
         {  
         fprintf(stderr, "pcregrep: too many errors - abandoned\n");  
         exit(2);  
         }  
       match = invert;    /* No more matching; don't show the line again */  
       break;  
       }  
     }  
1639    
1640    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1641    
# Line 1018  while (ptr < endptr) Line 1651  while (ptr < endptr)
1651    
1652      if (count_only) count++;      if (count_only) count++;
1653    
1654        /* When handling a binary file and binary-files==binary, the "binary"
1655        variable will be set true (it's false in all other cases). In this
1656        situation we just want to output the file name. No need to scan further. */
1657    
1658        else if (binary)
1659          {
1660          fprintf(stdout, "Binary file %s matches\n", filename);
1661          return 0;
1662          }
1663    
1664      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1665      in the file. */      in the file. */
1666    
1667      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1668        {        {
1669        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1670        return 0;        return 0;
# Line 1031  while (ptr < endptr) Line 1674  while (ptr < endptr)
1674    
1675      else if (quiet) return 0;      else if (quiet) return 0;
1676    
1677      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched,
1678      the --file-offsets and --line-offsets options output offsets for the      and/or one or more captured portions of it, as long as these strings are
1679      matching substring (they both force --only-matching). None of these options      not empty. The --file-offsets and --line-offsets options output offsets for
1680      prints any context. Afterwards, adjust the start and length, and then jump      the matching substring (all three set show_only_matching). None of these
1681      back to look for further matches in the same line. If we are in invert      mutually exclusive options prints any context. Afterwards, adjust the start
1682      mode, however, nothing is printed - this could be still useful because the      and then jump back to look for further matches in the same line. If we are
1683      return code is set. */      in invert mode, however, nothing is printed and we do not restart - this
1684        could still be useful because the return code is set. */
1685    
1686      else if (only_matching)      else if (show_only_matching)
1687        {        {
1688        if (!invert)        if (!invert)
1689          {          {
1690          if (printname != NULL) fprintf(stdout, "%s:", printname);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1691          if (number) fprintf(stdout, "%d:", linenumber);          if (number) fprintf(stdout, "%d:", linenumber);
1692    
1693            /* Handle --line-offsets */
1694    
1695          if (line_offsets)          if (line_offsets)
1696            fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1697              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1698    
1699            /* Handle --file-offsets */
1700    
1701          else if (file_offsets)          else if (file_offsets)
1702            fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d\n",
1703                (int)(filepos + matchptr + offsets[0] - ptr),
1704              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1705    
1706            /* Handle --only-matching, which may occur many times */
1707    
1708          else          else
1709            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);            {
1710          fprintf(stdout, "\n");            BOOL printed = FALSE;
1711          matchptr += offsets[1];            omstr *om;
1712          length -= offsets[1];  
1713              for (om = only_matching; om != NULL; om = om->next)
1714                {
1715                int n = om->groupnum;
1716                if (n < mrc)
1717                  {
1718                  int plen = offsets[2*n + 1] - offsets[2*n];
1719                  if (plen > 0)
1720                    {
1721                    if (printed) fprintf(stdout, "%s", om_separator);
1722                    if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1723                    FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
1724                    if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1725                    printed = TRUE;
1726                    }
1727                  }
1728                }
1729    
1730              if (printed || printname != NULL || number) fprintf(stdout, "\n");
1731              }
1732    
1733            /* Prepare to repeat to find the next match */
1734    
1735          match = FALSE;          match = FALSE;
1736            if (line_buffered) fflush(stdout);
1737            rc = 0;                      /* Had some success */
1738            startoffset = offsets[1];    /* Restart after the match */
1739          goto ONLY_MATCHING_RESTART;          goto ONLY_MATCHING_RESTART;
1740          }          }
1741        }        }
# Line 1092  while (ptr < endptr) Line 1771  while (ptr < endptr)
1771            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1772            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1773            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1774            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1775            lastmatchrestart = pp;            lastmatchrestart = pp;
1776            }            }
1777          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
# Line 1115  while (ptr < endptr) Line 1794  while (ptr < endptr)
1794          int linecount = 0;          int linecount = 0;
1795          char *p = ptr;          char *p = ptr;
1796    
1797          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1798                 linecount < before_context)                 linecount < before_context)
1799            {            {
1800            linecount++;            linecount++;
1801            p = previous_line(p, buffer);            p = previous_line(p, main_buffer);
1802            }            }
1803    
1804          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 1132  while (ptr < endptr) Line 1811  while (ptr < endptr)
1811            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1812            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1813            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1814            fwrite(p, 1, pp - p, stdout);            FWRITE(p, 1, pp - p, stdout);
1815            p = pp;            p = pp;
1816            }            }
1817          }          }
# Line 1152  while (ptr < endptr) Line 1831  while (ptr < endptr)
1831        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1832        the match will always be before the first newline sequence. */        the match will always be before the first newline sequence. */
1833    
1834        if (multiline)        if (multiline & !invert)
1835          {          {
1836          int ellength;          char *endmatch = ptr + offsets[1];
1837          char *endmatch = ptr;          t = ptr;
1838          if (!invert)          while (t < endmatch)
1839            {            {
1840            endmatch += offsets[1];            t = end_of_line(t, endptr, &endlinelength);
1841            t = ptr;            if (t < endmatch) linenumber++; else break;
           while (t < endmatch)  
             {  
             t = end_of_line(t, endptr, &ellength);  
             if (t <= endmatch) linenumber++; else break;  
             }  
1842            }            }
1843          endmatch = end_of_line(endmatch, endptr, &ellength);          linelength = t - ptr - endlinelength;
         linelength = endmatch - ptr - ellength;  
1844          }          }
1845    
1846        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 1182  while (ptr < endptr) Line 1855  while (ptr < endptr)
1855          {          {
1856          int first = S_arg * 2;          int first = S_arg * 2;
1857          int last  = first + 1;          int last  = first + 1;
1858          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1859          fprintf(stdout, "X");          fprintf(stdout, "X");
1860          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1861          }          }
1862        else        else
1863  #endif  #endif
1864    
1865        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1866          matches, but not of course if the line is a non-match. */
1867    
1868        if (do_colour)        if (do_colour && !invert)
1869          {          {
1870          fwrite(ptr, 1, offsets[0], stdout);          int plength;
1871            FWRITE(ptr, 1, offsets[0], stdout);
1872          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1873          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1874          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1875          fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],          for (;;)
1876            stdout);            {
1877              startoffset = offsets[1];
1878              if (startoffset >= (int)linelength + endlinelength ||
1879                  !match_patterns(matchptr, length, options, startoffset, offsets,
1880                    &mrc))
1881                break;
1882              FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1883              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1884              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1885              fprintf(stdout, "%c[00m", 0x1b);
1886              }
1887    
1888            /* In multiline mode, we may have already printed the complete line
1889            and its line-ending characters (if they matched the pattern), so there
1890            may be no more to print. */
1891    
1892            plength = (int)((linelength + endlinelength) - startoffset);
1893            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1894          }          }
1895        else fwrite(ptr, 1, linelength + endlinelength, stdout);  
1896          /* Not colouring; no need to search for further matches */
1897    
1898          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1899        }        }
1900    
1901      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match. If --line-buffered was
1902        given, flush the output. */
1903    
1904        if (line_buffered) fflush(stdout);
1905      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1906    
1907      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
# Line 1236  while (ptr < endptr) Line 1933  while (ptr < endptr)
1933    offset to the current line is maintained in filepos. */    offset to the current line is maintained in filepos. */
1934    
1935    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
1936    filepos += linelength + endlinelength;    filepos += (int)(linelength + endlinelength);
1937    linenumber++;    linenumber++;
1938    
1939      /* If input is line buffered, and the buffer is not yet full, read another
1940      line and add it into the buffer. */
1941    
1942      if (input_line_buffered && bufflength < (size_t)bufsize)
1943        {
1944        int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1945        bufflength += add;
1946        endptr += add;
1947        }
1948    
1949    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1950    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1951    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
1952    about to be lost, print them. */    about to be lost, print them. */
1953    
1954    if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)    if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1955      {      {
1956      if (after_context > 0 &&      if (after_context > 0 &&
1957          lastmatchnumber > 0 &&          lastmatchnumber > 0 &&
1958          lastmatchrestart < buffer + MBUFTHIRD)          lastmatchrestart < main_buffer + bufthird)
1959        {        {
1960        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1961        lastmatchnumber = 0;        lastmatchnumber = 0;
# Line 1256  while (ptr < endptr) Line 1963  while (ptr < endptr)
1963    
1964      /* Now do the shuffle */      /* Now do the shuffle */
1965    
1966      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1967      ptr -= MBUFTHIRD;      ptr -= bufthird;
1968    
1969  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
1970      if (frtype == FR_LIBZ)      if (frtype == FR_LIBZ)
1971        bufflength = 2*MBUFTHIRD +        bufflength = 2*bufthird +
1972          gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);          gzread (ingz, main_buffer + 2*bufthird, bufthird);
1973      else      else
1974  #endif  #endif
1975    
1976  #ifdef SUPPORT_LIBBZ2  #ifdef SUPPORT_LIBBZ2
1977      if (frtype == FR_LIBBZ2)      if (frtype == FR_LIBBZ2)
1978        bufflength = 2*MBUFTHIRD +        bufflength = 2*bufthird +
1979          BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);          BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1980      else      else
1981  #endif  #endif
1982    
1983      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);      bufflength = 2*bufthird +
1984          (input_line_buffered?
1985      endptr = buffer + bufflength;         read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1986           fread(main_buffer + 2*bufthird, 1, bufthird, in));
1987        endptr = main_buffer + bufflength;
1988    
1989      /* Adjust any last match point */      /* Adjust any last match point */
1990    
1991      if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;      if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1992      }      }
1993    }     /* Loop through the whole file */    }     /* Loop through the whole file */
1994    
1995  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
1996  hyphenpending if it prints something. */  hyphenpending if it prints something. */
1997    
1998  if (!only_matching && !count_only)  if (!show_only_matching && !count_only)
1999    {    {
2000    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2001    hyphenpending |= endhyphenpending;    hyphenpending |= endhyphenpending;
# Line 1305  if (filenames == FN_NOMATCH_ONLY) Line 2014  if (filenames == FN_NOMATCH_ONLY)
2014    
2015  if (count_only)  if (count_only)
2016    {    {
2017    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
2018    fprintf(stdout, "%d\n", count);      {
2019        if (printname != NULL && filenames != FN_NONE)
2020          fprintf(stdout, "%s:", printname);
2021        fprintf(stdout, "%d\n", count);
2022        }
2023    }    }
2024    
2025  return rc;  return rc;
# Line 1326  Arguments: Line 2039  Arguments:
2039    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
2040    only_one_at_top   TRUE if the path is the only one at toplevel    only_one_at_top   TRUE if the path is the only one at toplevel
2041    
2042  Returns:   0 if there was at least one match  Returns:  -1 the file/directory was skipped
2043               0 if there was at least one match
2044             1 if there were no matches             1 if there were no matches
2045             2 there was some kind of error             2 there was some kind of error
2046    
# Line 1337  static int Line 2051  static int
2051  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2052  {  {
2053  int rc = 1;  int rc = 1;
 int sep;  
2054  int frtype;  int frtype;
 int pathlen;  
2055  void *handle;  void *handle;
2056    char *lastcomp;
2057  FILE *in = NULL;           /* Ensure initialized */  FILE *in = NULL;           /* Ensure initialized */
2058    
2059  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
# Line 1351  gzFile ingz = NULL; Line 2064  gzFile ingz = NULL;
2064  BZFILE *inbz2 = NULL;  BZFILE *inbz2 = NULL;
2065  #endif  #endif
2066    
2067    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2068    int pathlen;
2069    #endif
2070    
2071  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
2072    
2073  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
2074    {    {
2075    return pcregrep(stdin, FR_PLAIN,    return pcregrep(stdin, FR_PLAIN, stdin_name,
2076      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
2077        stdin_name : NULL);        stdin_name : NULL);
2078    }    }
2079    
2080  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2081  each file within it, subject to any include or exclude patterns that were set.  directories, whereas --include and --exclude apply to everything else. The test
2082  The scanning code is localized so it can be made system-specific. */  is against the final component of the path. */
2083    
2084    lastcomp = strrchr(pathname, FILESEP);
2085    lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2086    
2087    /* If the file is a directory, skip if not recursing or if explicitly excluded.
2088    Otherwise, scan the directory and recurse for each path within it. The scanning
2089    code is localized so it can be made system-specific. */
2090    
2091    if (isdirectory(pathname))
2092      {
2093      if (dee_action == dee_SKIP ||
2094          !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2095        return -1;
2096    
 if ((sep = isdirectory(pathname)) != 0)  
   {  
   if (dee_action == dee_SKIP) return 1;  
2097    if (dee_action == dee_RECURSE)    if (dee_action == dee_RECURSE)
2098      {      {
2099      char buffer[1024];      char buffer[1024];
# Line 1383  if ((sep = isdirectory(pathname)) != 0) Line 2110  if ((sep = isdirectory(pathname)) != 0)
2110    
2111      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
2112        {        {
2113        int frc, blen;        int frc;
2114        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
       blen = strlen(buffer);  
   
       if (exclude_compiled != NULL &&  
           pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)  
         continue;  
   
       if (include_compiled != NULL &&  
           pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  
         continue;  
   
2115        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2116        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
2117         else if (frc == 0 && rc == 1) rc = 0;         else if (frc == 0 && rc == 1) rc = 0;
# Line 1406  if ((sep = isdirectory(pathname)) != 0) Line 2123  if ((sep = isdirectory(pathname)) != 0)
2123    }    }
2124    
2125  /* If the file is not a directory and not a regular file, skip it if that's  /* If the file is not a directory and not a regular file, skip it if that's
2126  been requested. */  been requested. Otherwise, check for explicit include/exclude. */
2127    
2128  else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;  else if ((!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2129              !test_incexc(lastcomp, include_patterns, exclude_patterns))
2130            return -1;
2131    
2132  /* Control reaches here if we have a regular file, or if we have a directory  /* Control reaches here if we have a regular file, or if we have a directory
2133  and recursion or skipping was not requested, or if we have anything else and  and recursion or skipping was not requested, or if we have anything else and
# Line 1416  skipping was not requested. The scan pro Line 2135  skipping was not requested. The scan pro
2135  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
2136  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
2137    
2138  pathlen = strlen(pathname);  #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2139    pathlen = (int)(strlen(pathname));
2140    #endif
2141    
2142  /* Open using zlib if it is supported and the file name ends with .gz. */  /* Open using zlib if it is supported and the file name ends with .gz. */
2143    
# Line 1456  an attempt to read a .bz2 file indicates Line 2177  an attempt to read a .bz2 file indicates
2177  PLAIN_FILE:  PLAIN_FILE:
2178  #endif  #endif
2179    {    {
2180    in = fopen(pathname, "r");    in = fopen(pathname, "rb");
2181    handle = (void *)in;    handle = (void *)in;
2182    frtype = FR_PLAIN;    frtype = FR_PLAIN;
2183    }    }
# Line 1473  if (handle == NULL) Line 2194  if (handle == NULL)
2194    
2195  /* Now grep the file */  /* Now grep the file */
2196    
2197  rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||  rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2198    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2199    
2200  /* Close in an appropriate manner. */  /* Close in an appropriate manner. */
# Line 1484  if (frtype == FR_LIBZ) Line 2205  if (frtype == FR_LIBZ)
2205  else  else
2206  #endif  #endif
2207    
2208  /* If it is a .bz2 file and the result is 2, it means that the first attempt to  /* If it is a .bz2 file and the result is 3, it means that the first attempt to
2209  read failed. If the error indicates that the file isn't in fact bzipped, try  read failed. If the error indicates that the file isn't in fact bzipped, try
2210  again as a normal file. */  again as a normal file. */
2211    
2212  #ifdef SUPPORT_LIBBZ2  #ifdef SUPPORT_LIBBZ2
2213  if (frtype == FR_LIBBZ2)  if (frtype == FR_LIBBZ2)
2214    {    {
2215    if (rc == 2)    if (rc == 3)
2216      {      {
2217      int errnum;      int errnum;
2218      const char *err = BZ2_bzerror(inbz2, &errnum);      const char *err = BZ2_bzerror(inbz2, &errnum);
# Line 1503  if (frtype == FR_LIBBZ2) Line 2224  if (frtype == FR_LIBBZ2)
2224      else if (!silent)      else if (!silent)
2225        fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",        fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
2226          pathname, err);          pathname, err);
2227        rc = 2;    /* The normal "something went wrong" code */
2228      }      }
2229    BZ2_bzclose(inbz2);    BZ2_bzclose(inbz2);
2230    }    }
# Line 1520  return rc; Line 2242  return rc;
2242    
2243    
2244    
   
 /*************************************************  
 *                Usage function                  *  
 *************************************************/  
   
 static int  
 usage(int rc)  
 {  
 option_item *op;  
 fprintf(stderr, "Usage: pcregrep [-");  
 for (op = optionlist; op->one_char != 0; op++)  
   {  
   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);  
   }  
 fprintf(stderr, "] [long options] [pattern] [files]\n");  
 fprintf(stderr, "Type `pcregrep --help' for more information and the long "  
   "options.\n");  
 return rc;  
 }  
   
   
   
   
 /*************************************************  
 *                Help function                   *  
 *************************************************/  
   
 static void  
 help(void)  
 {  
 option_item *op;  
   
 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  
 printf("Search for PATTERN in each FILE or standard input.\n");  
 printf("PATTERN must be present if neither -e nor -f is used.\n");  
 printf("\"-\" can be used as a file name to mean STDIN.\n");  
   
 #ifdef SUPPORT_LIBZ  
 printf("Files whose names end in .gz are read using zlib.\n");  
 #endif  
   
 #ifdef SUPPORT_LIBBZ2  
 printf("Files whose names end in .bz2 are read using bzlib2.\n");  
 #endif  
   
 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2  
 printf("Other files and the standard input are read as plain files.\n\n");  
 #else  
 printf("All files are read as plain files, without any interpretation.\n\n");  
 #endif  
   
 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
 printf("Options:\n");  
   
 for (op = optionlist; op->one_char != 0; op++)  
   {  
   int n;  
   char s[4];  
   if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");  
   n = 30 - printf("  %s --%s", s, op->long_name);  
   if (n < 1) n = 1;  
   printf("%.*s%s\n", n, "                    ", op->help_text);  
   }  
   
 printf("\nWhen reading patterns from a file instead of using a command line option,\n");  
 printf("trailing white space is removed and blank lines are ignored.\n");  
 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
   
 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");  
 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  
 }  
   
   
   
   
2245  /*************************************************  /*************************************************
2246  *    Handle a single-letter, no data option      *  *    Handle a single-letter, no data option      *
2247  *************************************************/  *************************************************/
# Line 1605  handle_option(int letter, int options) Line 2252  handle_option(int letter, int options)
2252  switch(letter)  switch(letter)
2253    {    {
2254    case N_FOFFSETS: file_offsets = TRUE; break;    case N_FOFFSETS: file_offsets = TRUE; break;
2255    case N_HELP: help(); exit(0);    case N_HELP: help(); pcregrep_exit(0);
2256      case N_LBUFFER: line_buffered = TRUE; break;
2257    case N_LOFFSETS: line_offsets = number = TRUE; break;    case N_LOFFSETS: line_offsets = number = TRUE; break;
2258      case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2259      case 'a': binary_files = BIN_TEXT; break;
2260    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
2261    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
2262    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
2263      case 'I': binary_files = BIN_NOMATCH; break;
2264    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
2265    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
2266    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2267    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
2268    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2269    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
2270    case 'o': only_matching = TRUE; break;  
2271      case 'o':
2272      only_matching_last = add_number(0, only_matching_last);
2273      if (only_matching == NULL) only_matching = only_matching_last;
2274      break;
2275    
2276    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
2277    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
2278    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
# Line 1626  switch(letter) Line 2282  switch(letter)
2282    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
2283    
2284    case 'V':    case 'V':
2285    fprintf(stderr, "pcregrep version %s\n", pcre_version());    fprintf(stdout, "pcregrep version %s\n", pcre_version());
2286    exit(0);    pcregrep_exit(0);
2287    break;    break;
2288    
2289    default:    default:
2290    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2291    exit(usage(2));    pcregrep_exit(usage(2));
2292    }    }
2293    
2294  return options;  return options;
# Line 1670  return buffer; Line 2326  return buffer;
2326  *          Compile a single pattern              *  *          Compile a single pattern              *
2327  *************************************************/  *************************************************/
2328    
2329  /* When the -F option has been used, this is called for each substring.  /* Do nothing if the pattern has already been compiled. This is the case for
2330  Otherwise it's called for each supplied pattern.  include/exclude patterns read from a file.
2331    
2332    When the -F option has been used, each "pattern" may be a list of strings,
2333    separated by line breaks. They will be matched literally. We split such a
2334    string and compile the first substring, inserting an additional block into the
2335    pattern chain.
2336    
2337  Arguments:  Arguments:
2338    pattern        the pattern string    p              points to the pattern block
2339    options        the PCRE options    options        the PCRE options
2340    filename       the file name, or NULL for a command-line pattern    popts          the processing options
2341      fromfile       TRUE if the pattern was read from a file
2342      fromtext       file name or identifying text (e.g. "include")
2343    count          0 if this is the only command line pattern, or    count          0 if this is the only command line pattern, or
2344                   number of the command line pattern, or                   number of the command line pattern, or
2345                   linenumber for a pattern from a file                   linenumber for a pattern from a file
# Line 1685  Returns:         TRUE on success, FALSE Line 2348  Returns:         TRUE on success, FALSE
2348  */  */
2349    
2350  static BOOL  static BOOL
2351  compile_single_pattern(char *pattern, int options, char *filename, int count)  compile_pattern(patstr *p, int options, int popts, int fromfile,
2352      const char *fromtext, int count)
2353  {  {
2354  char buffer[MBUFTHIRD + 16];  char buffer[PATBUFSIZE];
2355  const char *error;  const char *error;
2356    char *ps = p->string;
2357    int patlen = strlen(ps);
2358  int errptr;  int errptr;
2359    
2360  if (pattern_count >= MAX_PATTERN_COUNT)  if (p->compiled != NULL) return TRUE;
   {  
   fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",  
     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);  
   return FALSE;  
   }  
2361    
2362  sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,  if ((popts & PO_FIXED_STRINGS) != 0)
   suffix[process_options]);  
 pattern_list[pattern_count] =  
   pcre_compile(buffer, options, &error, &errptr, pcretables);  
 if (pattern_list[pattern_count] != NULL)  
2363    {    {
2364    pattern_count++;    int ellength;
2365    return TRUE;    char *eop = ps + patlen;
2366      char *pe = end_of_line(ps, eop, &ellength);
2367    
2368      if (ellength != 0)
2369        {
2370        if (add_pattern(pe, p) == NULL) return FALSE;
2371        patlen = (int)(pe - ps - ellength);
2372        }
2373    }    }
2374    
2375    sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2376    p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2377    if (p->compiled != NULL) return TRUE;
2378    
2379  /* Handle compile errors */  /* Handle compile errors */
2380    
2381  errptr -= (int)strlen(prefix[process_options]);  errptr -= (int)strlen(prefix[popts]);
2382  if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);  if (errptr > patlen) errptr = patlen;
2383    
2384  if (filename == NULL)  if (fromfile)
2385    {    {
2386    if (count == 0)    fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2387      fprintf(stderr, "pcregrep: Error in command-line regex "      "at offset %d: %s\n", count, fromtext, errptr, error);
       "at offset %d: %s\n", errptr, error);  
   else  
     fprintf(stderr, "pcregrep: Error in %s command-line regex "  
       "at offset %d: %s\n", ordin(count), errptr, error);  
2388    }    }
2389  else  else
2390    {    {
2391    fprintf(stderr, "pcregrep: Error in regex in line %d of %s "    if (count == 0)
2392      "at offset %d: %s\n", count, filename, errptr, error);      fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2393          fromtext, errptr, error);
2394      else
2395        fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2396          ordin(count), fromtext, errptr, error);
2397    }    }
2398    
2399  return FALSE;  return FALSE;
# Line 1734  return FALSE; Line 2402  return FALSE;
2402    
2403    
2404  /*************************************************  /*************************************************
2405  *           Compile one supplied pattern         *  *     Read and compile a file of patterns        *
2406  *************************************************/  *************************************************/
2407    
2408  /* When the -F option has been used, each string may be a list of strings,  /* This is used for --filelist, --include-from, and --exclude-from.
 separated by line breaks. They will be matched literally.  
2409    
2410  Arguments:  Arguments:
2411    pattern        the pattern string    name         the name of the file; "-" is stdin
2412    options        the PCRE options    patptr       pointer to the pattern chain anchor
2413    filename       the file name, or NULL for a command-line pattern    patlastptr   pointer to the last pattern pointer
2414    count          0 if this is the only command line pattern, or    popts        the process options to pass to pattern_compile()
                  number of the command line pattern, or  
                  linenumber for a pattern from a file  
2415    
2416  Returns:         TRUE on success, FALSE after an error  Returns:       TRUE if all went well
2417  */  */
2418    
2419  static BOOL  static BOOL
2420  compile_pattern(char *pattern, int options, char *filename, int count)  read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2421  {  {
2422  if ((process_options & PO_FIXED_STRINGS) != 0)  int linenumber = 0;
2423    FILE *f;
2424    char *filename;
2425    char buffer[PATBUFSIZE];
2426    
2427    if (strcmp(name, "-") == 0)
2428      {
2429      f = stdin;
2430      filename = stdin_name;
2431      }
2432    else
2433      {
2434      f = fopen(name, "r");
2435      if (f == NULL)
2436        {
2437        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2438        return FALSE;
2439        }
2440      filename = name;
2441      }
2442    
2443    while (fgets(buffer, PATBUFSIZE, f) != NULL)
2444    {    {
2445    char *eop = pattern + strlen(pattern);    char *s = buffer + (int)strlen(buffer);
2446    char buffer[MBUFTHIRD];    while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2447      *s = 0;
2448      linenumber++;
2449      if (buffer[0] == 0) continue;   /* Skip blank lines */
2450    
2451      /* Note: this call to add_pattern() puts a pointer to the local variable
2452      "buffer" into the pattern chain. However, that pointer is used only when
2453      compiling the pattern, which happens immediately below, so we flatten it
2454      afterwards, as a precaution against any later code trying to use it. */
2455    
2456      *patlastptr = add_pattern(buffer, *patlastptr);
2457      if (*patlastptr == NULL) return FALSE;
2458      if (*patptr == NULL) *patptr = *patlastptr;
2459    
2460      /* This loop is needed because compiling a "pattern" when -F is set may add
2461      on additional literal patterns if the original contains a newline. In the
2462      common case, it never will, because fgets() stops at a newline. However,
2463      the -N option can be used to give pcregrep a different newline setting. */
2464    
2465    for(;;)    for(;;)
2466      {      {
2467      int ellength;      if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2468      char *p = end_of_line(pattern, eop, &ellength);          linenumber))
     if (ellength == 0)  
       return compile_single_pattern(pattern, options, filename, count);  
     sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);  
     pattern = p;  
     if (!compile_single_pattern(buffer, options, filename, count))  
2469        return FALSE;        return FALSE;
2470        (*patlastptr)->string = NULL;            /* Insurance */
2471        if ((*patlastptr)->next == NULL) break;
2472        *patlastptr = (*patlastptr)->next;
2473      }      }
2474    }    }
2475  else return compile_single_pattern(pattern, options, filename, count);  
2476    if (f != stdin) fclose(f);
2477    return TRUE;
2478  }  }
2479    
2480    
# Line 1786  main(int argc, char **argv) Line 2490  main(int argc, char **argv)
2490  {  {
2491  int i, j;  int i, j;
2492  int rc = 1;  int rc = 1;
 int pcre_options = 0;  
 int cmd_pattern_count = 0;  
 int hint_count = 0;  
 int errptr;  
2493  BOOL only_one_at_top;  BOOL only_one_at_top;
2494  char *patterns[MAX_PATTERN_COUNT];  patstr *cp;
2495    fnstr *fn;
2496  const char *locale_from = "--locale";  const char *locale_from = "--locale";
2497  const char *error;  const char *error;
2498    
2499    #ifdef SUPPORT_PCREGREP_JIT
2500    pcre_jit_stack *jit_stack = NULL;
2501    #endif
2502    
2503  /* Set the default line ending value from the default in the PCRE library;  /* Set the default line ending value from the default in the PCRE library;
2504  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2505  */  Note that the return values from pcre_config(), though derived from the ASCII
2506    codes, are the same in EBCDIC environments, so we must use the actual values
2507    rather than escapes such as '\r'. */
2508    
2509  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2510  switch(i)  switch(i)
2511    {    {
2512    default:                 newline = (char *)"lf"; break;    default:               newline = (char *)"lf"; break;
2513    case '\r':               newline = (char *)"cr"; break;    case 13:               newline = (char *)"cr"; break;
2514    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2515    case -1:                 newline = (char *)"any"; break;    case -1:               newline = (char *)"any"; break;
2516    case -2:                 newline = (char *)"anycrlf"; break;    case -2:               newline = (char *)"anycrlf"; break;
2517    }    }
2518    
2519  /* Process the options */  /* Process the options */
# Line 1825  for (i = 1; i < argc; i++) Line 2532  for (i = 1; i < argc; i++)
2532    
2533    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2534      {      {
2535      if (pattern_filename != NULL || pattern_count > 0) break;      if (pattern_files != NULL || patterns != NULL) break;
2536        else exit(usage(2));        else pcregrep_exit(usage(2));
2537      }      }
2538    
2539    /* Handle a long name option, or -- to terminate the options */    /* Handle a long name option, or -- to terminate the options */
# Line 1848  for (i = 1; i < argc; i++) Line 2555  for (i = 1; i < argc; i++)
2555      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2556      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2557      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2558      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
2559      these categories, fortunately. */      both these categories. */
2560    
2561      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2562        {        {
2563        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2564        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2565        if (opbra == NULL)     /* Not a (p) case */  
2566          /* Handle options with only one spelling of the name */
2567    
2568          if (opbra == NULL)     /* Does not contain '(' */
2569          {          {
2570          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2571            {            {
# Line 1863  for (i = 1; i < argc; i++) Line 2573  for (i = 1; i < argc; i++)
2573            }            }
2574          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2575            {            {
2576            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2577            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2578                (int)strlen(arg) : (int)(argequals - arg);
2579            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2580              {              {
2581              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1877  for (i = 1; i < argc; i++) Line 2588  for (i = 1; i < argc; i++)
2588              }              }
2589            }            }
2590          }          }
2591        else                   /* Special case xxxx(p) */  
2592          /* Handle options with an alternate spelling of the name */
2593    
2594          else
2595          {          {
2596          char buff1[24];          char buff1[24];
2597          char buff2[24];          char buff2[24];
2598          int baselen = opbra - op->long_name;  
2599            int baselen = (int)(opbra - op->long_name);
2600            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2601            int arglen = (argequals == NULL || equals == NULL)?
2602              (int)strlen(arg) : (int)(argequals - arg);
2603    
2604          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2605          sprintf(buff2, "%s%.*s", buff1,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2606            (int)strlen(op->long_name) - baselen - 2, opbra + 1);  
2607          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2608               strncmp(arg, buff2, arglen) == 0)
2609              {
2610              if (equals != NULL && argequals != NULL)
2611                {
2612                option_data = argequals;
2613                if (*option_data == '=')
2614                  {
2615                  option_data++;
2616                  longopwasequals = TRUE;
2617                  }
2618                }
2619            break;            break;
2620              }
2621          }          }
2622        }        }
2623    
2624      if (op->one_char == 0)      if (op->one_char == 0)
2625        {        {
2626        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2627        exit(usage(2));        pcregrep_exit(usage(2));
2628        }        }
2629      }      }
2630    
   
2631    /* Jeffrey Friedl's debugging harness uses these additional options which    /* Jeffrey Friedl's debugging harness uses these additional options which
2632    are not in the right form for putting in the option table because they use    are not in the right form for putting in the option table because they use
2633    only one hyphen, yet are more than one character long. By putting them    only one hyphen, yet are more than one character long. By putting them
# Line 1928  for (i = 1; i < argc; i++) Line 2658  for (i = 1; i < argc; i++)
2658      {      {
2659      char *s = argv[i] + 1;      char *s = argv[i] + 1;
2660      longop = FALSE;      longop = FALSE;
2661    
2662      while (*s != 0)      while (*s != 0)
2663        {        {
2664        for (op = optionlist; op->one_char != 0; op++)        for (op = optionlist; op->one_char != 0; op++)
2665          { if (*s == op->one_char) break; }          {
2666            if (*s == op->one_char) break;
2667            }
2668        if (op->one_char == 0)        if (op->one_char == 0)
2669          {          {
2670          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2671            *s, argv[i]);            *s, argv[i]);
2672          exit(usage(2));          pcregrep_exit(usage(2));
2673            }
2674    
2675          option_data = s+1;
2676    
2677          /* Break out if this is the last character in the string; it's handled
2678          below like a single multi-char option. */
2679    
2680          if (*option_data == 0) break;
2681    
2682          /* Check for a single-character option that has data: OP_OP_NUMBER(S)
2683          are used for ones that either have a numeric value or a default, i.e.
2684          the data is optional. If a digit follows, there is data; if not, carry on
2685          with other single-character options in the same string. */
2686    
2687          if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
2688            {
2689            if (isdigit((unsigned char)s[1])) break;
2690          }          }
2691        if (op->type != OP_NODATA || s[1] == 0)        else   /* Check for an option with data */
2692          {          {
2693          option_data = s+1;          if (op->type != OP_NODATA) break;
         break;  
2694          }          }
2695    
2696          /* Handle a single-character option with no data, then loop for the
2697          next character in the string. */
2698    
2699        pcre_options = handle_option(*s++, pcre_options);        pcre_options = handle_option(*s++, pcre_options);
2700        }        }
2701      }      }
# Line 1957  for (i = 1; i < argc; i++) Line 2710  for (i = 1; i < argc; i++)
2710      continue;      continue;
2711      }      }
2712    
2713    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
2714    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
2715    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r",
2716    Jeffrey Friedl's special -S debugging option. */    "only-matching", and Jeffrey Friedl's special -S debugging option. */
2717    
2718    if (*option_data == 0 &&    if (*option_data == 0 &&
2719        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
2720           op->type == OP_OP_NUMBERS))
2721      {      {
2722      switch (op->one_char)      switch (op->one_char)
2723        {        {
2724        case N_COLOUR:        case N_COLOUR:
2725        colour_option = (char *)"auto";        colour_option = (char *)"auto";
2726        break;        break;
2727    
2728          case 'o':
2729          only_matching_last = add_number(0, only_matching_last);
2730          if (only_matching == NULL) only_matching = only_matching_last;
2731          break;
2732    
2733  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
2734        case 'S':        case 'S':
2735        S_arg = 0;        S_arg = 0;
# Line 1986  for (i = 1; i < argc; i++) Line 2746  for (i = 1; i < argc; i++)
2746      if (i >= argc - 1 || longopwasequals)      if (i >= argc - 1 || longopwasequals)
2747        {        {
2748        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2749        exit(usage(2));        pcregrep_exit(usage(2));
2750        }        }
2751      option_data = argv[++i];      option_data = argv[++i];
2752      }      }
2753    
2754    /* If the option type is OP_PATLIST, it's the -e option, which can be called    /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
2755    multiple times to create a list of patterns. */    added to a chain of numbers. */
2756    
2757    if (op->type == OP_PATLIST)    if (op->type == OP_OP_NUMBERS)
2758      {      {
2759      if (cmd_pattern_count >= MAX_PATTERN_COUNT)      unsigned long int n = decode_number(option_data, op, longop);
2760        omdatastr *omd = (omdatastr *)op->dataptr;
2761        *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
2762        if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
2763        }
2764    
2765      /* If the option type is OP_PATLIST, it's the -e option, or one of the
2766      include/exclude options, which can be called multiple times to create lists
2767      of patterns. */
2768    
2769      else if (op->type == OP_PATLIST)
2770        {
2771        patdatastr *pd = (patdatastr *)op->dataptr;
2772        *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2773        if (*(pd->lastptr) == NULL) goto EXIT2;
2774        if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2775        }
2776    
2777      /* If the option type is OP_FILELIST, it's one of the options that names a
2778      file. */
2779    
2780      else if (op->type == OP_FILELIST)
2781        {
2782        fndatastr *fd = (fndatastr *)op->dataptr;
2783        fn = (fnstr *)malloc(sizeof(fnstr));
2784        if (fn == NULL)
2785        {        {
2786        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",        fprintf(stderr, "pcregrep: malloc failed\n");
2787          MAX_PATTERN_COUNT);        goto EXIT2;
2788        return 2;        }
2789        fn->next = NULL;
2790        fn->name = option_data;
2791        if (*(fd->anchor) == NULL)
2792          *(fd->anchor) = fn;
2793        else
2794          (*(fd->lastptr))->next = fn;
2795        *(fd->lastptr) = fn;
2796        }
2797    
2798      /* Handle OP_BINFILES */
2799    
2800      else if (op->type == OP_BINFILES)
2801        {
2802        if (strcmp(option_data, "binary") == 0)
2803          binary_files = BIN_BINARY;
2804        else if (strcmp(option_data, "without-match") == 0)
2805          binary_files = BIN_NOMATCH;
2806        else if (strcmp(option_data, "text") == 0)
2807          binary_files = BIN_TEXT;
2808        else
2809          {
2810          fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2811            option_data);
2812          pcregrep_exit(usage(2));
2813        }        }
     patterns[cmd_pattern_count++] = option_data;  
2814      }      }
2815    
2816    /* Otherwise, deal with single string or numeric data values. */    /* Otherwise, deal with a single string or numeric data value. */
2817    
2818    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)    else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2819               op->type != OP_OP_NUMBER)
2820      {      {
2821      *((char **)op->dataptr) = option_data;      *((char **)op->dataptr) = option_data;
2822      }      }
2823    else    else
2824      {      {
2825      char *endptr;      unsigned long int n = decode_number(option_data, op, longop);
2826      int n = strtoul(option_data, &endptr, 10);      if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
2827      if (*endptr != 0)        else *((int *)op->dataptr) = n;
       {  
       if (longop)  
         {  
         char *equals = strchr(op->long_name, '=');  
         int nlen = (equals == NULL)? (int)strlen(op->long_name) :  
           equals - op->long_name;  
         fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",  
           option_data, nlen, op->long_name);  
         }  
       else  
         fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",  
           option_data, op->one_char);  
       exit(usage(2));  
       }  
     *((int *)op->dataptr) = n;  
2828      }      }
2829    }    }
2830    
# Line 2044  if (both_context > 0) Line 2838  if (both_context > 0)
2838    }    }
2839    
2840  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2841  However, the latter two set the only_matching flag. */  However, all three set show_only_matching because they display, each in their
2842    own way, only the data that has matched. */
2843    
2844  if ((only_matching && (file_offsets || line_offsets)) ||  if ((only_matching != NULL && (file_offsets || line_offsets)) ||
2845      (file_offsets && line_offsets))      (file_offsets && line_offsets))
2846    {    {
2847    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2848      "and/or --line-offsets\n");      "and/or --line-offsets\n");
2849    exit(usage(2));    pcregrep_exit(usage(2));
2850    }    }
2851    
2852  if (file_offsets || line_offsets) only_matching = TRUE;  if (only_matching != NULL || file_offsets || line_offsets)
2853      show_only_matching = TRUE;
2854    
2855  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2856  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
# Line 2178  if (jfriedl_XT != 0 || jfriedl_XR != 0) Line 2974  if (jfriedl_XT != 0 || jfriedl_XR != 0)
2974    }    }
2975  #endif  #endif
2976    
2977  /* Get memory to store the pattern and hints lists. */  /* Get memory for the main buffer. */
2978    
2979  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  bufsize = 3*bufthird;
2980  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  main_buffer = (char *)malloc(bufsize);
2981    
2982  if (pattern_list == NULL || hints_list == NULL)  if (main_buffer == NULL)
2983    {    {
2984    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
2985    goto EXIT2;    goto EXIT2;
2986    }    }
2987    
2988  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there are no files provided by -f,
2989  the first argument is the one and only pattern, and it must exist. */  the first argument is the one and only pattern, and it must exist. */
2990    
2991  if (cmd_pattern_count == 0 && pattern_filename == NULL)  if (patterns == NULL && pattern_files == NULL)
2992    {    {
2993    if (i >= argc) return usage(2);    if (i >= argc) return usage(2);
2994    patterns[cmd_pattern_count++] = argv[i++];    patterns = patterns_last = add_pattern(argv[i++], NULL);
2995      if (patterns == NULL) goto EXIT2;
2996    }    }
2997    
2998  /* Compile the patterns that were provided on the command line, either by  /* Compile the patterns that were provided on the command line, either by
2999  multiple uses of -e or as a single unkeyed pattern. */  multiple uses of -e or as a single unkeyed pattern. We cannot do this until
3000    after all the command-line options are read so that we know which PCRE options
3001    to use. When -F is used, compile_pattern() may add another block into the
3002    chain, so we must not access the next pointer till after the compile. */
3003    
3004  for (j = 0; j < cmd_pattern_count; j++)  for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3005    {    {
3006    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
3007         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 1 && patterns->next == NULL)? 0 : j))
3008      goto EXIT2;      goto EXIT2;
3009    }    }
3010    
3011  /* Compile the regular expressions that are provided in a file. */  /* Read and compile the regular expressions that are provided in files. */
3012    
3013  if (pattern_filename != NULL)  for (fn = pattern_files; fn != NULL; fn = fn->next)
3014    {    {
3015    int linenumber = 0;    if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3016    FILE *f;      goto EXIT2;
3017    char *filename;    }
   char buffer[MBUFTHIRD];  
3018    
3019    if (strcmp(pattern_filename, "-") == 0)  /* Study the regular expressions, as we will be running them many times. If an
3020      {  extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
3021      f = stdin;  returned, even if studying produces no data. */
     filename = stdin_name;  
     }  
   else  
     {  
     f = fopen(pattern_filename, "r");  
     if (f == NULL)  
       {  
       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,  
         strerror(errno));  
       goto EXIT2;  
       }  
     filename = pattern_filename;  
     }  
3022    
3023    while (fgets(buffer, MBUFTHIRD, f) != NULL)  if (match_limit > 0 || match_limit_recursion > 0)
3024      {    study_options |= PCRE_STUDY_EXTRA_NEEDED;
     char *s = buffer + (int)strlen(buffer);  
     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;  
     *s = 0;  
     linenumber++;  
     if (buffer[0] == 0) continue;   /* Skip blank lines */  
     if (!compile_pattern(buffer, pcre_options, filename, linenumber))  
       goto EXIT2;  
     }  
3025    
3026    if (f != stdin) fclose(f);  /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
   }  
3027    
3028  /* Study the regular expressions, as we will be running them many times */  #ifdef SUPPORT_PCREGREP_JIT
3029    if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3030      jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
3031    #endif
3032    
3033  for (j = 0; j < pattern_count; j++)  for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3034    {    {
3035    hints_list[j] = pcre_study(pattern_list[j], 0, &error);    cp->hint = pcre_study(cp->compiled, study_options, &error);
3036    if (error != NULL)    if (error != NULL)
3037      {      {
3038      char s[16];      char s[16];
3039      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
3040      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
3041      goto EXIT2;      goto EXIT2;
3042      }      }
3043    hint_count++;  #ifdef SUPPORT_PCREGREP_JIT
3044      if (jit_stack != NULL && cp->hint != NULL)
3045        pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
3046    #endif
3047    }    }
3048    
3049  /* If there are include or exclude patterns, compile them. */  /* If --match-limit or --recursion-limit was set, put the value(s) into the
3050    pcre_extra block for each pattern. There will always be an extra block because
3051    of the use of PCRE_STUDY_EXTRA_NEEDED above. */
3052    
3053  if (exclude_pattern != NULL)  for (cp = patterns; cp != NULL; cp = cp->next)
3054    {    {
3055    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,    if (match_limit > 0)
     pcretables);  
   if (exclude_compiled == NULL)  
3056      {      {
3057      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
3058        errptr, error);      cp->hint->match_limit = match_limit;
3059      goto EXIT2;      }
3060    
3061      if (match_limit_recursion > 0)
3062        {
3063        cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3064        cp->hint->match_limit_recursion = match_limit_recursion;
3065      }      }
3066    }    }
3067    
3068  if (include_pattern != NULL)  /* If there are include or exclude patterns read from the command line, compile
3069    them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3070    0. */
3071    
3072    for (j = 0; j < 4; j++)
3073    {    {
3074    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,    int k;
3075      pcretables);    for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
   if (include_compiled == NULL)  
3076      {      {
3077      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
3078        errptr, error);           (k == 1 && cp->next == NULL)? 0 : k))
3079      goto EXIT2;        goto EXIT2;
3080      }      }
3081    }    }
3082    
3083  /* If there are no further arguments, do the business on stdin and exit. */  /* Read and compile include/exclude patterns from files. */
3084    
3085    for (fn = include_from; fn != NULL; fn = fn->next)
3086      {
3087      if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3088        goto EXIT2;
3089      }
3090    
3091    for (fn = exclude_from; fn != NULL; fn = fn->next)
3092      {
3093      if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3094        goto EXIT2;
3095      }
3096    
3097    /* If there are no files that contain lists of files to search, and there are
3098    no file arguments, search stdin, and then exit. */
3099    
3100  if (i >= argc)  if (file_lists == NULL && i >= argc)
3101    {    {
3102    rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);    rc = pcregrep(stdin, FR_PLAIN, stdin_name,
3103        (filenames > FN_DEFAULT)? stdin_name : NULL);
3104    goto EXIT;    goto EXIT;
3105    }    }
3106    
3107  /* Otherwise, work through the remaining arguments as files or directories.  /* If any files that contain lists of files to search have been specified,
3108  Pass in the fact that there is only one argument at top level - this suppresses  read them line by line and search the given files. */
3109  the file name if the argument is not a directory and filenames are not  
3110  otherwise forced. */  for (fn = file_lists; fn != NULL; fn = fn->next)
3111      {
3112      char buffer[PATBUFSIZE];
3113      FILE *fl;
3114      if (strcmp(fn->name, "-") == 0) fl = stdin; else
3115        {
3116        fl = fopen(fn->name, "rb");
3117        if (fl == NULL)
3118          {
3119          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
3120            strerror(errno));
3121          goto EXIT2;
3122          }
3123        }
3124      while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3125        {
3126        int frc;
3127        char *end = buffer + (int)strlen(buffer);
3128        while (end > buffer && isspace(end[-1])) end--;
3129        *end = 0;
3130        if (*buffer != 0)
3131          {
3132          frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3133          if (frc > 1) rc = frc;
3134            else if (frc == 0 && rc == 1) rc = 0;
3135          }
3136        }
3137      if (fl != stdin) fclose(fl);
3138      }
3139    
3140    /* After handling file-list, work through remaining arguments. Pass in the fact
3141    that there is only one argument at top level - this suppresses the file name if
3142    the argument is not a directory and filenames are not otherwise forced. */
3143    
3144  only_one_at_top = i == argc - 1;   /* Catch initial value of i */  only_one_at_top = i == argc - 1 && file_lists == NULL;
3145    
3146  for (; i < argc; i++)  for (; i < argc; i++)
3147    {    {
# Line 2313  for (; i < argc; i++) Line 3152  for (; i < argc; i++)
3152    }    }
3153    
3154  EXIT:  EXIT:
3155  if (pattern_list != NULL)  #ifdef SUPPORT_PCREGREP_JIT
3156    {  if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3157    for (i = 0; i < pattern_count; i++) free(pattern_list[i]);  #endif
3158    free(pattern_list);  
3159    }  if (main_buffer != NULL) free(main_buffer);
3160  if (hints_list != NULL)  
3161    free_pattern_chain(patterns);
3162    free_pattern_chain(include_patterns);
3163    free_pattern_chain(include_dir_patterns);
3164    free_pattern_chain(exclude_patterns);
3165    free_pattern_chain(exclude_dir_patterns);
3166    
3167    free_file_chain(exclude_from);
3168    free_file_chain(include_from);
3169    free_file_chain(pattern_files);
3170    free_file_chain(file_lists);
3171    
3172    while (only_matching != NULL)
3173    {    {
3174    for (i = 0; i < hint_count; i++) free(hints_list[i]);    omstr *this = only_matching;
3175    free(hints_list);    only_matching = this->next;
3176      free(this);
3177    }    }
3178  return rc;  
3179    pcregrep_exit(rc);
3180    
3181  EXIT2:  EXIT2:
3182  rc = 2;  rc = 2;

Legend:
Removed from v.296  
changed lines
  Added in v.1324

  ViewVC Help
Powered by ViewVC 1.1.5