/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 96 by nigel, Fri Mar 2 13:10:43 2007 UTC revision 1033 by ph10, Mon Sep 10 11:02:48 2012 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <locale.h>  #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
 #include "config.h"  
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 56  POSSIBILITY OF SUCH DAMAGE. Line 70  POSSIBILITY OF SUCH DAMAGE.
70    
71  typedef int BOOL;  typedef int BOOL;
72    
73  #define VERSION "4.4 29-Nov-2006"  #define OFFSET_SIZE 99
 #define MAX_PATTERN_COUNT 100  
74    
75  #if BUFSIZ > 8192  #if BUFSIZ > 8192
76  #define MBUFTHIRD BUFSIZ  #define MAXPATLEN BUFSIZ
77  #else  #else
78  #define MBUFTHIRD 8192  #define MAXPATLEN 8192
79  #endif  #endif
80    
81    #define PATBUFSIZE (MAXPATLEN + 10)   /* Allows for prefix+suffix */
82    
83  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
84  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
85  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
86    
87  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
88    
89    /* File reading styles */
90    
91    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
92    
93  /* Actions for the -d and -D options */  /* Actions for the -d and -D options */
94    
# Line 84  enum { DEE_READ, DEE_SKIP }; Line 103  enum { DEE_READ, DEE_SKIP };
103    
104  /* Line ending types */  /* Line ending types */
105    
106  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
107    
108    /* Binary file options */
109    
110    enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
111    
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf(). The expansion is wrapped in do { } while (0) so that
"FWRITE(...);" is exactly one statement: it can then be the body of an if that
has an else without breaking the syntax or capturing the else. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
119    
120    
121    
# Line 109  static char *colour_string = (char *)"1; Line 140  static char *colour_string = (char *)"1;
140  static char *colour_option = NULL;  static char *colour_option = NULL;
141  static char *dee_option = NULL;  static char *dee_option = NULL;
142  static char *DEE_option = NULL;  static char *DEE_option = NULL;
143    static char *locale = NULL;
144    static char *main_buffer = NULL;
145  static char *newline = NULL;  static char *newline = NULL;
 static char *pattern_filename = NULL;  
146  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
 static char *locale = NULL;  
147    
148  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
149    
 static int  pattern_count = 0;  
 static pcre **pattern_list;  
 static pcre_extra **hints_list;  
   
 static char *include_pattern = NULL;  
 static char *exclude_pattern = NULL;  
   
 static pcre *include_compiled = NULL;  
 static pcre *exclude_compiled = NULL;  
   
150  static int after_context = 0;  static int after_context = 0;
151  static int before_context = 0;  static int before_context = 0;
152    static int binary_files = BIN_BINARY;
153  static int both_context = 0;  static int both_context = 0;
154    static int bufthird = PCREGREP_BUFSIZE;
155    static int bufsize = 3*PCREGREP_BUFSIZE;
156    
157    #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
158    static int dee_action = dee_SKIP;
159    #else
160  static int dee_action = dee_READ;  static int dee_action = dee_READ;
161    #endif
162    
163  static int DEE_action = DEE_READ;  static int DEE_action = DEE_READ;
164  static int error_count = 0;  static int error_count = 0;
165  static int filenames = FN_DEFAULT;  static int filenames = FN_DEFAULT;
166    static int only_matching = -1;
167    static int pcre_options = 0;
168  static int process_options = 0;  static int process_options = 0;
169    
170    #ifdef SUPPORT_PCREGREP_JIT
171    static int study_options = PCRE_STUDY_JIT_COMPILE;
172    #else
173    static int study_options = 0;
174    #endif
175    
176    static unsigned long int match_limit = 0;
177    static unsigned long int match_limit_recursion = 0;
178    
179  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
180  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
181    static BOOL file_offsets = FALSE;
182  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
183  static BOOL invert = FALSE;  static BOOL invert = FALSE;
184    static BOOL line_buffered = FALSE;
185    static BOOL line_offsets = FALSE;
186  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
187  static BOOL number = FALSE;  static BOOL number = FALSE;
188  static BOOL only_matching = FALSE;  static BOOL omit_zero_count = FALSE;
189    static BOOL resource_error = FALSE;
190  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
191  static BOOL silent = FALSE;  static BOOL silent = FALSE;
192  static BOOL utf8 = FALSE;  static BOOL utf8 = FALSE;
193    
194    /* Structure for list of file names (for -f and --{in,ex}clude-from) */
195    
196    typedef struct fnstr {   /* one node of a singly linked chain of file names */
197      struct fnstr *next;    /* following node; free_file_chain() walks this until NULL */
198      char *name;            /* the file name; not released by free_file_chain() */
199    } fnstr;
200    
201    static fnstr *exclude_from = NULL;
202    static fnstr *exclude_from_last = NULL;
203    static fnstr *include_from = NULL;
204    static fnstr *include_from_last = NULL;
205    
206    static fnstr *file_lists = NULL;
207    static fnstr *file_lists_last = NULL;
208    static fnstr *pattern_files = NULL;
209    static fnstr *pattern_files_last = NULL;
210    
211    /* Structure for holding the two variables that describe a file name chain. */
212    
213    typedef struct fndatastr {   /* locates one file name chain via its two variables */
214      fnstr **anchor;            /* address of the chain's head variable (e.g. &exclude_from) */
215      fnstr **lastptr;           /* address of the matching "_last" variable - presumably the tail; confirm at the use site */
216    } fndatastr;
217    
218    static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
219    static fndatastr include_from_data = { &include_from, &include_from_last };
220    static fndatastr file_lists_data = { &file_lists, &file_lists_last };
221    static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
222    
223    /* Structure for pattern and its compiled form; used for matching patterns and
224    also for include/exclude patterns. */
225    
226    typedef struct patstr {   /* one node of a singly linked chain of patterns */
227      struct patstr *next;    /* following node, or NULL at the end of the chain */
228      char *string;           /* pattern text as passed to add_pattern(); the pointer is stored, not a copy */
229      pcre *compiled;         /* compiled form; NULL until set, released with pcre_free() */
230      pcre_extra *hint;       /* study data; NULL until set, released with pcre_free_study() */
231    } patstr;
232    
233    static patstr *patterns = NULL;
234    static patstr *patterns_last = NULL;
235    static patstr *include_patterns = NULL;
236    static patstr *include_patterns_last = NULL;
237    static patstr *exclude_patterns = NULL;
238    static patstr *exclude_patterns_last = NULL;
239    static patstr *include_dir_patterns = NULL;
240    static patstr *include_dir_patterns_last = NULL;
241    static patstr *exclude_dir_patterns = NULL;
242    static patstr *exclude_dir_patterns_last = NULL;
243    
244    /* Structure holding the two variables that describe a pattern chain. A pointer
245    to such structures is used for each appropriate option. */
246    
247    typedef struct patdatastr {   /* locates one pattern chain via its two variables */
248      patstr **anchor;            /* address of the chain's head variable (e.g. &patterns) */
249      patstr **lastptr;           /* address of the matching "_last" variable - presumably the tail; confirm at the use site */
250    } patdatastr;
251    
252    static patdatastr match_patdata = { &patterns, &patterns_last };
253    static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
254    static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
255    static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
256    static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
257    
258    static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
259                                     &include_dir_patterns, &exclude_dir_patterns };
260    
261    static const char *incexname[4] = { "--include", "--exclude",
262                                        "--include-dir", "--exclude-dir" };
263    
264  /* Structure for options and list of them */  /* Structure for options and list of them */
265    
266  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
267         OP_PATLIST };         OP_OP_NUMBER, OP_PATLIST, OP_FILELIST, OP_BINFILES };
268    
269  typedef struct option_item {  typedef struct option_item {
270    int type;    int type;
# Line 162  typedef struct option_item { Line 277  typedef struct option_item {
277  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
278  used to identify them. */  used to identify them. */
279    
280  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
281  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
282  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
283  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
284  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
285  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
286  #define N_NULL      (-7)  #define N_LABEL        (-7)
287    #define N_LOCALE       (-8)
288    #define N_NULL         (-9)
289    #define N_LOFFSETS     (-10)
290    #define N_FOFFSETS     (-11)
291    #define N_LBUFFER      (-12)
292    #define N_M_LIMIT      (-13)
293    #define N_M_LIMIT_REC  (-14)
294    #define N_BUFSIZE      (-15)
295    #define N_NOJIT        (-16)
296    #define N_FILE_LIST    (-17)
297    #define N_BINARY_FILES (-18)
298    #define N_EXCLUDE_FROM (-19)
299    #define N_INCLUDE_FROM (-20)
300    
301  static option_item optionlist[] = {  static option_item optionlist[] = {
302    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
303    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
304    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
305    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
306    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
307    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },    { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
308    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
309    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
310    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
311    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
312    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
313    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
314    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
315    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
316    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
317    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
318    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
319    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
320    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
321    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
322    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
323    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LR, CRLF)" },    { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
324    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },  #ifdef SUPPORT_PCREGREP_JIT
325    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
326    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },  #else
327    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
328    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },  #endif
329    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
330      { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
331      { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
332      { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
333      { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
334      { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
335      { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
336      { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
337      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
338      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
339      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
340      { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
341      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
342      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
343      { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
344      { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
345      { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
346      { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
347      { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
348      { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
349    
350      /* These two were accidentally implemented with underscores instead of
351      hyphens in the option names. As this was not discovered for several releases,
352      the incorrect versions are left in the table for compatibility. However, the
353      --help function misses out any option that has an underscore in its name. */
354    
355      { OP_PATLIST,   N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
356      { OP_PATLIST,   N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
357    
358  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
359    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
360  #endif  #endif
# Line 214  static option_item optionlist[] = { Line 370  static option_item optionlist[] = {
370  /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F  /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
371  options. These set the 1, 2, and 4 bits in process_options, respectively. Note  options. These set the 1, 2, and 4 bits in process_options, respectively. Note
372  that the combination of -w and -x has the same effect as -x on its own, so we  that the combination of -w and -x has the same effect as -x on its own, so we
373  can treat them as the same. */  can treat them as the same. Note that the PATBUFSIZE macro assumes the longest
374    prefix+suffix is 10 characters; if anything longer is added, it must be
375    adjusted. */
376    
377  static const char *prefix[] = {  static const char *prefix[] = {
378    "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };    "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
# Line 222  static const char *prefix[] = { Line 380  static const char *prefix[] = {
380  static const char *suffix[] = {  static const char *suffix[] = {
381    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
382    
383  /* UTF-8 tables - used only when the newline setting is "all". */  /* UTF-8 tables - used only when the newline setting is "any". */
384    
385  const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
386    
# Line 235  const char utf8_table4[] = { Line 393  const char utf8_table4[] = {
393    
394    
395  /*************************************************  /*************************************************
396    *          Add item to chain of patterns         *
397    *************************************************/
398    
399    /* Used to add an item onto a chain, or just return an unconnected item if the
400    "after" argument is NULL.
401    
402    Arguments:
403      s          pattern string to add
404      after      if not NULL points to item to insert after
405    
406    Returns:     new pattern block, or NULL after malloc failure
407    */
408    
409    static patstr *
410    add_pattern(char *s, patstr *after)
411    {
412    patstr *p = (patstr *)malloc(sizeof(patstr));
413    if (p == NULL)
414      {
415      fprintf(stderr, "pcregrep: malloc failed\n");
416      return NULL;
417      }
418    if (strlen(s) > MAXPATLEN)
419      {
420      fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
421        MAXPATLEN);
422      return NULL;
423      }
424    p->next = NULL;
425    p->string = s;
426    p->compiled = NULL;
427    p->hint = NULL;
428    
429    if (after != NULL)
430      {
431      p->next = after->next;
432      after->next = p;
433      }
434    return p;
435    }
436    
437    
438    /*************************************************
439    *           Free chain of patterns               *
440    *************************************************/
441    
442    /* Used for several chains of patterns.
443    
444    Argument: pointer to start of chain
445    Returns:  nothing
446    */
447    
448    static void
449    free_pattern_chain(patstr *pc)
450    {
451    while (pc != NULL)
452      {
453      patstr *p = pc;
454      pc = p->next;
455      if (p->hint != NULL) pcre_free_study(p->hint);
456      if (p->compiled != NULL) pcre_free(p->compiled);
457      free(p);
458      }
459    }
460    
461    
462    /*************************************************
463    *           Free chain of file names             *
464    *************************************************/
465    
466    /*
467    Argument: pointer to start of chain
468    Returns:  nothing
469    */
470    
471    static void
472    free_file_chain(fnstr *fn)
473    {
474    while (fn != NULL)
475      {
476      fnstr *f = fn;
477      fn = f->next;
478      free(f);
479      }
480    }
481    
482    
483    /*************************************************
484    *         Exit from the program                  *
485    *************************************************/
486    
487    /* If there has been a resource error, give a suitable message.
488    
489    Argument:  the return code
490    Returns:   does not return
491    */
492    
493    static void
494    pcregrep_exit(int rc)
495    {
496    if (resource_error)
497      {
498      fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
499        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
500        PCRE_ERROR_JIT_STACKLIMIT);
501      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
502      }
503    
504    exit(rc);
505    }
506    
507    
508    /*************************************************
509  *            OS-specific functions               *  *            OS-specific functions               *
510  *************************************************/  *************************************************/
511    
# Line 244  although at present the only ones are fo Line 515  although at present the only ones are fo
515    
516  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
517    
518  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
519  #include <sys/types.h>  #include <sys/types.h>
520  #include <sys/stat.h>  #include <sys/stat.h>
521  #include <dirent.h>  #include <dirent.h>
522    
523  typedef DIR directory_type;  typedef DIR directory_type;
524    #define FILESEP '/'
525    
526  static int  static int
527  isdirectory(char *filename)  isdirectory(char *filename)
# Line 257  isdirectory(char *filename) Line 529  isdirectory(char *filename)
529  struct stat statbuf;  struct stat statbuf;
530  if (stat(filename, &statbuf) < 0)  if (stat(filename, &statbuf) < 0)
531    return 0;        /* In the expectation that opening as a file will fail */    return 0;        /* In the expectation that opening as a file will fail */
532  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return (statbuf.st_mode & S_IFMT) == S_IFDIR;
533  }  }
534    
535  static directory_type *  static directory_type *
# Line 276  for (;;) Line 548  for (;;)
548    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
549      return dent->d_name;      return dent->d_name;
550    }    }
551  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
552  }  }
553    
554  static void  static void
# Line 298  return (statbuf.st_mode & S_IFMT) == S_I Line 570  return (statbuf.st_mode & S_IFMT) == S_I
570  }  }
571    
572    
573  /************* Test stdout for being a terminal in Unix **********/  /************* Test for a terminal in Unix **********/
574    
575  static BOOL  static BOOL
576  is_stdout_tty(void)  is_stdout_tty(void)
# Line 306  is_stdout_tty(void) Line 578  is_stdout_tty(void)
578  return isatty(fileno(stdout));  return isatty(fileno(stdout));
579  }  }
580    
581    static BOOL
582    is_file_tty(FILE *f)
583    {
584    return isatty(fileno(f));
585    }
586    
587    
588  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
589    
590  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
591  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
592  when it did not exist. */  when it did not exist. David Byron added a patch that moved the #include of
593    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
594    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
595    undefined when it is indeed undefined. */
596    
597  #elif HAVE_WIN32API  #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
598    
599  #ifndef STRICT  #ifndef STRICT
600  # define STRICT  # define STRICT
# Line 322  when it did not exist. */ Line 602  when it did not exist. */
602  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
603  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
604  #endif  #endif
605    
606    #include <windows.h>
607    
608  #ifndef INVALID_FILE_ATTRIBUTES  #ifndef INVALID_FILE_ATTRIBUTES
609  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
610  #endif  #endif
611    
 #include <windows.h>  
   
612  typedef struct directory_type  typedef struct directory_type
613  {  {
614  HANDLE handle;  HANDLE handle;
# Line 335  BOOL first; Line 616  BOOL first;
616  WIN32_FIND_DATA data;  WIN32_FIND_DATA data;
617  } directory_type;  } directory_type;
618    
619    #define FILESEP '/'
620    
621  int  int
622  isdirectory(char *filename)  isdirectory(char *filename)
623  {  {
624  DWORD attr = GetFileAttributes(filename);  DWORD attr = GetFileAttributes(filename);
625  if (attr == INVALID_FILE_ATTRIBUTES)  if (attr == INVALID_FILE_ATTRIBUTES)
626    return 0;    return 0;
627  return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;  return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
628  }  }
629    
630  directory_type *  directory_type *
# Line 352  char *pattern; Line 635  char *pattern;
635  directory_type *dir;  directory_type *dir;
636  DWORD err;  DWORD err;
637  len = strlen(filename);  len = strlen(filename);
638  pattern = (char *) malloc(len + 3);  pattern = (char *)malloc(len + 3);
639  dir = (directory_type *) malloc(sizeof(*dir));  dir = (directory_type *)malloc(sizeof(*dir));
640  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
641    {    {
642    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
643    exit(2);    pcregrep_exit(2);
644    }    }
645  memcpy(pattern, filename, len);  memcpy(pattern, filename, len);
646  memcpy(&(pattern[len]), "\\*", 3);  memcpy(&(pattern[len]), "\\*", 3);
# Line 412  regular if they are not directories. */ Line 695  regular if they are not directories. */
695    
696  int isregfile(char *filename)  int isregfile(char *filename)
697  {  {
698  return !isdirectory(filename)  return !isdirectory(filename);
699  }  }
700    
701    
702  /************* Test stdout for being a terminal in Win32 **********/  /************* Test for a terminal in Win32 **********/
703    
704  /* I don't know how to do this; assume never */  /* I don't know how to do this; assume never */
705    
706  static BOOL  static BOOL
707  is_stdout_tty(void)  is_stdout_tty(void)
708  {  {
709  FALSE;  return FALSE;
710    }
711    
712    static BOOL
713    is_file_tty(FILE *f)
714    {
715    return FALSE;
716  }  }
717    
718    
# Line 433  FALSE; Line 722  FALSE;
722    
723  #else  #else
724    
725    #define FILESEP 0
726  typedef void directory_type;  typedef void directory_type;
727    
728  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
729  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
730  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
731  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
732    
733    
# Line 448  void closedirectory(directory_type *dir) Line 738  void closedirectory(directory_type *dir)
738  int isregfile(char *filename) { return 1; }  int isregfile(char *filename) { return 1; }
739    
740    
741  /************* Test stdout for being a terminal when we can't do it **********/  /************* Test for a terminal when we can't do it **********/
742    
743  static BOOL  static BOOL
744  is_stdout_tty(void)  is_stdout_tty(void)
# Line 456  is_stdout_tty(void) Line 746  is_stdout_tty(void)
746  return FALSE;  return FALSE;
747  }  }
748    
749    static BOOL
750    is_file_tty(FILE *f)
751    {
752    return FALSE;
753    }
754    
755  #endif  #endif
756    
757    
758    
759  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
760  /*************************************************  /*************************************************
761  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
762  *************************************************/  *************************************************/
# Line 484  return sys_errlist[n]; Line 779  return sys_errlist[n];
779    
780    
781  /*************************************************  /*************************************************
782    *            Test exclude/includes               *
783    *************************************************/
784    
785    /* If any exclude pattern matches, the path is excluded. Otherwise, unless
786    there are no includes, the path must match an include pattern.
787    
788    Arguments:
789      path      the path to be matched
790      ip        the chain of include patterns
791      ep        the chain of exclude patterns
792    
793    Returns:    TRUE if the path is not excluded
794    */
795    
796    static BOOL
797    test_incexc(char *path, patstr *ip, patstr *ep)
798    {
799    int plen = strlen(path);
800    
801    for (; ep != NULL; ep = ep->next)
802      {
803      if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
804        return FALSE;
805      }
806    
807    if (ip == NULL) return TRUE;
808    
809    for (; ip != NULL; ip = ip->next)
810      {
811      if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
812        return TRUE;
813      }
814    
815    return FALSE;
816    }
817    
818    
819    
820    /*************************************************
821    *            Read one line of input              *
822    *************************************************/
823    
824    /* Normally, input is read using fread() into a large buffer, so many lines may
825    be read at once. However, doing this for tty input means that no output appears
826    until a lot of input has been typed. Instead, tty input is handled line by
827    line. We cannot use fgets() for this, because it does not stop at a binary
828    zero, and therefore there is no way of telling how many characters it has read,
829    because there may be binary zeros embedded in the data.
830    
831    Arguments:
832      buffer     the buffer to read into
833      length     the maximum number of characters to read
834      f          the file
835    
836    Returns:     the number of characters read, zero at end of file
837    */
838    
839    static unsigned int
840    read_one_line(char *buffer, int length, FILE *f)
841    {
842    int c;
843    int yield = 0;
844    while ((c = fgetc(f)) != EOF)
845      {
846      buffer[yield++] = c;
847      if (c == '\n' || yield >= length) break;
848      }
849    return yield;
850    }
851    
852    
853    
854    /*************************************************
855  *             Find end of line                   *  *             Find end of line                   *
856  *************************************************/  *************************************************/
857    
# Line 495  Arguments: Line 863  Arguments:
863    endptr    end of available data    endptr    end of available data
864    lenptr    where to put the length of the eol sequence    lenptr    where to put the length of the eol sequence
865    
866  Returns:    pointer to the last byte of the line  Returns:    pointer after the last byte of the line,
867                including the newline byte(s)
868  */  */
869    
870  static char *  static char *
# Line 541  switch(endlinetype) Line 910  switch(endlinetype)
910      }      }
911    break;    break;
912    
913      case EL_ANYCRLF:
914      while (p < endptr)
915        {
916        int extra = 0;
917        register int c = *((unsigned char *)p);
918    
919        if (utf8 && c >= 0xc0)
920          {
921          int gcii, gcss;
922          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
923          gcss = 6*extra;
924          c = (c & utf8_table3[extra]) << gcss;
925          for (gcii = 1; gcii <= extra; gcii++)
926            {
927            gcss -= 6;
928            c |= (p[gcii] & 0x3f) << gcss;
929            }
930          }
931    
932        p += 1 + extra;
933    
934        switch (c)
935          {
936          case '\n':
937          *lenptr = 1;
938          return p;
939    
940          case '\r':
941          if (p < endptr && *p == '\n')
942            {
943            *lenptr = 2;
944            p++;
945            }
946          else *lenptr = 1;
947          return p;
948    
949          default:
950          break;
951          }
952        }   /* End of loop for ANYCRLF case */
953    
954      *lenptr = 0;  /* Must have hit the end */
955      return endptr;
956    
957    case EL_ANY:    case EL_ANY:
958    while (p < endptr)    while (p < endptr)
959      {      {
# Line 564  switch(endlinetype) Line 977  switch(endlinetype)
977    
978      switch (c)      switch (c)
979        {        {
980        case 0x0a:    /* LF */        case '\n':    /* LF */
981        case 0x0b:    /* VT */        case '\v':    /* VT */
982        case 0x0c:    /* FF */        case '\f':    /* FF */
983        *lenptr = 1;        *lenptr = 1;
984        return p;        return p;
985    
986        case 0x0d:    /* CR */        case '\r':    /* CR */
987        if (p < endptr && *p == 0x0a)        if (p < endptr && *p == '\n')
988          {          {
989          *lenptr = 2;          *lenptr = 2;
990          p++;          p++;
# Line 579  switch(endlinetype) Line 992  switch(endlinetype)
992        else *lenptr = 1;        else *lenptr = 1;
993        return p;        return p;
994    
995        case 0x85:    /* NEL */  #ifndef EBCDIC
996          case 0x85:    /* Unicode NEL */
997        *lenptr = utf8? 2 : 1;        *lenptr = utf8? 2 : 1;
998        return p;        return p;
999    
1000        case 0x2028:  /* LS */        case 0x2028:  /* Unicode LS */
1001        case 0x2029:  /* PS */        case 0x2029:  /* Unicode PS */
1002        *lenptr = 3;        *lenptr = 3;
1003        return p;        return p;
1004    #endif  /* Not EBCDIC */
1005    
1006        default:        default:
1007        break;        break;
# Line 639  switch(endlinetype) Line 1054  switch(endlinetype)
1054    return p;   /* But control should never get here */    return p;   /* But control should never get here */
1055    
1056    case EL_ANY:    case EL_ANY:
1057      case EL_ANYCRLF:
1058    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1059    if (utf8) while ((*p & 0xc0) == 0x80) p--;    if (utf8) while ((*p & 0xc0) == 0x80) p--;
1060    
# Line 667  switch(endlinetype) Line 1083  switch(endlinetype)
1083        }        }
1084      else c = *((unsigned char *)pp);      else c = *((unsigned char *)pp);
1085    
1086      switch (c)      if (endlinetype == EL_ANYCRLF) switch (c)
1087        {        {
1088        case 0x0a:    /* LF */        case '\n':    /* LF */
1089        case 0x0b:    /* VT */        case '\r':    /* CR */
1090        case 0x0c:    /* FF */        return p;
1091        case 0x0d:    /* CR */  
1092        case 0x85:    /* NEL */        default:
1093        case 0x2028:  /* LS */        break;
1094        case 0x2029:  /* PS */        }
1095    
1096        else switch (c)
1097          {
1098          case '\n':    /* LF */
1099          case '\v':    /* VT */
1100          case '\f':    /* FF */
1101          case '\r':    /* CR */
1102    #ifndef EBCDIE
1103          case 0x85:    /* Unicode NEL */
1104          case 0x2028:  /* Unicode LS */
1105          case 0x2029:  /* Unicode PS */
1106    #endif  /* Not EBCDIC */
1107        return p;        return p;
1108    
1109        default:        default:
# Line 710  Arguments: Line 1138  Arguments:
1138  Returns:            nothing  Returns:            nothing
1139  */  */
1140    
1141  static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,  static void
1142    char *endptr, char *printname)  do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1143      char *printname)
1144  {  {
1145  if (after_context > 0 && lastmatchnumber > 0)  if (after_context > 0 && lastmatchnumber > 0)
1146    {    {
# Line 723  if (after_context > 0 && lastmatchnumber Line 1152  if (after_context > 0 && lastmatchnumber
1152      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
1153      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1154      pp = end_of_line(pp, endptr, &ellength);      pp = end_of_line(pp, endptr, &ellength);
1155      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1156      lastmatchrestart = pp;      lastmatchrestart = pp;
1157      }      }
1158    hyphenpending = TRUE;    hyphenpending = TRUE;
# Line 733  if (after_context > 0 && lastmatchnumber Line 1162  if (after_context > 0 && lastmatchnumber
1162    
1163    
1164  /*************************************************  /*************************************************
1165    *   Apply patterns to subject till one matches   *
1166    *************************************************/
1167    
1168    /* This function is called to run through all patterns, looking for a match. It
1169    is used multiple times for the same subject when colouring is enabled, in order
1170    to find all possible matches.
1171    
1172    Arguments:
1173      matchptr     the start of the subject
1174      length       the length of the subject to match
1175      startoffset  where to start matching
1176      offsets      the offsets vector to fill in
1177      mrc          address of where to put the result of pcre_exec()
1178    
1179    Returns:      TRUE if there was a match
1180                  FALSE if there was no match
1181                  invert if there was a non-fatal error
1182    */
1183    
1184    static BOOL
1185    match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
1186      int *mrc)
1187    {
1188    int i;
1189    size_t slen = length;
1190    patstr *p = patterns;
1191    const char *msg = "this text:\n\n";
1192    
1193    if (slen > 200)
1194      {
1195      slen = 200;
1196      msg = "text that starts:\n\n";
1197      }
1198    for (i = 1; p != NULL; p = p->next, i++)
1199      {
1200      *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1201        startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
1202      if (*mrc >= 0) return TRUE;
1203      if (*mrc == PCRE_ERROR_NOMATCH) continue;
1204      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1205      if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1206      fprintf(stderr, "%s", msg);
1207      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1208      fprintf(stderr, "\n\n");
1209      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1210          *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1211        resource_error = TRUE;
1212      if (error_count++ > 20)
1213        {
1214        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1215        pcregrep_exit(2);
1216        }
1217      return invert;    /* No more matching; don't show the line again */
1218      }
1219    
1220    return FALSE;  /* No match, no errors */
1221    }
1222    
1223    
1224    
1225    /*************************************************
1226  *            Grep an individual file             *  *            Grep an individual file             *
1227  *************************************************/  *************************************************/
1228    
1229  /* This is called from grep_or_recurse() below. It uses a buffer that is three  /* This is called from grep_or_recurse() below. It uses a buffer that is three
1230  times the value of MBUFTHIRD. The matching point is never allowed to stray into  times the value of bufthird. The matching point is never allowed to stray into
1231  the top third of the buffer, thus keeping more of the file available for  the top third of the buffer, thus keeping more of the file available for
1232  context printing or for multiline scanning. For large files, the pointer will  context printing or for multiline scanning. For large files, the pointer will
1233  be in the middle third most of the time, so the bottom third is available for  be in the middle third most of the time, so the bottom third is available for
1234  "before" context printing.  "before" context printing.
1235    
1236  Arguments:  Arguments:
1237    in           the fopened FILE stream    handle       the fopened FILE stream for a normal file
1238                   the gzFile pointer when reading is via libz
1239                   the BZFILE pointer when reading is via libbz2
1240      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1241      filename     the file name or NULL (for errors)
1242    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
1243                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
1244                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
1245    
1246  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
1247                 1 otherwise (no matches)                 1 otherwise (no matches)
1248                   2 if an overlong line is encountered
1249                   3 if there is a read error on a .bz2 file
1250  */  */
1251    
1252  static int  static int
1253  pcregrep(FILE *in, char *printname)  pcregrep(void *handle, int frtype, char *filename, char *printname)
1254  {  {
1255  int rc = 1;  int rc = 1;
1256  int linenumber = 1;  int linenumber = 1;
1257  int lastmatchnumber = 0;  int lastmatchnumber = 0;
1258  int count = 0;  int count = 0;
1259  int offsets[99];  int filepos = 0;
1260    int offsets[OFFSET_SIZE];
1261  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
1262  char buffer[3*MBUFTHIRD];  char *ptr = main_buffer;
 char *ptr = buffer;  
1263  char *endptr;  char *endptr;
1264  size_t bufflength;  size_t bufflength;
1265    BOOL binary = FALSE;
1266  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
1267    BOOL input_line_buffered = line_buffered;
1268    FILE *in = NULL;                    /* Ensure initialized */
1269    
1270    #ifdef SUPPORT_LIBZ
1271    gzFile ingz = NULL;
1272    #endif
1273    
1274    #ifdef SUPPORT_LIBBZ2
1275    BZFILE *inbz2 = NULL;
1276    #endif
1277    
1278    
1279  /* Do the first read into the start of the buffer and set up the pointer to  /* Do the first read into the start of the buffer and set up the pointer to end
1280  end of what we have. */  of what we have. In the case of libz, a non-zipped .gz file will be read as a
1281    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1282    fail. */
1283    
1284    #ifdef SUPPORT_LIBZ
1285    if (frtype == FR_LIBZ)
1286      {
1287      ingz = (gzFile)handle;
1288      bufflength = gzread (ingz, main_buffer, bufsize);
1289      }
1290    else
1291    #endif
1292    
1293  bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  #ifdef SUPPORT_LIBBZ2
1294  endptr = buffer + bufflength;  if (frtype == FR_LIBBZ2)
1295      {
1296      inbz2 = (BZFILE *)handle;
1297      bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1298      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1299      }                                    /* without the cast it is unsigned. */
1300    else
1301    #endif
1302    
1303      {
1304      in = (FILE *)handle;
1305      if (is_file_tty(in)) input_line_buffered = TRUE;
1306      bufflength = input_line_buffered?
1307        read_one_line(main_buffer, bufsize, in) :
1308        fread(main_buffer, 1, bufsize, in);
1309      }
1310    
1311    endptr = main_buffer + bufflength;
1312    
1313    /* Unless binary-files=text, see if we have a binary file. This uses the same
1314    rule as GNU grep, namely, a search for a binary zero byte near the start of the
1315    file. */
1316    
1317    if (binary_files != BIN_TEXT)
1318      {
1319      binary =
1320        memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1321      if (binary && binary_files == BIN_NOMATCH) return 1;
1322      }
1323    
1324  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
1325  files, endptr will be at the end of the buffer when we are in the middle of the  files, endptr will be at the end of the buffer when we are in the middle of the
# Line 781  way, the buffer is shifted left and re-f Line 1328  way, the buffer is shifted left and re-f
1328    
1329  while (ptr < endptr)  while (ptr < endptr)
1330    {    {
1331    int i, endlinelength;    int endlinelength;
1332    int mrc = 0;    int mrc = 0;
1333    BOOL match = FALSE;    int startoffset = 0;
1334      BOOL match;
1335      char *matchptr = ptr;
1336    char *t = ptr;    char *t = ptr;
1337    size_t length, linelength;    size_t length, linelength;
1338    
1339    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
1340    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
1341    length remainder of the data in the buffer. Otherwise, it is the length of    length remainder of the data in the buffer. Otherwise, it is the length of
1342    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
1343    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1344    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
1345      first line. */
1346    
1347    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
1348    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
1349    length = multiline? endptr - ptr : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
1350    
1351      /* Check to see if the line we are looking at extends right to the very end
1352      of the buffer without a line terminator. This means the line is too long to
1353      handle. */
1354    
1355      if (endlinelength == 0 && t == main_buffer + bufsize)
1356        {
1357        fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1358                        "pcregrep: check the --buffer-size option\n",
1359                        linenumber,
1360                        (filename == NULL)? "" : " of file ",
1361                        (filename == NULL)? "" : filename);
1362        return 2;
1363        }
1364    
1365    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
1366    
# Line 807  while (ptr < endptr) Line 1371  while (ptr < endptr)
1371        #include <time.h>        #include <time.h>
1372        struct timeval start_time, end_time;        struct timeval start_time, end_time;
1373        struct timezone dummy;        struct timezone dummy;
1374          int i;
1375    
1376        if (jfriedl_XT)        if (jfriedl_XT)
1377        {        {
# Line 815  while (ptr < endptr) Line 1380  while (ptr < endptr)
1380            ptr = malloc(newlen + 1);            ptr = malloc(newlen + 1);
1381            if (!ptr) {            if (!ptr) {
1382                    printf("out of memory");                    printf("out of memory");
1383                    exit(2);                    pcregrep_exit(2);
1384            }            }
1385            endptr = ptr;            endptr = ptr;
1386            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
# Line 832  while (ptr < endptr) Line 1397  while (ptr < endptr)
1397    
1398    
1399        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1400            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);            match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1401                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1402    
1403        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
1404                perror("bad gettimeofday");                perror("bad gettimeofday");
# Line 846  while (ptr < endptr) Line 1412  while (ptr < endptr)
1412    }    }
1413  #endif  #endif
1414    
1415      /* We come back here after a match when the -o option (only_matching) is set,
1416      in order to find any further matches in the same line. */
1417    
1418    /* Run through all the patterns until one matches. Note that we don't include    ONLY_MATCHING_RESTART:
   the final newline in the subject string. */  
1419    
1420    for (i = 0; i < pattern_count; i++)    /* Run through all the patterns until one matches or there is an error other
1421      {    than NOMATCH. This code is in a subroutine so that it can be re-used for
1422      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,    finding subsequent matches when colouring matched lines. */
1423        offsets, 99);  
1424      if (mrc >= 0) { match = TRUE; break; }    match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
     if (mrc != PCRE_ERROR_NOMATCH)  
       {  
       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);  
       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
       fprintf(stderr, "this line:\n");  
       fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */  
       fprintf(stderr, "\n");  
       if (error_count == 0 &&  
           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))  
         {  
         fprintf(stderr, "pcregrep: error %d means that a resource limit "  
           "was exceeded\n", mrc);  
         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
         }  
       if (error_count++ > 20)  
         {  
         fprintf(stderr, "pcregrep: too many errors - abandoned\n");  
         exit(2);  
         }  
       match = invert;    /* No more matching; don't show the line again */  
       break;  
       }  
     }  
1425    
1426    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1427    
# Line 893  while (ptr < endptr) Line 1437  while (ptr < endptr)
1437    
1438      if (count_only) count++;      if (count_only) count++;
1439    
1440        /* When handling a binary file and binary-files==binary, the "binary"
1441        variable will be set true (it's false in all other cases). In this
1442        situation we just want to output the file name. No need to scan further. */
1443    
1444        else if (binary)
1445          {
1446          fprintf(stdout, "Binary file %s matches\n", filename);
1447          return 0;
1448          }
1449    
1450      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1451      in the file. */      in the file. */
1452    
1453      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1454        {        {
1455        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1456        return 0;        return 0;
# Line 906  while (ptr < endptr) Line 1460  while (ptr < endptr)
1460    
1461      else if (quiet) return 0;      else if (quiet) return 0;
1462    
1463      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, or a
1464      does not print any context. */      captured portion of it, as long as this string is not empty, and the
1465        --file-offsets and --line-offsets options output offsets for the matching
1466        substring (they both force --only-matching = 0). None of these options
1467        prints any context. Afterwards, adjust the start and then jump back to look
1468        for further matches in the same line. If we are in invert mode, however,
1469        nothing is printed and we do not restart - this could still be useful
1470        because the return code is set. */
1471    
1472      else if (only_matching)      else if (only_matching >= 0)
1473        {        {
1474        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (!invert)
1475        if (number) fprintf(stdout, "%d:", linenumber);          {
1476        fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1477        fprintf(stdout, "\n");          if (number) fprintf(stdout, "%d:", linenumber);
1478            if (line_offsets)
1479              fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1480                offsets[1] - offsets[0]);
1481            else if (file_offsets)
1482              fprintf(stdout, "%d,%d\n",
1483                (int)(filepos + matchptr + offsets[0] - ptr),
1484                offsets[1] - offsets[0]);
1485            else if (only_matching < mrc)
1486              {
1487              int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1488              if (plen > 0)
1489                {
1490                if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1491                FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1492                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1493                fprintf(stdout, "\n");
1494                }
1495              }
1496            else if (printname != NULL || number) fprintf(stdout, "\n");
1497            match = FALSE;
1498            if (line_buffered) fflush(stdout);
1499            rc = 0;                      /* Had some success */
1500            startoffset = offsets[1];    /* Restart after the match */
1501            goto ONLY_MATCHING_RESTART;
1502            }
1503        }        }
1504    
1505      /* This is the default case when none of the above options is set. We print      /* This is the default case when none of the above options is set. We print
# Line 948  while (ptr < endptr) Line 1533  while (ptr < endptr)
1533            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1534            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1535            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1536            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1537            lastmatchrestart = pp;            lastmatchrestart = pp;
1538            }            }
1539          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
# Line 971  while (ptr < endptr) Line 1556  while (ptr < endptr)
1556          int linecount = 0;          int linecount = 0;
1557          char *p = ptr;          char *p = ptr;
1558    
1559          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1560                 linecount < before_context)                 linecount < before_context)
1561            {            {
1562            linecount++;            linecount++;
1563            p = previous_line(p, buffer);            p = previous_line(p, main_buffer);
1564            }            }
1565    
1566          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 988  while (ptr < endptr) Line 1573  while (ptr < endptr)
1573            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1574            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1575            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1576            fwrite(p, 1, pp - p, stdout);            FWRITE(p, 1, pp - p, stdout);
1577            p = pp;            p = pp;
1578            }            }
1579          }          }
# Line 1004  while (ptr < endptr) Line 1589  while (ptr < endptr)
1589    
1590        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1591        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1592        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1593        start of the match will always be before the first newline sequence. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1594          the match will always be before the first newline sequence. */
1595    
1596        if (multiline)        if (multiline & !invert)
1597          {          {
         int ellength;  
1598          char *endmatch = ptr + offsets[1];          char *endmatch = ptr + offsets[1];
1599          t = ptr;          t = ptr;
1600          while (t < endmatch)          while (t < endmatch)
1601            {            {
1602            t = end_of_line(t, endptr, &ellength);            t = end_of_line(t, endptr, &endlinelength);
1603            if (t <= endmatch) linenumber++; else break;            if (t < endmatch) linenumber++; else break;
1604            }            }
1605          endmatch = end_of_line(endmatch, endptr, &ellength);          linelength = t - ptr - endlinelength;
         linelength = endmatch - ptr - ellength;  
1606          }          }
1607    
1608        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 1033  while (ptr < endptr) Line 1617  while (ptr < endptr)
1617          {          {
1618          int first = S_arg * 2;          int first = S_arg * 2;
1619          int last  = first + 1;          int last  = first + 1;
1620          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1621          fprintf(stdout, "X");          fprintf(stdout, "X");
1622          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1623          }          }
1624        else        else
1625  #endif  #endif
1626    
1627        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1628          matches, but not of course if the line is a non-match. */
1629    
1630        if (do_colour)        if (do_colour && !invert)
1631          {          {
1632          fwrite(ptr, 1, offsets[0], stdout);          int plength;
1633            FWRITE(ptr, 1, offsets[0], stdout);
1634          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1635          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1636          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1637          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          for (;;)
1638              {
1639              startoffset = offsets[1];
1640              if (startoffset >= (int)linelength + endlinelength ||
1641                  !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1642                break;
1643              FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1644              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1645              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1646              fprintf(stdout, "%c[00m", 0x1b);
1647              }
1648    
1649            /* In multiline mode, we may have already printed the complete line
1650            and its line-ending characters (if they matched the pattern), so there
1651            may be no more to print. */
1652    
1653            plength = (int)((linelength + endlinelength) - startoffset);
1654            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1655          }          }
1656        else fwrite(ptr, 1, linelength + endlinelength, stdout);  
1657          /* Not colouring; no need to search for further matches */
1658    
1659          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1660        }        }
1661    
1662      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match. If --line-buffered was
1663        given, flush the output. */
1664    
1665        if (line_buffered) fflush(stdout);
1666      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1667    
1668      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
# Line 1064  while (ptr < endptr) Line 1672  while (ptr < endptr)
1672      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1673      }      }
1674    
1675    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1676      anything to be printed), we have to move on to the end of the match before
1677      proceeding. */
1678    
1679      if (multiline && invert && match)
1680        {
1681        int ellength;
1682        char *endmatch = ptr + offsets[1];
1683        t = ptr;
1684        while (t < endmatch)
1685          {
1686          t = end_of_line(t, endptr, &ellength);
1687          if (t <= endmatch) linenumber++; else break;
1688          }
1689        endmatch = end_of_line(endmatch, endptr, &ellength);
1690        linelength = endmatch - ptr - ellength;
1691        }
1692    
1693      /* Advance to after the newline and increment the line number. The file
1694      offset to the current line is maintained in filepos. */
1695    
1696    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
1697      filepos += (int)(linelength + endlinelength);
1698    linenumber++;    linenumber++;
1699    
1700      /* If input is line buffered, and the buffer is not yet full, read another
1701      line and add it into the buffer. */
1702    
1703      if (input_line_buffered && bufflength < (size_t)bufsize)
1704        {
1705        int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1706        bufflength += add;
1707        endptr += add;
1708        }
1709    
1710    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1711    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1712    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
1713    about to be lost, print them. */    about to be lost, print them. */
1714    
1715    if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)    if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1716      {      {
1717      if (after_context > 0 &&      if (after_context > 0 &&
1718          lastmatchnumber > 0 &&          lastmatchnumber > 0 &&
1719          lastmatchrestart < buffer + MBUFTHIRD)          lastmatchrestart < main_buffer + bufthird)
1720        {        {
1721        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1722        lastmatchnumber = 0;        lastmatchnumber = 0;
# Line 1086  while (ptr < endptr) Line 1724  while (ptr < endptr)
1724    
1725      /* Now do the shuffle */      /* Now do the shuffle */
1726    
1727      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1728      ptr -= MBUFTHIRD;      ptr -= bufthird;
1729      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);  
1730      endptr = buffer + bufflength;  #ifdef SUPPORT_LIBZ
1731        if (frtype == FR_LIBZ)
1732          bufflength = 2*bufthird +
1733            gzread (ingz, main_buffer + 2*bufthird, bufthird);
1734        else
1735    #endif
1736    
1737    #ifdef SUPPORT_LIBBZ2
1738        if (frtype == FR_LIBBZ2)
1739          bufflength = 2*bufthird +
1740            BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1741        else
1742    #endif
1743    
1744        bufflength = 2*bufthird +
1745          (input_line_buffered?
1746           read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1747           fread(main_buffer + 2*bufthird, 1, bufthird, in));
1748        endptr = main_buffer + bufflength;
1749    
1750      /* Adjust any last match point */      /* Adjust any last match point */
1751    
1752      if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;      if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1753      }      }
1754    }     /* Loop through the whole file */    }     /* Loop through the whole file */
1755    
1756  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
1757  hyphenpending if it prints something. */  hyphenpending if it prints something. */
1758    
1759  if (!only_matching && !count_only)  if (only_matching < 0 && !count_only)
1760    {    {
1761    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1762    hyphenpending |= endhyphenpending;    hyphenpending |= endhyphenpending;
# Line 1119  if (filenames == FN_NOMATCH_ONLY) Line 1775  if (filenames == FN_NOMATCH_ONLY)
1775    
1776  if (count_only)  if (count_only)
1777    {    {
1778    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1779    fprintf(stdout, "%d\n", count);      {
1780        if (printname != NULL && filenames != FN_NONE)
1781          fprintf(stdout, "%s:", printname);
1782        fprintf(stdout, "%d\n", count);
1783        }
1784    }    }
1785    
1786  return rc;  return rc;
# Line 1140  Arguments: Line 1800  Arguments:
1800    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1801    only_one_at_top   TRUE if the path is the only one at toplevel    only_one_at_top   TRUE if the path is the only one at toplevel
1802    
1803  Returns:   0 if there was at least one match  Returns:  -1 the file/directory was skipped
1804               0 if there was at least one match
1805             1 if there were no matches             1 if there were no matches
1806             2 there was some kind of error             2 there was some kind of error
1807    
# Line 1151  static int Line 1812  static int
1812  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1813  {  {
1814  int rc = 1;  int rc = 1;
1815  int sep;  int frtype;
1816  FILE *in;  void *handle;
1817    char *lastcomp;
1818    FILE *in = NULL;           /* Ensure initialized */
1819    
1820    #ifdef SUPPORT_LIBZ
1821    gzFile ingz = NULL;
1822    #endif
1823    
1824    #ifdef SUPPORT_LIBBZ2
1825    BZFILE *inbz2 = NULL;
1826    #endif
1827    
1828    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1829    int pathlen;
1830    #endif
1831    
1832  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1833    
1834  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1835    {    {
1836    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN, stdin_name,
1837      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1838        stdin_name : NULL);        stdin_name : NULL);
1839    }    }
1840    
1841    /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
1842    directories, whereas --include and --exclude apply to everything else. The test
1843    is against the final component of the path. */
1844    
1845    lastcomp = strrchr(pathname, FILESEP);
1846    lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
1847    
1848    /* If the file is a directory, skip if not recursing or if explicitly excluded.
1849    Otherwise, scan the directory and recurse for each path within it. The scanning
1850    code is localized so it can be made system-specific. */
1851    
1852    if (isdirectory(pathname))
1853      {
1854      if (dee_action == dee_SKIP ||
1855          !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
1856        return -1;
1857    
 /* If the file is a directory, skip if skipping or if we are recursing, scan  
 each file within it, subject to any include or exclude patterns that were set.  
 The scanning code is localized so it can be made system-specific. */  
   
 if ((sep = isdirectory(pathname)) != 0)  
   {  
   if (dee_action == dee_SKIP) return 1;  
1858    if (dee_action == dee_RECURSE)    if (dee_action == dee_RECURSE)
1859      {      {
1860      char buffer[1024];      char buffer[1024];
# Line 1187  if ((sep = isdirectory(pathname)) != 0) Line 1871  if ((sep = isdirectory(pathname)) != 0)
1871    
1872      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
1873        {        {
1874        int frc, blen;        int frc;
1875        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
       blen = strlen(buffer);  
   
       if (exclude_compiled != NULL &&  
           pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)  
         continue;  
   
       if (include_compiled != NULL &&  
           pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  
         continue;  
   
1876        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1877        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
1878         else if (frc == 0 && rc == 1) rc = 0;         else if (frc == 0 && rc == 1) rc = 0;
# Line 1210  if ((sep = isdirectory(pathname)) != 0) Line 1884  if ((sep = isdirectory(pathname)) != 0)
1884    }    }
1885    
1886  /* If the file is not a directory and not a regular file, skip it if that's  /* If the file is not a directory and not a regular file, skip it if that's
1887  been requested. */  been requested. Otherwise, check for explicit include/exclude. */
1888    
1889  else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;  else if ((!isregfile(pathname) && DEE_action == DEE_SKIP) ||
1890              !test_incexc(lastcomp, include_patterns, exclude_patterns))
1891            return -1;
1892    
1893  /* Control reaches here if we have a regular file, or if we have a directory  /* Control reaches here if we have a regular file, or if we have a directory
1894  and recursion or skipping was not requested, or if we have anything else and  and recursion or skipping was not requested, or if we have anything else and
# Line 1220  skipping was not requested. The scan pro Line 1896  skipping was not requested. The scan pro
1896  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1897  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1898    
1899  in = fopen(pathname, "r");  #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1900  if (in == NULL)  pathlen = (int)(strlen(pathname));
1901    #endif
1902    
1903    /* Open using zlib if it is supported and the file name ends with .gz. */
1904    
1905    #ifdef SUPPORT_LIBZ
1906    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1907      {
1908      ingz = gzopen(pathname, "rb");
1909      if (ingz == NULL)
1910        {
1911        if (!silent)
1912          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1913            strerror(errno));
1914        return 2;
1915        }
1916      handle = (void *)ingz;
1917      frtype = FR_LIBZ;
1918      }
1919    else
1920    #endif
1921    
1922    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1923    
1924    #ifdef SUPPORT_LIBBZ2
1925    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1926      {
1927      inbz2 = BZ2_bzopen(pathname, "rb");
1928      handle = (void *)inbz2;
1929      frtype = FR_LIBBZ2;
1930      }
1931    else
1932    #endif
1933    
1934    /* Otherwise use plain fopen(). The label is so that we can come back here if
1935    an attempt to read a .bz2 file indicates that it really is a plain file. */
1936    
1937    #ifdef SUPPORT_LIBBZ2
1938    PLAIN_FILE:
1939    #endif
1940      {
1941      in = fopen(pathname, "rb");
1942      handle = (void *)in;
1943      frtype = FR_PLAIN;
1944      }
1945    
1946    /* All the opening methods set errno when they fail. */
1947    
1948    if (handle == NULL)
1949    {    {
1950    if (!silent)    if (!silent)
1951      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 1229  if (in == NULL) Line 1953  if (in == NULL)
1953    return 2;    return 2;
1954    }    }
1955    
1956  rc = pcregrep(in, (filenames > FN_DEFAULT ||  /* Now grep the file */
1957    
1958    rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1959    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1960    
1961    /* Close in an appropriate manner. */
1962    
1963    #ifdef SUPPORT_LIBZ
1964    if (frtype == FR_LIBZ)
1965      gzclose(ingz);
1966    else
1967    #endif
1968    
1969    /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1970    read failed. If the error indicates that the file isn't in fact bzipped, try
1971    again as a normal file. */
1972    
1973    #ifdef SUPPORT_LIBBZ2
1974    if (frtype == FR_LIBBZ2)
1975      {
1976      if (rc == 3)
1977        {
1978        int errnum;
1979        const char *err = BZ2_bzerror(inbz2, &errnum);
1980        if (errnum == BZ_DATA_ERROR_MAGIC)
1981          {
1982          BZ2_bzclose(inbz2);
1983          goto PLAIN_FILE;
1984          }
1985        else if (!silent)
1986          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1987            pathname, err);
1988        rc = 2;    /* The normal "something went wrong" code */
1989        }
1990      BZ2_bzclose(inbz2);
1991      }
1992    else
1993    #endif
1994    
1995    /* Normal file close */
1996    
1997  fclose(in);  fclose(in);
1998    
1999    /* Pass back the yield from pcregrep(). */
2000    
2001  return rc;  return rc;
2002  }  }
2003    
# Line 1253  for (op = optionlist; op->one_char != 0; Line 2018  for (op = optionlist; op->one_char != 0;
2018    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
2019    }    }
2020  fprintf(stderr, "] [long options] [pattern] [files]\n");  fprintf(stderr, "] [long options] [pattern] [files]\n");
2021  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information and the long "
2022      "options.\n");
2023  return rc;  return rc;
2024  }  }
2025    
# Line 1272  option_item *op; Line 2038  option_item *op;
2038  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
2039  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
2040  printf("PATTERN must be present if neither -e nor -f is used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
2041  printf("\"-\" can be used as a file name to mean STDIN.\n\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
2042  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
2043    #ifdef SUPPORT_LIBZ
2044    printf("Files whose names end in .gz are read using zlib.\n");
2045    #endif
2046    
2047    #ifdef SUPPORT_LIBBZ2
2048    printf("Files whose names end in .bz2 are read using bzlib2.\n");
2049    #endif
2050    
2051    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2052    printf("Other files and the standard input are read as plain files.\n\n");
2053    #else
2054    printf("All files are read as plain files, without any interpretation.\n\n");
2055    #endif
2056    
2057    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
2058  printf("Options:\n");  printf("Options:\n");
2059    
2060  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
2061    {    {
2062    int n;    int n;
2063    char s[4];    char s[4];
2064    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");  
2065    printf("  %s --%s%n", s, op->long_name, &n);    /* Two options were accidentally implemented and documented with underscores
2066    n = 30 - n;    instead of hyphens in their names, something that was not noticed for quite a
2067      few releases. When fixing this, I left the underscored versions in the list
2068      in case people were using them. However, we don't want to display them in the
2069      help data. There are no other options that contain underscores, and we do not
2070      expect ever to implement such options. Therefore, just omit any option that
2071      contains an underscore. */
2072    
2073      if (strchr(op->long_name, '_') != NULL) continue;
2074    
2075      if (op->one_char > 0 && (op->long_name)[0] == 0)
2076        n = 31 - printf("  -%c", op->one_char);
2077      else
2078        {
2079        if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
2080          else strcpy(s, "   ");
2081        n = 31 - printf("  %s --%s", s, op->long_name);
2082        }
2083    
2084    if (n < 1) n = 1;    if (n < 1) n = 1;
2085    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                           ", op->help_text);
2086    }    }
2087    
2088  printf("\nWhen reading patterns from a file instead of using a command line option,\n");  printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
2089  printf("trailing white space is removed and blank lines are ignored.\n");  printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
2090  printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  printf("When reading patterns or file names from a file, trailing white\n");
2091    printf("space is removed and blank lines are ignored.\n");
2092    printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
2093    
2094  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
2095  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
# Line 1308  handle_option(int letter, int options) Line 2107  handle_option(int letter, int options)
2107  {  {
2108  switch(letter)  switch(letter)
2109    {    {
2110    case N_HELP: help(); exit(0);    case N_FOFFSETS: file_offsets = TRUE; break;
2111      case N_HELP: help(); pcregrep_exit(0);
2112      case N_LBUFFER: line_buffered = TRUE; break;
2113      case N_LOFFSETS: line_offsets = number = TRUE; break;
2114      case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2115      case 'a': binary_files = BIN_TEXT; break;
2116    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
2117    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
2118    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
2119      case 'I': binary_files = BIN_NOMATCH; break;
2120    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
2121    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
2122    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2123    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
2124    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2125    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
2126    case 'o': only_matching = TRUE; break;    case 'o': only_matching = 0; break;
2127    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
2128    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
2129    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
# Line 1328  switch(letter) Line 2133  switch(letter)
2133    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
2134    
2135    case 'V':    case 'V':
2136    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stdout, "pcregrep version %s\n", pcre_version());
2137    fprintf(stderr, "PCRE version %s\n", pcre_version());    pcregrep_exit(0);
   exit(0);  
2138    break;    break;
2139    
2140    default:    default:
2141    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2142    exit(usage(2));    pcregrep_exit(usage(2));
2143    }    }
2144    
2145  return options;  return options;
# Line 1373  return buffer; Line 2177  return buffer;
2177  *          Compile a single pattern              *  *          Compile a single pattern              *
2178  *************************************************/  *************************************************/
2179    
2180  /* When the -F option has been used, this is called for each substring.  /* Do nothing if the pattern has already been compiled. This is the case for
2181  Otherwise it's called for each supplied pattern.  include/exclude patterns read from a file.
2182    
2183    When the -F option has been used, each "pattern" may be a list of strings,
2184    separated by line breaks. They will be matched literally. We split such a
2185    string and compile the first substring, inserting an additional block into the
2186    pattern chain.
2187    
2188  Arguments:  Arguments:
2189    pattern        the pattern string    p              points to the pattern block
2190    options        the PCRE options    options        the PCRE options
2191    filename       the file name, or NULL for a command-line pattern    popts          the processing options
2192      fromfile       TRUE if the pattern was read from a file
2193      fromtext       file name or identifying text (e.g. "include")
2194    count          0 if this is the only command line pattern, or    count          0 if this is the only command line pattern, or
2195                   number of the command line pattern, or                   number of the command line pattern, or
2196                   linenumber for a pattern from a file                   linenumber for a pattern from a file
# Line 1388  Returns:         TRUE on success, FALSE Line 2199  Returns:         TRUE on success, FALSE
2199  */  */
2200    
2201  static BOOL  static BOOL
2202  compile_single_pattern(char *pattern, int options, char *filename, int count)  compile_pattern(patstr *p, int options, int popts, int fromfile,
2203      const char *fromtext, int count)
2204  {  {
2205  char buffer[MBUFTHIRD + 16];  char buffer[PATBUFSIZE];
2206  const char *error;  const char *error;
2207    char *ps = p->string;
2208    int patlen = strlen(ps);
2209  int errptr;  int errptr;
2210    
2211  if (pattern_count >= MAX_PATTERN_COUNT)  if (p->compiled != NULL) return TRUE;
2212    
2213    if ((popts & PO_FIXED_STRINGS) != 0)
2214    {    {
2215    fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",    int ellength;
2216      (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);    char *eop = ps + patlen;
2217    return FALSE;    char *pe = end_of_line(ps, eop, &ellength);
2218    
2219      if (ellength != 0)
2220        {
2221        if (add_pattern(pe, p) == NULL) return FALSE;
2222        patlen = (int)(pe - ps - ellength);
2223        }
2224    }    }
2225    
2226  sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,  sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2227    suffix[process_options]);  p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2228  pattern_list[pattern_count] =  if (p->compiled != NULL) return TRUE;
   pcre_compile(buffer, options, &error, &errptr, pcretables);  
 if (pattern_list[pattern_count++] != NULL) return TRUE;  
2229    
2230  /* Handle compile errors */  /* Handle compile errors */
2231    
2232  errptr -= (int)strlen(prefix[process_options]);  errptr -= (int)strlen(prefix[popts]);
2233  if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);  if (errptr > patlen) errptr = patlen;
2234    
2235  if (filename == NULL)  if (fromfile)
2236    {    {
2237    if (count == 0)    fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2238      fprintf(stderr, "pcregrep: Error in command-line regex "      "at offset %d: %s\n", count, fromtext, errptr, error);
       "at offset %d: %s\n", errptr, error);  
   else  
     fprintf(stderr, "pcregrep: Error in %s command-line regex "  
       "at offset %d: %s\n", ordin(count), errptr, error);  
2239    }    }
2240  else  else
2241    {    {
2242    fprintf(stderr, "pcregrep: Error in regex in line %d of %s "    if (count == 0)
2243      "at offset %d: %s\n", count, filename, errptr, error);      fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2244          fromtext, errptr, error);
2245      else
2246        fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2247          ordin(count), fromtext, errptr, error);
2248    }    }
2249    
2250  return FALSE;  return FALSE;
# Line 1433  return FALSE; Line 2253  return FALSE;
2253    
2254    
2255  /*************************************************  /*************************************************
2256  *           Compile one supplied pattern         *  *     Read and compile a file of patterns        *
2257  *************************************************/  *************************************************/
2258    
2259  /* When the -F option has been used, each string may be a list of strings,  /* This is used for --filelist, --include-from, and --exclude-from.
 separated by line breaks. They will be matched literally.  
2260    
2261  Arguments:  Arguments:
2262    pattern        the pattern string    name         the name of the file; "-" is stdin
2263    options        the PCRE options    patptr       pointer to the pattern chain anchor
2264    filename       the file name, or NULL for a command-line pattern    patlastptr   pointer to the last pattern pointer
2265    count          0 if this is the only command line pattern, or    popts        the process options to pass to pattern_compile()
                  number of the command line pattern, or  
                  linenumber for a pattern from a file  
2266    
2267  Returns:         TRUE on success, FALSE after an error  Returns:       TRUE if all went well
2268  */  */
2269    
2270  static BOOL  static BOOL
2271  compile_pattern(char *pattern, int options, char *filename, int count)  read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2272  {  {
2273  if ((process_options & PO_FIXED_STRINGS) != 0)  int linenumber = 0;
2274    FILE *f;
2275    char *filename;
2276    char buffer[PATBUFSIZE];
2277    
2278    if (strcmp(name, "-") == 0)
2279      {
2280      f = stdin;
2281      filename = stdin_name;
2282      }
2283    else
2284      {
2285      f = fopen(name, "r");
2286      if (f == NULL)
2287        {
2288        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2289        return FALSE;
2290        }
2291      filename = name;
2292      }
2293    
2294    while (fgets(buffer, PATBUFSIZE, f) != NULL)
2295    {    {
2296    char *eop = pattern + strlen(pattern);    char *s = buffer + (int)strlen(buffer);
2297    char buffer[MBUFTHIRD];    while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2298      *s = 0;
2299      linenumber++;
2300      if (buffer[0] == 0) continue;   /* Skip blank lines */
2301    
2302      /* Note: this call to add_pattern() puts a pointer to the local variable
2303      "buffer" into the pattern chain. However, that pointer is used only when
2304      compiling the pattern, which happens immediately below, so we flatten it
2305      afterwards, as a precaution against any later code trying to use it. */
2306    
2307      *patlastptr = add_pattern(buffer, *patlastptr);
2308      if (*patlastptr == NULL) return FALSE;
2309      if (*patptr == NULL) *patptr = *patlastptr;
2310    
2311      /* This loop is needed because compiling a "pattern" when -F is set may add
2312      on additional literal patterns if the original contains a newline. In the
2313      common case, it never will, because fgets() stops at a newline. However,
2314      the -N option can be used to give pcregrep a different newline setting. */
2315    
2316    for(;;)    for(;;)
2317      {      {
2318      int ellength;      if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2319      char *p = end_of_line(pattern, eop, &ellength);          linenumber))
     if (ellength == 0)  
       return compile_single_pattern(pattern, options, filename, count);  
     sprintf(buffer, "%.*s", p - pattern - ellength, pattern);  
     pattern = p;  
     if (!compile_single_pattern(buffer, options, filename, count))  
2320        return FALSE;        return FALSE;
2321        (*patlastptr)->string = NULL;            /* Insurance */
2322        if ((*patlastptr)->next == NULL) break;
2323        *patlastptr = (*patlastptr)->next;
2324      }      }
2325    }    }
2326  else return compile_single_pattern(pattern, options, filename, count);  
2327    if (f != stdin) fclose(f);
2328    return TRUE;
2329  }  }
2330    
2331    
# Line 1485  main(int argc, char **argv) Line 2341  main(int argc, char **argv)
2341  {  {
2342  int i, j;  int i, j;
2343  int rc = 1;  int rc = 1;
 int pcre_options = 0;  
 int cmd_pattern_count = 0;  
 int errptr;  
2344  BOOL only_one_at_top;  BOOL only_one_at_top;
2345  char *patterns[MAX_PATTERN_COUNT];  patstr *cp;
2346    fnstr *fn;
2347  const char *locale_from = "--locale";  const char *locale_from = "--locale";
2348  const char *error;  const char *error;
2349    
2350    #ifdef SUPPORT_PCREGREP_JIT
2351    pcre_jit_stack *jit_stack = NULL;
2352    #endif
2353    
2354  /* Set the default line ending value from the default in the PCRE library;  /* Set the default line ending value from the default in the PCRE library;
2355  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2356  */  Note that the return values from pcre_config(), though derived from the ASCII
2357    codes, are the same in EBCDIC environments, so we must use the actual values
2358    rather than escapes such as '\r'. */
2359    
2360  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2361  switch(i)  switch(i)
2362    {    {
2363    default:                 newline = (char *)"lf"; break;    default:               newline = (char *)"lf"; break;
2364    case '\r':               newline = (char *)"cr"; break;    case 13:               newline = (char *)"cr"; break;
2365    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2366    case -1:                 newline = (char *)"any"; break;    case -1:               newline = (char *)"any"; break;
2367      case -2:               newline = (char *)"anycrlf"; break;
2368    }    }
2369    
2370  /* Process the options */  /* Process the options */
# Line 1522  for (i = 1; i < argc; i++) Line 2383  for (i = 1; i < argc; i++)
2383    
2384    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2385      {      {
2386      if (pattern_filename != NULL || pattern_count > 0) break;      if (pattern_files != NULL || patterns != NULL) break;
2387        else exit(usage(2));        else pcregrep_exit(usage(2));
2388      }      }
2389    
2390    /* Handle a long name option, or -- to terminate the options */    /* Handle a long name option, or -- to terminate the options */
# Line 1545  for (i = 1; i < argc; i++) Line 2406  for (i = 1; i < argc; i++)
2406      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2407      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2408      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2409      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
2410      these categories, fortunately. */      both these categories. */
2411    
2412      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2413        {        {
2414        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2415        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2416        if (opbra == NULL)     /* Not a (p) case */  
2417          /* Handle options with only one spelling of the name */
2418    
2419          if (opbra == NULL)     /* Does not contain '(' */
2420          {          {
2421          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2422            {            {
# Line 1560  for (i = 1; i < argc; i++) Line 2424  for (i = 1; i < argc; i++)
2424            }            }
2425          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2426            {            {
2427            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2428            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2429                (int)strlen(arg) : (int)(argequals - arg);
2430            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2431              {              {
2432              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1574  for (i = 1; i < argc; i++) Line 2439  for (i = 1; i < argc; i++)
2439              }              }
2440            }            }
2441          }          }
2442        else                   /* Special case xxxx(p) */  
2443          /* Handle options with an alternate spelling of the name */
2444    
2445          else
2446          {          {
2447          char buff1[24];          char buff1[24];
2448          char buff2[24];          char buff2[24];
2449          int baselen = opbra - op->long_name;  
2450            int baselen = (int)(opbra - op->long_name);
2451            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2452            int arglen = (argequals == NULL || equals == NULL)?
2453              (int)strlen(arg) : (int)(argequals - arg);
2454    
2455          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2456          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2457            opbra + 1);  
2458          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2459               strncmp(arg, buff2, arglen) == 0)
2460              {
2461              if (equals != NULL && argequals != NULL)
2462                {
2463                option_data = argequals;
2464                if (*option_data == '=')
2465                  {
2466                  option_data++;
2467                  longopwasequals = TRUE;
2468                  }
2469                }
2470            break;            break;
2471              }
2472          }          }
2473        }        }
2474    
2475      if (op->one_char == 0)      if (op->one_char == 0)
2476        {        {
2477        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2478        exit(usage(2));        pcregrep_exit(usage(2));
2479        }        }
2480      }      }
2481    
   
2482    /* Jeffrey Friedl's debugging harness uses these additional options which    /* Jeffrey Friedl's debugging harness uses these additional options which
2483    are not in the right form for putting in the option table because they use    are not in the right form for putting in the option table because they use
2484    only one hyphen, yet are more than one character long. By putting them    only one hyphen, yet are more than one character long. By putting them
# Line 1628  for (i = 1; i < argc; i++) Line 2512  for (i = 1; i < argc; i++)
2512      while (*s != 0)      while (*s != 0)
2513        {        {
2514        for (op = optionlist; op->one_char != 0; op++)        for (op = optionlist; op->one_char != 0; op++)
2515          { if (*s == op->one_char) break; }          {
2516            if (*s == op->one_char) break;
2517            }
2518        if (op->one_char == 0)        if (op->one_char == 0)
2519          {          {
2520          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2521            *s, argv[i]);            *s, argv[i]);
2522          exit(usage(2));          pcregrep_exit(usage(2));
2523            }
2524    
2525          /* Check for a single-character option that has data: OP_OP_NUMBER
2526          is used for one that either has a numerical value or defaults, i.e. the
2527          data is optional. If a digit follows, there is data; if not, carry on
2528          with other single-character options in the same string. */
2529    
2530          option_data = s+1;
2531          if (op->type == OP_OP_NUMBER)
2532            {
2533            if (isdigit((unsigned char)s[1])) break;
2534          }          }
2535        if (op->type != OP_NODATA || s[1] == 0)        else   /* Check for end or a dataless option */
2536          {          {
2537          option_data = s+1;          if (op->type != OP_NODATA || s[1] == 0) break;
         break;  
2538          }          }
2539    
2540          /* Handle a single-character option with no data, then loop for the
2541          next character in the string. */
2542    
2543        pcre_options = handle_option(*s++, pcre_options);        pcre_options = handle_option(*s++, pcre_options);
2544        }        }
2545      }      }
# Line 1656  for (i = 1; i < argc; i++) Line 2556  for (i = 1; i < argc; i++)
2556    
2557    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2558    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
2559    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r",
2560    Jeffrey Friedl's special -S debugging option. */    "only-matching", and Jeffrey Friedl's special -S debugging option. */
2561    
2562    if (*option_data == 0 &&    if (*option_data == 0 &&
2563        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
# Line 1667  for (i = 1; i < argc; i++) Line 2567  for (i = 1; i < argc; i++)
2567        case N_COLOUR:        case N_COLOUR:
2568        colour_option = (char *)"auto";        colour_option = (char *)"auto";
2569        break;        break;
2570    
2571          case 'o':
2572          only_matching = 0;
2573          break;
2574    
2575  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
2576        case 'S':        case 'S':
2577        S_arg = 0;        S_arg = 0;
# Line 1683  for (i = 1; i < argc; i++) Line 2588  for (i = 1; i < argc; i++)
2588      if (i >= argc - 1 || longopwasequals)      if (i >= argc - 1 || longopwasequals)
2589        {        {
2590        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2591        exit(usage(2));        pcregrep_exit(usage(2));
2592        }        }
2593      option_data = argv[++i];      option_data = argv[++i];
2594      }      }
2595    
2596    /* If the option type is OP_PATLIST, it's the -e option, which can be called    /* If the option type is OP_PATLIST, it's the -e option, or one of the
2597    multiple times to create a list of patterns. */    include/exclude options, which can be called multiple times to create lists
2598      of patterns. */
2599    
2600    if (op->type == OP_PATLIST)    if (op->type == OP_PATLIST)
2601         {
2602         patdatastr *pd = (patdatastr *)op->dataptr;
2603         *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2604         if (*(pd->lastptr) == NULL) goto EXIT2;
2605         if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2606         }
2607    
2608      /* If the option type is OP_FILELIST, it's one of the options that names a
2609      file. */
2610    
2611      else if (op->type == OP_FILELIST)
2612        {
2613        fndatastr *fd = (fndatastr *)op->dataptr;
2614        fn = (fnstr *)malloc(sizeof(fnstr));
2615        if (fn == NULL)
2616          {
2617          fprintf(stderr, "pcregrep: malloc failed\n");
2618          goto EXIT2;
2619          }
2620        fn->next = NULL;
2621        fn->name = option_data;
2622        if (*(fd->anchor) == NULL)
2623          *(fd->anchor) = fn;
2624        else
2625          (*(fd->lastptr))->next = fn;
2626        *(fd->lastptr) = fn;
2627        }
2628    
2629      /* Handle OP_BINFILES */
2630    
2631      else if (op->type == OP_BINFILES)
2632      {      {
2633      if (cmd_pattern_count >= MAX_PATTERN_COUNT)      if (strcmp(option_data, "binary") == 0)
2634          binary_files = BIN_BINARY;
2635        else if (strcmp(option_data, "without-match") == 0)
2636          binary_files = BIN_NOMATCH;
2637        else if (strcmp(option_data, "text") == 0)
2638          binary_files = BIN_TEXT;
2639        else
2640        {        {
2641        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",        fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2642          MAX_PATTERN_COUNT);          option_data);
2643        return 2;        pcregrep_exit(usage(2));
2644        }        }
     patterns[cmd_pattern_count++] = option_data;  
2645      }      }
2646    
2647    /* Otherwise, deal with single string or numeric data values. */    /* Otherwise, deal with single string or numeric data values. */
2648    
2649    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)    else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2650               op->type != OP_OP_NUMBER)
2651      {      {
2652      *((char **)op->dataptr) = option_data;      *((char **)op->dataptr) = option_data;
2653      }      }
2654    
2655      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2656      only for unpicking arguments, so just keep it simple. */
2657    
2658    else    else
2659      {      {
2660      char *endptr;      unsigned long int n = 0;
2661      int n = strtoul(option_data, &endptr, 10);      char *endptr = option_data;
2662        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2663        while (isdigit((unsigned char)(*endptr)))
2664          n = n * 10 + (int)(*endptr++ - '0');
2665        if (toupper(*endptr) == 'K')
2666          {
2667          n *= 1024;
2668          endptr++;
2669          }
2670        else if (toupper(*endptr) == 'M')
2671          {
2672          n *= 1024*1024;
2673          endptr++;
2674          }
2675      if (*endptr != 0)      if (*endptr != 0)
2676        {        {
2677        if (longop)        if (longop)
2678          {          {
2679          char *equals = strchr(op->long_name, '=');          char *equals = strchr(op->long_name, '=');
2680          int nlen = (equals == NULL)? (int)strlen(op->long_name) :          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2681            equals - op->long_name;            (int)(equals - op->long_name);
2682          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2683            option_data, nlen, op->long_name);            option_data, nlen, op->long_name);
2684          }          }
2685        else        else
2686          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2687            option_data, op->one_char);            option_data, op->one_char);
2688        exit(usage(2));        pcregrep_exit(usage(2));
2689        }        }
2690      *((int *)op->dataptr) = n;      if (op->type == OP_LONGNUMBER)
2691            *((unsigned long int *)op->dataptr) = n;
2692        else
2693            *((int *)op->dataptr) = n;
2694      }      }
2695    }    }
2696    
# Line 1740  if (both_context > 0) Line 2703  if (both_context > 0)
2703    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2704    }    }
2705    
2706    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2707    However, the latter two set only_matching. */
2708    
2709    if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2710        (file_offsets && line_offsets))
2711      {
2712      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2713        "and/or --line-offsets\n");
2714      pcregrep_exit(usage(2));
2715      }
2716    
2717    if (file_offsets || line_offsets) only_matching = 0;
2718    
2719  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2720  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
2721    
# Line 1811  else if (strcmp(newline, "any") == 0 || Line 2787  else if (strcmp(newline, "any") == 0 ||
2787    pcre_options |= PCRE_NEWLINE_ANY;    pcre_options |= PCRE_NEWLINE_ANY;
2788    endlinetype = EL_ANY;    endlinetype = EL_ANY;
2789    }    }
2790    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2791      {
2792      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2793      endlinetype = EL_ANYCRLF;
2794      }
2795  else  else
2796    {    {
2797    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
# Line 1857  if (jfriedl_XT != 0 || jfriedl_XR != 0) Line 2838  if (jfriedl_XT != 0 || jfriedl_XR != 0)
2838    }    }
2839  #endif  #endif
2840    
2841  /* Get memory to store the pattern and hints lists. */  /* Get memory for the main buffer. */
2842    
2843  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  bufsize = 3*bufthird;
2844  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  main_buffer = (char *)malloc(bufsize);
2845    
2846  if (pattern_list == NULL || hints_list == NULL)  if (main_buffer == NULL)
2847    {    {
2848    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
2849    return 2;    goto EXIT2;
2850    }    }
2851    
2852  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there are no files provided by -f,
2853  the first argument is the one and only pattern, and it must exist. */  the first argument is the one and only pattern, and it must exist. */
2854    
2855  if (cmd_pattern_count == 0 && pattern_filename == NULL)  if (patterns == NULL && pattern_files == NULL)
2856    {    {
2857    if (i >= argc) return usage(2);    if (i >= argc) return usage(2);
2858    patterns[cmd_pattern_count++] = argv[i++];    patterns = patterns_last = add_pattern(argv[i++], NULL);
2859      if (patterns == NULL) goto EXIT2;
2860    }    }
2861    
2862  /* Compile the patterns that were provided on the command line, either by  /* Compile the patterns that were provided on the command line, either by
2863  multiple uses of -e or as a single unkeyed pattern. */  multiple uses of -e or as a single unkeyed pattern. We cannot do this until
2864    after all the command-line options are read so that we know which PCRE options
2865    to use. When -F is used, compile_pattern() may add another block into the
2866    chain, so we must not access the next pointer till after the compile. */
2867    
2868  for (j = 0; j < cmd_pattern_count; j++)  for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
2869    {    {
2870    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
2871         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 1 && patterns->next == NULL)? 0 : j))
2872      return 2;      goto EXIT2;
2873    }    }
2874    
2875  /* Compile the regular expressions that are provided in a file. */  /* Read and compile the regular expressions that are provided in files. */
2876    
2877  if (pattern_filename != NULL)  for (fn = pattern_files; fn != NULL; fn = fn->next)
2878    {    {
2879    int linenumber = 0;    if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
2880    FILE *f;      goto EXIT2;
2881    char *filename;    }
   char buffer[MBUFTHIRD];  
2882    
2883    if (strcmp(pattern_filename, "-") == 0)  /* Study the regular expressions, as we will be running them many times. Unless
2884    JIT has been explicitly disabled, arrange a stack for it to use. */
2885    
2886    #ifdef SUPPORT_PCREGREP_JIT
2887    if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2888      jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
2889    #endif
2890    
2891    for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
2892      {
2893      cp->hint = pcre_study(cp->compiled, study_options, &error);
2894      if (error != NULL)
2895      {      {
2896      f = stdin;      char s[16];
2897      filename = stdin_name;      if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
2898        fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2899        goto EXIT2;
2900      }      }
2901    else  #ifdef SUPPORT_PCREGREP_JIT
2902      if (jit_stack != NULL && cp->hint != NULL)
2903        pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
2904    #endif
2905      }
2906    
2907    /* If --match-limit or --recursion-limit was set, put the value(s) into the
2908    pcre_extra block for each pattern. */
2909    
2910    if (match_limit > 0 || match_limit_recursion > 0)
2911      {
2912      for (cp = patterns; cp != NULL; cp = cp->next)
2913      {      {
2914      f = fopen(pattern_filename, "r");      if (cp->hint == NULL)
     if (f == NULL)  
2915        {        {
2916        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,        cp->hint = (pcre_extra *)malloc(sizeof(pcre_extra));
2917          strerror(errno));        if (cp->hint == NULL)
2918        return 2;          {
2919            fprintf(stderr, "pcregrep: malloc failed\n");
2920            pcregrep_exit(2);
2921            }
2922          }
2923        if (match_limit > 0)
2924          {
2925          cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
2926          cp->hint->match_limit = match_limit;
2927          }
2928        if (match_limit_recursion > 0)
2929          {
2930          cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2931          cp->hint->match_limit_recursion = match_limit_recursion;
2932        }        }
     filename = pattern_filename;  
2933      }      }
2934      }
2935    
2936    /* If there are include or exclude patterns read from the command line, compile
2937    them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
2938    0. */
2939    
2940    while (fgets(buffer, MBUFTHIRD, f) != NULL)  for (j = 0; j < 4; j++)
2941      {
2942      int k;
2943      for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
2944      {      {
2945      char *s = buffer + (int)strlen(buffer);      if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
2946      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;           (k == 1 && cp->next == NULL)? 0 : k))
2947      *s = 0;        goto EXIT2;
     linenumber++;  
     if (buffer[0] == 0) continue;   /* Skip blank lines */  
     if (!compile_pattern(buffer, pcre_options, filename, linenumber))  
       return 2;  
2948      }      }
   
   if (f != stdin) fclose(f);  
2949    }    }
2950    
2951  /* Study the regular expressions, as we will be running them many times */  /* Read and compile include/exclude patterns from files. */
2952    
2953  for (j = 0; j < pattern_count; j++)  for (fn = include_from; fn != NULL; fn = fn->next)
2954    {    {
2955    hints_list[j] = pcre_study(pattern_list[j], 0, &error);    if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
2956    if (error != NULL)      goto EXIT2;
2957      {    }
2958      char s[16];  
2959      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);  for (fn = exclude_from; fn != NULL; fn = fn->next)
2960      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);    {
2961      return 2;    if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
2962      }      goto EXIT2;
2963    }    }
2964    
2965  /* If there are include or exclude patterns, compile them. */  /* If there are no files that contain lists of files to search, and there are
2966    no file arguments, search stdin, and then exit. */
2967    
2968  if (exclude_pattern != NULL)  if (file_lists == NULL && i >= argc)
2969    {    {
2970    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,    rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2971      pcretables);      (filenames > FN_DEFAULT)? stdin_name : NULL);
2972    if (exclude_compiled == NULL)    goto EXIT;
     {  
     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",  
       errptr, error);  
     return 2;  
     }  
2973    }    }
2974    
2975  if (include_pattern != NULL)  /* If any files that contain lists of files to search have been specified,
2976    read them line by line and search the given files. */
2977    
2978    for (fn = file_lists; fn != NULL; fn = fn->next)
2979    {    {
2980    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,    char buffer[PATBUFSIZE];
2981      pcretables);    FILE *fl;
2982    if (include_compiled == NULL)    if (strcmp(fn->name, "-") == 0) fl = stdin; else
2983      {      {
2984      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fl = fopen(fn->name, "rb");
2985        errptr, error);      if (fl == NULL)
2986      return 2;        {
2987          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
2988            strerror(errno));
2989          goto EXIT2;
2990          }
2991      }      }
2992      while (fgets(buffer, PATBUFSIZE, fl) != NULL)
2993        {
2994        int frc;
2995        char *end = buffer + (int)strlen(buffer);
2996        while (end > buffer && isspace(end[-1])) end--;
2997        *end = 0;
2998        if (*buffer != 0)
2999          {
3000          frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3001          if (frc > 1) rc = frc;
3002            else if (frc == 0 && rc == 1) rc = 0;
3003          }
3004        }
3005      if (fl != stdin) fclose(fl);
3006    }    }
3007    
3008  /* If there are no further arguments, do the business on stdin and exit. */  /* After handling file-list, work through remaining arguments. Pass in the fact
3009    that there is only one argument at top level - this suppresses the file name if
3010  if (i >= argc)  the argument is not a directory and filenames are not otherwise forced. */
   return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);  
3011    
3012  /* Otherwise, work through the remaining arguments as files or directories.  only_one_at_top = i == argc - 1 && file_lists == NULL;
 Pass in the fact that there is only one argument at top level - this suppresses  
 the file name if the argument is not a directory and filenames are not  
 otherwise forced. */  
   
 only_one_at_top = i == argc - 1;   /* Catch initial value of i */  
3013    
3014  for (; i < argc; i++)  for (; i < argc; i++)
3015    {    {
# Line 1987  for (; i < argc; i++) Line 3019  for (; i < argc; i++)
3019      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
3020    }    }
3021    
3022  return rc;  EXIT:
3023    #ifdef SUPPORT_PCREGREP_JIT
3024    if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3025    #endif
3026    
3027    if (main_buffer != NULL) free(main_buffer);
3028    
3029    free_pattern_chain(patterns);
3030    free_pattern_chain(include_patterns);
3031    free_pattern_chain(include_dir_patterns);
3032    free_pattern_chain(exclude_patterns);
3033    free_pattern_chain(exclude_dir_patterns);
3034    
3035    free_file_chain(exclude_from);
3036    free_file_chain(include_from);
3037    free_file_chain(pattern_files);
3038    free_file_chain(file_lists);
3039    
3040    pcregrep_exit(rc);
3041    
3042    EXIT2:
3043    rc = 2;
3044    goto EXIT;
3045  }  }
3046    
3047  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.96  
changed lines
  Added in v.1033

  ViewVC Help
Powered by ViewVC 1.1.5