/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 345 by ph10, Mon Apr 28 15:10:02 2008 UTC revision 1004 by ph10, Fri Aug 17 08:26:17 2012 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 70  POSSIBILITY OF SUCH DAMAGE. Line 70  POSSIBILITY OF SUCH DAMAGE.
70    
71  typedef int BOOL;  typedef int BOOL;
72    
73  #define MAX_PATTERN_COUNT 100  #define OFFSET_SIZE 99
74    
75  #if BUFSIZ > 8192  #if BUFSIZ > 8192
76  #define MBUFTHIRD BUFSIZ  #define MAXPATLEN BUFSIZ
77  #else  #else
78  #define MBUFTHIRD 8192  #define MAXPATLEN 8192
79  #endif  #endif
80    
81    #define PATBUFSIZE (MAXPATLEN + 10)   /* Allows for prefix+suffix */
82    
83  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
84  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
85  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
86    
87  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
88    
89  /* File reading styles */  /* File reading styles */
90    
# Line 103  enum { DEE_READ, DEE_SKIP }; Line 105  enum { DEE_READ, DEE_SKIP };
105    
106  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
107    
108    /* Binary file options */
109    
110    enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
111    
112    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
113    environments), a warning is issued if the value of fwrite() is ignored.
114    Unfortunately, casting to (void) does not suppress the warning. To get round
115    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
116    apply to fprintf(). */
117    
/* The empty "if" consumes fwrite()'s result so that it is not reported as
ignored. It is wrapped in do { } while (0) so that "FWRITE(...);" is a single
statement and can safely be followed by "else". */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
119    
120    
121    
122  /*************************************************  /*************************************************
# Line 126  static char *colour_string = (char *)"1; Line 140  static char *colour_string = (char *)"1;
140  static char *colour_option = NULL;  static char *colour_option = NULL;
141  static char *dee_option = NULL;  static char *dee_option = NULL;
142  static char *DEE_option = NULL;  static char *DEE_option = NULL;
143    static char *locale = NULL;
144    static char *main_buffer = NULL;
145  static char *newline = NULL;  static char *newline = NULL;
 static char *pattern_filename = NULL;  
146  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
 static char *locale = NULL;  
147    
148  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
149    
 static int  pattern_count = 0;  
 static pcre **pattern_list = NULL;  
 static pcre_extra **hints_list = NULL;  
   
 static char *include_pattern = NULL;  
 static char *exclude_pattern = NULL;  
 static char *include_dir_pattern = NULL;  
 static char *exclude_dir_pattern = NULL;  
   
 static pcre *include_compiled = NULL;  
 static pcre *exclude_compiled = NULL;  
 static pcre *include_dir_compiled = NULL;  
 static pcre *exclude_dir_compiled = NULL;  
   
150  static int after_context = 0;  static int after_context = 0;
151  static int before_context = 0;  static int before_context = 0;
152    static int binary_files = BIN_BINARY;
153  static int both_context = 0;  static int both_context = 0;
154    static int bufthird = PCREGREP_BUFSIZE;
155    static int bufsize = 3*PCREGREP_BUFSIZE;
156    
157    #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
158    static int dee_action = dee_SKIP;
159    #else
160  static int dee_action = dee_READ;  static int dee_action = dee_READ;
161    #endif
162    
163  static int DEE_action = DEE_READ;  static int DEE_action = DEE_READ;
164  static int error_count = 0;  static int error_count = 0;
165  static int filenames = FN_DEFAULT;  static int filenames = FN_DEFAULT;
166    static int only_matching = -1;
167    static int pcre_options = 0;
168  static int process_options = 0;  static int process_options = 0;
169    
170    #ifdef SUPPORT_PCREGREP_JIT
171    static int study_options = PCRE_STUDY_JIT_COMPILE;
172    #else
173    static int study_options = 0;
174    #endif
175    
176    static unsigned long int match_limit = 0;
177    static unsigned long int match_limit_recursion = 0;
178    
179  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
180  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
181  static BOOL file_offsets = FALSE;  static BOOL file_offsets = FALSE;
182  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
183  static BOOL invert = FALSE;  static BOOL invert = FALSE;
184    static BOOL line_buffered = FALSE;
185  static BOOL line_offsets = FALSE;  static BOOL line_offsets = FALSE;
186  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
187  static BOOL number = FALSE;  static BOOL number = FALSE;
188  static BOOL only_matching = FALSE;  static BOOL omit_zero_count = FALSE;
189    static BOOL resource_error = FALSE;
190  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
191  static BOOL silent = FALSE;  static BOOL silent = FALSE;
192  static BOOL utf8 = FALSE;  static BOOL utf8 = FALSE;
193    
194    /* Structure for list of file names (for -f and --{in,ex}clude-from) */
195    
196    typedef struct fnstr {
197      struct fnstr *next;
198      char *name;
199    } fnstr;
200    
201    static fnstr *exclude_from = NULL;
202    static fnstr *exclude_from_last = NULL;
203    static fnstr *include_from = NULL;
204    static fnstr *include_from_last = NULL;
205    
206    static fnstr *file_lists = NULL;
207    static fnstr *file_lists_last = NULL;
208    static fnstr *pattern_files = NULL;
209    static fnstr *pattern_files_last = NULL;
210    
211    /* Structure for holding the two variables that describe a file name chain. */
212    
213    typedef struct fndatastr {
214      fnstr **anchor;
215      fnstr **lastptr;
216    } fndatastr;
217    
218    static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
219    static fndatastr include_from_data = { &include_from, &include_from_last };
220    static fndatastr file_lists_data = { &file_lists, &file_lists_last };
221    static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
222    
223    /* Structure for pattern and its compiled form; used for matching patterns and
224    also for include/exclude patterns. */
225    
226    typedef struct patstr {
227      struct patstr *next;
228      char *string;
229      pcre *compiled;
230      pcre_extra *hint;
231    } patstr;
232    
233    static patstr *patterns = NULL;
234    static patstr *patterns_last = NULL;
235    static patstr *include_patterns = NULL;
236    static patstr *include_patterns_last = NULL;
237    static patstr *exclude_patterns = NULL;
238    static patstr *exclude_patterns_last = NULL;
239    static patstr *include_dir_patterns = NULL;
240    static patstr *include_dir_patterns_last = NULL;
241    static patstr *exclude_dir_patterns = NULL;
242    static patstr *exclude_dir_patterns_last = NULL;
243    
244    /* Structure holding the two variables that describe a pattern chain. A pointer
245    to such structures is used for each appropriate option. */
246    
247    typedef struct patdatastr {
248      patstr **anchor;
249      patstr **lastptr;
250    } patdatastr;
251    
252    static patdatastr match_patdata = { &patterns, &patterns_last };
253    static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
254    static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
255    static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
256    static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
257    
258    static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
259                                     &include_dir_patterns, &exclude_dir_patterns };
260    
261    static const char *incexname[4] = { "--include", "--exclude",
262                                        "--include-dir", "--exclude-dir" };
263    
264  /* Structure for options and list of them */  /* Structure for options and list of them */
265    
266  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
267         OP_PATLIST };         OP_OP_NUMBER, OP_PATLIST, OP_FILELIST, OP_BINFILES };
268    
269  typedef struct option_item {  typedef struct option_item {
270    int type;    int type;
# Line 196  used to identify them. */ Line 288  used to identify them. */
288  #define N_NULL         (-9)  #define N_NULL         (-9)
289  #define N_LOFFSETS     (-10)  #define N_LOFFSETS     (-10)
290  #define N_FOFFSETS     (-11)  #define N_FOFFSETS     (-11)
291    #define N_LBUFFER      (-12)
292    #define N_M_LIMIT      (-13)
293    #define N_M_LIMIT_REC  (-14)
294    #define N_BUFSIZE      (-15)
295    #define N_NOJIT        (-16)
296    #define N_FILE_LIST    (-17)
297    #define N_BINARY_FILES (-18)
298    #define N_EXCLUDE_FROM (-19)
299    #define N_INCLUDE_FROM (-20)
300    
301  static option_item optionlist[] = {  static option_item optionlist[] = {
302    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
303    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
304    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
305    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
306    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
307    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },    { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
308    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
309    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
310    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
311    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
312    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
313    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
314    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
315    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },    { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
316    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
317    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
318    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
319    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
320    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
321    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
322    { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },    { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
323    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
324    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },  #ifdef SUPPORT_PCREGREP_JIT
325    { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
326    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },  #else
327    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
328    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },  #endif
329    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
330    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
331    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
332    { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },    { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
333    { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },    { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
334      { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
335      { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
336      { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
337      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
338      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
339      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
340      { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
341      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
342      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
343      { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
344      { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
345      { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
346      { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
347      { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
348      { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
349    
350      /* These two were accidentally implemented with underscores instead of
351      hyphens in the option names. As this was not discovered for several releases,
352      the incorrect versions are left in the table for compatibility. However, the
353      --help function misses out any option that has an underscore in its name. */
354    
355      { OP_PATLIST,   N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
356      { OP_PATLIST,   N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
357    
358  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
359    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
360  #endif  #endif
# Line 245  static option_item optionlist[] = { Line 370  static option_item optionlist[] = {
370  /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F  /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
371  options. These set the 1, 2, and 4 bits in process_options, respectively. Note  options. These set the 1, 2, and 4 bits in process_options, respectively. Note
372  that the combination of -w and -x has the same effect as -x on its own, so we  that the combination of -w and -x has the same effect as -x on its own, so we
373  can treat them as the same. */  can treat them as the same. Note that the PATBUFSIZE macro assumes the longest
374    prefix+suffix is 10 characters; if anything longer is added, it must be
375    adjusted. */
376    
377  static const char *prefix[] = {  static const char *prefix[] = {
378    "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };    "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
# Line 266  const char utf8_table4[] = { Line 393  const char utf8_table4[] = {
393    
394    
395  /*************************************************  /*************************************************
396    *          Add item to chain of patterns         *
397    *************************************************/
398    
399    /* Used to add an item onto a chain, or just return an unconnected item if the
400    "after" argument is NULL.
401    
402    Arguments:
403      s          pattern string to add
404      after      if not NULL points to item to insert after
405    
406    Returns:     new pattern block, or NULL after malloc failure
407    */
408    
409    static patstr *
410    add_pattern(char *s, patstr *after)
411    {
412    patstr *p = (patstr *)malloc(sizeof(patstr));
413    if (p == NULL)
414      {
415      fprintf(stderr, "pcregrep: malloc failed\n");
416      return NULL;
417      }
418    if (strlen(s) > MAXPATLEN)
419      {
420      fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
421        MAXPATLEN);
422      return NULL;
423      }
424    p->next = NULL;
425    p->string = s;
426    p->compiled = NULL;
427    p->hint = NULL;
428    
429    if (after != NULL)
430      {
431      p->next = after->next;
432      after->next = p;
433      }
434    return p;
435    }
436    
437    
438    /*************************************************
439    *           Free chain of patterns               *
440    *************************************************/
441    
442    /* Used for several chains of patterns.
443    
444    Argument: pointer to start of chain
445    Returns:  nothing
446    */
447    
448    static void
449    free_pattern_chain(patstr *pc)
450    {
451    while (pc != NULL)
452      {
453      patstr *p = pc;
454      pc = p->next;
455      if (p->hint != NULL) pcre_free_study(p->hint);
456      if (p->compiled != NULL) pcre_free(p->compiled);
457      free(p);
458      }
459    }
460    
461    
462    /*************************************************
463    *           Free chain of file names             *
464    *************************************************/
465    
466    /*
467    Argument: pointer to start of chain
468    Returns:  nothing
469    */
470    
471    static void
472    free_file_chain(fnstr *fn)
473    {
474    while (fn != NULL)
475      {
476      fnstr *f = fn;
477      fn = f->next;
478      free(f);
479      }
480    }
481    
482    
483    /*************************************************
484    *         Exit from the program                  *
485    *************************************************/
486    
487    /* If there has been a resource error, give a suitable message.
488    
489    Argument:  the return code
490    Returns:   does not return
491    */
492    
493    static void
494    pcregrep_exit(int rc)
495    {
496    if (resource_error)
497      {
498      fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
499        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
500        PCRE_ERROR_JIT_STACKLIMIT);
501      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
502      }
503    
504    exit(rc);
505    }
506    
507    
508    /*************************************************
509  *            OS-specific functions               *  *            OS-specific functions               *
510  *************************************************/  *************************************************/
511    
# Line 281  although at present the only ones are fo Line 521  although at present the only ones are fo
521  #include <dirent.h>  #include <dirent.h>
522    
523  typedef DIR directory_type;  typedef DIR directory_type;
524    #define FILESEP '/'
525    
526  static int  static int
527  isdirectory(char *filename)  isdirectory(char *filename)
# Line 288  isdirectory(char *filename) Line 529  isdirectory(char *filename)
529  struct stat statbuf;  struct stat statbuf;
530  if (stat(filename, &statbuf) < 0)  if (stat(filename, &statbuf) < 0)
531    return 0;        /* In the expectation that opening as a file will fail */    return 0;        /* In the expectation that opening as a file will fail */
532  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return (statbuf.st_mode & S_IFMT) == S_IFDIR;
533  }  }
534    
535  static directory_type *  static directory_type *
# Line 329  return (statbuf.st_mode & S_IFMT) == S_I Line 570  return (statbuf.st_mode & S_IFMT) == S_I
570  }  }
571    
572    
573  /************* Test stdout for being a terminal in Unix **********/  /************* Test for a terminal in Unix **********/
574    
575  static BOOL  static BOOL
576  is_stdout_tty(void)  is_stdout_tty(void)
# Line 337  is_stdout_tty(void) Line 578  is_stdout_tty(void)
578  return isatty(fileno(stdout));  return isatty(fileno(stdout));
579  }  }
580    
581    static BOOL
582    is_file_tty(FILE *f)
583    {
584    return isatty(fileno(f));
585    }
586    
587    
588  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
589    
# Line 344  return isatty(fileno(stdout)); Line 591  return isatty(fileno(stdout));
591  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
592  when it did not exist. David Byron added a patch that moved the #include of  when it did not exist. David Byron added a patch that moved the #include of
593  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
594  */  The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
595    undefined when it is indeed undefined. */
596    
597  #elif HAVE_WINDOWS_H  #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
598    
599  #ifndef STRICT  #ifndef STRICT
600  # define STRICT  # define STRICT
# Line 368  BOOL first; Line 616  BOOL first;
616  WIN32_FIND_DATA data;  WIN32_FIND_DATA data;
617  } directory_type;  } directory_type;
618    
619    #define FILESEP '/'
620    
621  int  int
622  isdirectory(char *filename)  isdirectory(char *filename)
623  {  {
624  DWORD attr = GetFileAttributes(filename);  DWORD attr = GetFileAttributes(filename);
625  if (attr == INVALID_FILE_ATTRIBUTES)  if (attr == INVALID_FILE_ATTRIBUTES)
626    return 0;    return 0;
627  return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;  return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
628  }  }
629    
630  directory_type *  directory_type *
# Line 385  char *pattern; Line 635  char *pattern;
635  directory_type *dir;  directory_type *dir;
636  DWORD err;  DWORD err;
637  len = strlen(filename);  len = strlen(filename);
638  pattern = (char *) malloc(len + 3);  pattern = (char *)malloc(len + 3);
639  dir = (directory_type *) malloc(sizeof(*dir));  dir = (directory_type *)malloc(sizeof(*dir));
640  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
641    {    {
642    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
643    exit(2);    pcregrep_exit(2);
644    }    }
645  memcpy(pattern, filename, len);  memcpy(pattern, filename, len);
646  memcpy(&(pattern[len]), "\\*", 3);  memcpy(&(pattern[len]), "\\*", 3);
# Line 449  return !isdirectory(filename); Line 699  return !isdirectory(filename);
699  }  }
700    
701    
702  /************* Test stdout for being a terminal in Win32 **********/  /************* Test for a terminal in Win32 **********/
703    
704  /* I don't know how to do this; assume never */  /* I don't know how to do this; assume never */
705    
# Line 459  is_stdout_tty(void) Line 709  is_stdout_tty(void)
709  return FALSE;  return FALSE;
710  }  }
711    
712    static BOOL
713    is_file_tty(FILE *f)
714    {
715    return FALSE;
716    }
717    
718    
719  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
720    
# Line 466  return FALSE; Line 722  return FALSE;
722    
723  #else  #else
724    
/* No trailing semicolon: the macro has to be usable inside an expression. */
#define FILESEP 0
726  typedef void directory_type;  typedef void directory_type;
727    
728  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
# Line 481  void closedirectory(directory_type *dir) Line 738  void closedirectory(directory_type *dir)
738  int isregfile(char *filename) { return 1; }  int isregfile(char *filename) { return 1; }
739    
740    
741  /************* Test stdout for being a terminal when we can't do it **********/  /************* Test for a terminal when we can't do it **********/
742    
743  static BOOL  static BOOL
744  is_stdout_tty(void)  is_stdout_tty(void)
# Line 489  is_stdout_tty(void) Line 746  is_stdout_tty(void)
746  return FALSE;  return FALSE;
747  }  }
748    
749    static BOOL
750    is_file_tty(FILE *f)
751    {
752    return FALSE;
753    }
754    
755  #endif  #endif
756    
# Line 517  return sys_errlist[n]; Line 779  return sys_errlist[n];
779    
780    
781  /*************************************************  /*************************************************
782    *            Test exclude/includes               *
783    *************************************************/
784    
785    /* If any exclude pattern matches, the path is excluded. Otherwise, unless
786    there are no includes, the path must match an include pattern.
787    
788    Arguments:
789      path      the path to be matched
790      ip        the chain of include patterns
791      ep        the chain of exclude patterns
792    
793    Returns:    TRUE if the path is not excluded
794    */
795    
796    static BOOL
797    test_incexc(char *path, patstr *ip, patstr *ep)
798    {
799    int plen = strlen(path);
800    
801    for (; ep != NULL; ep = ep->next)
802      {
803      if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
804        return FALSE;
805      }
806    
807    if (ip == NULL) return TRUE;
808    
809    for (; ip != NULL; ip = ip->next)
810      {
811      if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
812        return TRUE;
813      }
814    
815    return FALSE;
816    }
817    
818    
819    
820    /*************************************************
821    *            Read one line of input              *
822    *************************************************/
823    
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a newline has been stored, when "length" characters have
been stored, or at end of file, whichever comes first. No terminating zero is
added to the buffer.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read; if it is not
               positive, nothing is read and zero is returned
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;

/* The loop stores a character before it tests the limit, so without this
guard a zero-length buffer would be overrun by one byte. */

if (length <= 0) return 0;

while ((c = fgetc(f)) != EOF)
  {
  buffer[yield++] = (char)c;
  if (c == '\n' || yield >= length) break;
  }
return (unsigned int)yield;
}
851    
852    
853    
854    /*************************************************
855  *             Find end of line                   *  *             Find end of line                   *
856  *************************************************/  *************************************************/
857    
# Line 528  Arguments: Line 863  Arguments:
863    endptr    end of available data    endptr    end of available data
864    lenptr    where to put the length of the eol sequence    lenptr    where to put the length of the eol sequence
865    
866  Returns:    pointer to the last byte of the line  Returns:    pointer after the last byte of the line,
867                including the newline byte(s)
868  */  */
869    
870  static char *  static char *
# Line 798  Arguments: Line 1134  Arguments:
1134  Returns:            nothing  Returns:            nothing
1135  */  */
1136    
1137  static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,  static void
1138    char *endptr, char *printname)  do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1139      char *printname)
1140  {  {
1141  if (after_context > 0 && lastmatchnumber > 0)  if (after_context > 0 && lastmatchnumber > 0)
1142    {    {
# Line 811  if (after_context > 0 && lastmatchnumber Line 1148  if (after_context > 0 && lastmatchnumber
1148      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
1149      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1150      pp = end_of_line(pp, endptr, &ellength);      pp = end_of_line(pp, endptr, &ellength);
1151      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1152      lastmatchrestart = pp;      lastmatchrestart = pp;
1153      }      }
1154    hyphenpending = TRUE;    hyphenpending = TRUE;
# Line 821  if (after_context > 0 && lastmatchnumber Line 1158  if (after_context > 0 && lastmatchnumber
1158    
1159    
1160  /*************************************************  /*************************************************
1161    *   Apply patterns to subject till one matches   *
1162    *************************************************/
1163    
1164    /* This function is called to run through all patterns, looking for a match. It
1165    is used multiple times for the same subject when colouring is enabled, in order
1166    to find all possible matches.
1167    
1168    Arguments:
1169      matchptr     the start of the subject
1170      length       the length of the subject to match
1171      startoffset  where to start matching
1172      offsets      the offets vector to fill in
1173      mrc          address of where to put the result of pcre_exec()
1174    
1175    Returns:      TRUE if there was a match
1176                  FALSE if there was no match
1177                  invert if there was a non-fatal error
1178    */
1179    
1180    static BOOL
1181    match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
1182      int *mrc)
1183    {
1184    int i;
1185    size_t slen = length;
1186    patstr *p = patterns;
1187    const char *msg = "this text:\n\n";
1188    
1189    if (slen > 200)
1190      {
1191      slen = 200;
1192      msg = "text that starts:\n\n";
1193      }
1194    for (i = 1; p != NULL; p = p->next, i++)
1195      {
1196      *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1197        startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
1198      if (*mrc >= 0) return TRUE;
1199      if (*mrc == PCRE_ERROR_NOMATCH) continue;
1200      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1201      if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1202      fprintf(stderr, "%s", msg);
1203      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1204      fprintf(stderr, "\n\n");
1205      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1206          *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1207        resource_error = TRUE;
1208      if (error_count++ > 20)
1209        {
1210        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1211        pcregrep_exit(2);
1212        }
1213      return invert;    /* No more matching; don't show the line again */
1214      }
1215    
1216    return FALSE;  /* No match, no errors */
1217    }
1218    
1219    
1220    
1221    /*************************************************
1222  *            Grep an individual file             *  *            Grep an individual file             *
1223  *************************************************/  *************************************************/
1224    
1225  /* This is called from grep_or_recurse() below. It uses a buffer that is three  /* This is called from grep_or_recurse() below. It uses a buffer that is three
1226  times the value of MBUFTHIRD. The matching point is never allowed to stray into  times the value of bufthird. The matching point is never allowed to stray into
1227  the top third of the buffer, thus keeping more of the file available for  the top third of the buffer, thus keeping more of the file available for
1228  context printing or for multiline scanning. For large files, the pointer will  context printing or for multiline scanning. For large files, the pointer will
1229  be in the middle third most of the time, so the bottom third is available for  be in the middle third most of the time, so the bottom third is available for
# Line 836  Arguments: Line 1234  Arguments:
1234                 the gzFile pointer when reading is via libz                 the gzFile pointer when reading is via libz
1235                 the BZFILE pointer when reading is via libbz2                 the BZFILE pointer when reading is via libbz2
1236    frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2    frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1237      filename     the file name or NULL (for errors)
1238    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
1239                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
1240                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
1241    
1242  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
1243                 1 otherwise (no matches)                 1 otherwise (no matches)
1244                 2 if there is a read error on a .bz2 file                 2 if an overlong line is encountered
1245                   3 if there is a read error on a .bz2 file
1246  */  */
1247    
1248  static int  static int
1249  pcregrep(void *handle, int frtype, char *printname)  pcregrep(void *handle, int frtype, char *filename, char *printname)
1250  {  {
1251  int rc = 1;  int rc = 1;
1252  int linenumber = 1;  int linenumber = 1;
1253  int lastmatchnumber = 0;  int lastmatchnumber = 0;
1254  int count = 0;  int count = 0;
1255  int filepos = 0;  int filepos = 0;
1256  int offsets[99];  int offsets[OFFSET_SIZE];
1257  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
1258  char buffer[3*MBUFTHIRD];  char *ptr = main_buffer;
 char *ptr = buffer;  
1259  char *endptr;  char *endptr;
1260  size_t bufflength;  size_t bufflength;
1261    BOOL binary = FALSE;
1262  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
1263    BOOL input_line_buffered = line_buffered;
1264  FILE *in = NULL;                    /* Ensure initialized */  FILE *in = NULL;                    /* Ensure initialized */
1265    
1266  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
# Line 880  fail. */ Line 1281  fail. */
1281  if (frtype == FR_LIBZ)  if (frtype == FR_LIBZ)
1282    {    {
1283    ingz = (gzFile)handle;    ingz = (gzFile)handle;
1284    bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);    bufflength = gzread (ingz, main_buffer, bufsize);
1285    }    }
1286  else  else
1287  #endif  #endif
# Line 889  else Line 1290  else
1290  if (frtype == FR_LIBBZ2)  if (frtype == FR_LIBBZ2)
1291    {    {
1292    inbz2 = (BZFILE *)handle;    inbz2 = (BZFILE *)handle;
1293    bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);    bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1294    if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */    if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1295    }                                    /* without the cast it is unsigned. */    }                                    /* without the cast it is unsigned. */
1296  else  else
# Line 897  else Line 1298  else
1298    
1299    {    {
1300    in = (FILE *)handle;    in = (FILE *)handle;
1301    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);    if (is_file_tty(in)) input_line_buffered = TRUE;
1302      bufflength = input_line_buffered?
1303        read_one_line(main_buffer, bufsize, in) :
1304        fread(main_buffer, 1, bufsize, in);
1305    }    }
1306    
1307  endptr = buffer + bufflength;  endptr = main_buffer + bufflength;
1308    
1309    /* Unless binary-files=text, see if we have a binary file. This uses the same
1310    rule as GNU grep, namely, a search for a binary zero byte near the start of the
1311    file. */
1312    
1313    if (binary_files != BIN_TEXT)
1314      {
1315      binary =
1316        memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1317      if (binary && binary_files == BIN_NOMATCH) return 1;
1318      }
1319    
1320  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
1321  files, endptr will be at the end of the buffer when we are in the middle of the  files, endptr will be at the end of the buffer when we are in the middle of the
# Line 909  way, the buffer is shifted left and re-f Line 1324  way, the buffer is shifted left and re-f
1324    
1325  while (ptr < endptr)  while (ptr < endptr)
1326    {    {
1327    int i, endlinelength;    int endlinelength;
1328    int mrc = 0;    int mrc = 0;
1329    BOOL match = FALSE;    int startoffset = 0;
1330      BOOL match;
1331    char *matchptr = ptr;    char *matchptr = ptr;
1332    char *t = ptr;    char *t = ptr;
1333    size_t length, linelength;    size_t length, linelength;
# Line 919  while (ptr < endptr) Line 1335  while (ptr < endptr)
1335    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
1336    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
1337    length of the remainder of the data in the buffer. Otherwise, it is the length of    length of the remainder of the data in the buffer. Otherwise, it is the length of
1338    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
1339    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1340    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
1341      first line. */
1342    
1343    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
1344    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
1345    length = multiline? (size_t)(endptr - ptr) : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
1346    
1347      /* Check to see if the line we are looking at extends right to the very end
1348      of the buffer without a line terminator. This means the line is too long to
1349      handle. */
1350    
1351      if (endlinelength == 0 && t == main_buffer + bufsize)
1352        {
1353        fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1354                        "pcregrep: check the --buffer-size option\n",
1355                        linenumber,
1356                        (filename == NULL)? "" : " of file ",
1357                        (filename == NULL)? "" : filename);
1358        return 2;
1359        }
1360    
1361    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
1362    
1363  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
# Line 936  while (ptr < endptr) Line 1367  while (ptr < endptr)
1367        #include <time.h>        #include <time.h>
1368        struct timeval start_time, end_time;        struct timeval start_time, end_time;
1369        struct timezone dummy;        struct timezone dummy;
1370          int i;
1371    
1372        if (jfriedl_XT)        if (jfriedl_XT)
1373        {        {
# Line 944  while (ptr < endptr) Line 1376  while (ptr < endptr)
1376            ptr = malloc(newlen + 1);            ptr = malloc(newlen + 1);
1377            if (!ptr) {            if (!ptr) {
1378                    printf("out of memory");                    printf("out of memory");
1379                    exit(2);                    pcregrep_exit(2);
1380            }            }
1381            endptr = ptr;            endptr = ptr;
1382            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
# Line 961  while (ptr < endptr) Line 1393  while (ptr < endptr)
1393    
1394    
1395        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1396            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);            match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1397                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1398    
1399        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
1400                perror("bad gettimeofday");                perror("bad gettimeofday");
# Line 980  while (ptr < endptr) Line 1413  while (ptr < endptr)
1413    
1414    ONLY_MATCHING_RESTART:    ONLY_MATCHING_RESTART:
1415    
1416    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches or there is an error other
1417    the final newline in the subject string. */    than NOMATCH. This code is in a subroutine so that it can be re-used for
1418      finding subsequent matches when colouring matched lines. */
1419    
1420    for (i = 0; i < pattern_count; i++)    match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
     {  
     mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,  
       offsets, 99);  
     if (mrc >= 0) { match = TRUE; break; }  
     if (mrc != PCRE_ERROR_NOMATCH)  
       {  
       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);  
       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
       fprintf(stderr, "this line:\n");  
       fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */  
       fprintf(stderr, "\n");  
       if (error_count == 0 &&  
           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))  
         {  
         fprintf(stderr, "pcregrep: error %d means that a resource limit "  
           "was exceeded\n", mrc);  
         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
         }  
       if (error_count++ > 20)  
         {  
         fprintf(stderr, "pcregrep: too many errors - abandoned\n");  
         exit(2);  
         }  
       match = invert;    /* No more matching; don't show the line again */  
       break;  
       }  
     }  
1421    
1422    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1423    
# Line 1026  while (ptr < endptr) Line 1433  while (ptr < endptr)
1433    
1434      if (count_only) count++;      if (count_only) count++;
1435    
1436        /* When handling a binary file and binary-files==binary, the "binary"
1437        variable will be set true (it's false in all other cases). In this
1438        situation we just want to output the file name. No need to scan further. */
1439    
1440        else if (binary)
1441          {
1442          fprintf(stdout, "Binary file %s matches\n", filename);
1443          return 0;
1444          }
1445    
1446      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1447      in the file. */      in the file. */
1448    
1449      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1450        {        {
1451        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1452        return 0;        return 0;
# Line 1039  while (ptr < endptr) Line 1456  while (ptr < endptr)
1456    
1457      else if (quiet) return 0;      else if (quiet) return 0;
1458    
1459      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, or a
1460      the --file-offsets and --line-offsets options output offsets for the      captured portion of it, as long as this string is not empty, and the
1461      matching substring (they both force --only-matching). None of these options      --file-offsets and --line-offsets options output offsets for the matching
1462      prints any context. Afterwards, adjust the start and length, and then jump      substring (they both force --only-matching = 0). None of these options
1463      back to look for further matches in the same line. If we are in invert      prints any context. Afterwards, adjust the start and then jump back to look
1464      mode, however, nothing is printed - this could be still useful because the      for further matches in the same line. If we are in invert mode, however,
1465      return code is set. */      nothing is printed and we do not restart - this could still be useful
1466        because the return code is set. */
1467    
1468      else if (only_matching)      else if (only_matching >= 0)
1469        {        {
1470        if (!invert)        if (!invert)
1471          {          {
1472          if (printname != NULL) fprintf(stdout, "%s:", printname);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1473          if (number) fprintf(stdout, "%d:", linenumber);          if (number) fprintf(stdout, "%d:", linenumber);
1474          if (line_offsets)          if (line_offsets)
1475            fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1476              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1477          else if (file_offsets)          else if (file_offsets)
1478            fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d\n",
1479                (int)(filepos + matchptr + offsets[0] - ptr),
1480              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1481          else          else if (only_matching < mrc)
1482            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);            {
1483          fprintf(stdout, "\n");            int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1484          matchptr += offsets[1];            if (plen > 0)
1485          length -= offsets[1];              {
1486                if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1487                FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1488                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1489                fprintf(stdout, "\n");
1490                }
1491              }
1492            else if (printname != NULL || number) fprintf(stdout, "\n");
1493          match = FALSE;          match = FALSE;
1494            if (line_buffered) fflush(stdout);
1495            rc = 0;                      /* Had some success */
1496            startoffset = offsets[1];    /* Restart after the match */
1497          goto ONLY_MATCHING_RESTART;          goto ONLY_MATCHING_RESTART;
1498          }          }
1499        }        }
# Line 1100  while (ptr < endptr) Line 1529  while (ptr < endptr)
1529            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1530            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1531            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1532            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1533            lastmatchrestart = pp;            lastmatchrestart = pp;
1534            }            }
1535          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
# Line 1123  while (ptr < endptr) Line 1552  while (ptr < endptr)
1552          int linecount = 0;          int linecount = 0;
1553          char *p = ptr;          char *p = ptr;
1554    
1555          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1556                 linecount < before_context)                 linecount < before_context)
1557            {            {
1558            linecount++;            linecount++;
1559            p = previous_line(p, buffer);            p = previous_line(p, main_buffer);
1560            }            }
1561    
1562          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 1140  while (ptr < endptr) Line 1569  while (ptr < endptr)
1569            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1570            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1571            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1572            fwrite(p, 1, pp - p, stdout);            FWRITE(p, 1, pp - p, stdout);
1573            p = pp;            p = pp;
1574            }            }
1575          }          }
# Line 1160  while (ptr < endptr) Line 1589  while (ptr < endptr)
1589        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1590        the match will always be before the first newline sequence. */        the match will always be before the first newline sequence. */
1591    
1592        if (multiline)        if (multiline & !invert)
1593          {          {
1594          int ellength;          char *endmatch = ptr + offsets[1];
1595          char *endmatch = ptr;          t = ptr;
1596          if (!invert)          while (t < endmatch)
1597            {            {
1598            endmatch += offsets[1];            t = end_of_line(t, endptr, &endlinelength);
1599            t = ptr;            if (t < endmatch) linenumber++; else break;
           while (t < endmatch)  
             {  
             t = end_of_line(t, endptr, &ellength);  
             if (t <= endmatch) linenumber++; else break;  
             }  
1600            }            }
1601          endmatch = end_of_line(endmatch, endptr, &ellength);          linelength = t - ptr - endlinelength;
         linelength = endmatch - ptr - ellength;  
1602          }          }
1603    
1604        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 1190  while (ptr < endptr) Line 1613  while (ptr < endptr)
1613          {          {
1614          int first = S_arg * 2;          int first = S_arg * 2;
1615          int last  = first + 1;          int last  = first + 1;
1616          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1617          fprintf(stdout, "X");          fprintf(stdout, "X");
1618          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1619          }          }
1620        else        else
1621  #endif  #endif
1622    
1623        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1624          matches, but not of course if the line is a non-match. */
1625    
1626        if (do_colour)        if (do_colour && !invert)
1627          {          {
1628          fwrite(ptr, 1, offsets[0], stdout);          int plength;
1629            FWRITE(ptr, 1, offsets[0], stdout);
1630          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1631          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1632          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1633          fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],          for (;;)
1634            stdout);            {
1635              startoffset = offsets[1];
1636              if (startoffset >= (int)linelength + endlinelength ||
1637                  !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1638                break;
1639              FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1640              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1641              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1642              fprintf(stdout, "%c[00m", 0x1b);
1643              }
1644    
1645            /* In multiline mode, we may have already printed the complete line
1646            and its line-ending characters (if they matched the pattern), so there
1647            may be no more to print. */
1648    
1649            plength = (int)((linelength + endlinelength) - startoffset);
1650            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1651          }          }
1652        else fwrite(ptr, 1, linelength + endlinelength, stdout);  
1653          /* Not colouring; no need to search for further matches */
1654    
1655          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1656        }        }
1657    
1658      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match. If --line-buffered was
1659        given, flush the output. */
1660    
1661        if (line_buffered) fflush(stdout);
1662      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1663    
1664      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
# Line 1244  while (ptr < endptr) Line 1690  while (ptr < endptr)
1690    offset to the current line is maintained in filepos. */    offset to the current line is maintained in filepos. */
1691    
1692    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
1693    filepos += linelength + endlinelength;    filepos += (int)(linelength + endlinelength);
1694    linenumber++;    linenumber++;
1695    
1696      /* If input is line buffered, and the buffer is not yet full, read another
1697      line and add it into the buffer. */
1698    
1699      if (input_line_buffered && bufflength < (size_t)bufsize)
1700        {
1701        int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1702        bufflength += add;
1703        endptr += add;
1704        }
1705    
1706    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1707    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1708    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
1709    about to be lost, print them. */    about to be lost, print them. */
1710    
1711    if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)    if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1712      {      {
1713      if (after_context > 0 &&      if (after_context > 0 &&
1714          lastmatchnumber > 0 &&          lastmatchnumber > 0 &&
1715          lastmatchrestart < buffer + MBUFTHIRD)          lastmatchrestart < main_buffer + bufthird)
1716        {        {
1717        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1718        lastmatchnumber = 0;        lastmatchnumber = 0;
# Line 1264  while (ptr < endptr) Line 1720  while (ptr < endptr)
1720    
1721      /* Now do the shuffle */      /* Now do the shuffle */
1722    
1723      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1724      ptr -= MBUFTHIRD;      ptr -= bufthird;
1725    
1726  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
1727      if (frtype == FR_LIBZ)      if (frtype == FR_LIBZ)
1728        bufflength = 2*MBUFTHIRD +        bufflength = 2*bufthird +
1729          gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);          gzread (ingz, main_buffer + 2*bufthird, bufthird);
1730      else      else
1731  #endif  #endif
1732    
1733  #ifdef SUPPORT_LIBBZ2  #ifdef SUPPORT_LIBBZ2
1734      if (frtype == FR_LIBBZ2)      if (frtype == FR_LIBBZ2)
1735        bufflength = 2*MBUFTHIRD +        bufflength = 2*bufthird +
1736          BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);          BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1737      else      else
1738  #endif  #endif
1739    
1740      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);      bufflength = 2*bufthird +
1741          (input_line_buffered?
1742      endptr = buffer + bufflength;         read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1743           fread(main_buffer + 2*bufthird, 1, bufthird, in));
1744        endptr = main_buffer + bufflength;
1745    
1746      /* Adjust any last match point */      /* Adjust any last match point */
1747    
1748      if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;      if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1749      }      }
1750    }     /* Loop through the whole file */    }     /* Loop through the whole file */
1751    
1752  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
1753  hyphenpending if it prints something. */  hyphenpending if it prints something. */
1754    
1755  if (!only_matching && !count_only)  if (only_matching < 0 && !count_only)
1756    {    {
1757    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1758    hyphenpending |= endhyphenpending;    hyphenpending |= endhyphenpending;
# Line 1313  if (filenames == FN_NOMATCH_ONLY) Line 1771  if (filenames == FN_NOMATCH_ONLY)
1771    
1772  if (count_only)  if (count_only)
1773    {    {
1774    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1775    fprintf(stdout, "%d\n", count);      {
1776        if (printname != NULL && filenames != FN_NONE)
1777          fprintf(stdout, "%s:", printname);
1778        fprintf(stdout, "%d\n", count);
1779        }
1780    }    }
1781    
1782  return rc;  return rc;
# Line 1334  Arguments: Line 1796  Arguments:
1796    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1797    only_one_at_top   TRUE if the path is the only one at toplevel    only_one_at_top   TRUE if the path is the only one at toplevel
1798    
1799  Returns:   0 if there was at least one match  Returns:  -1 the file/directory was skipped
1800               0 if there was at least one match
1801             1 if there were no matches             1 if there were no matches
1802             2 there was some kind of error             2 there was some kind of error
1803    
# Line 1345  static int Line 1808  static int
1808  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1809  {  {
1810  int rc = 1;  int rc = 1;
 int sep;  
1811  int frtype;  int frtype;
 int pathlen;  
1812  void *handle;  void *handle;
1813    char *lastcomp;
1814  FILE *in = NULL;           /* Ensure initialized */  FILE *in = NULL;           /* Ensure initialized */
1815    
1816  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
# Line 1359  gzFile ingz = NULL; Line 1821  gzFile ingz = NULL;
1821  BZFILE *inbz2 = NULL;  BZFILE *inbz2 = NULL;
1822  #endif  #endif
1823    
1824    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1825    int pathlen;
1826    #endif
1827    
1828  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1829    
1830  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1831    {    {
1832    return pcregrep(stdin, FR_PLAIN,    return pcregrep(stdin, FR_PLAIN, stdin_name,
1833      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1834        stdin_name : NULL);        stdin_name : NULL);
1835    }    }
1836    
1837  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
1838  each file and directory within it, subject to any include or exclude patterns  directories, whereas --include and --exclude apply to everything else. The test
1839  that were set. The scanning code is localized so it can be made  is against the final component of the path. */
1840  system-specific. */  
1841    lastcomp = strrchr(pathname, FILESEP);
1842    lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
1843    
1844    /* If the file is a directory, skip if not recursing or if explicitly excluded.
1845    Otherwise, scan the directory and recurse for each path within it. The scanning
1846    code is localized so it can be made system-specific. */
1847    
1848    if (isdirectory(pathname))
1849      {
1850      if (dee_action == dee_SKIP ||
1851          !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
1852        return -1;
1853    
 if ((sep = isdirectory(pathname)) != 0)  
   {  
   if (dee_action == dee_SKIP) return 1;  
1854    if (dee_action == dee_RECURSE)    if (dee_action == dee_RECURSE)
1855      {      {
1856      char buffer[1024];      char buffer[1024];
# Line 1392  if ((sep = isdirectory(pathname)) != 0) Line 1867  if ((sep = isdirectory(pathname)) != 0)
1867    
1868      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
1869        {        {
1870        int frc, nflen;        int frc;
1871        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
       nflen = strlen(nextfile);  
   
       if (isdirectory(buffer))  
         {  
         if (exclude_dir_compiled != NULL &&  
             pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)  
           continue;  
   
         if (include_dir_compiled != NULL &&  
             pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)  
           continue;  
         }  
       else  
         {  
         if (exclude_compiled != NULL &&  
             pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)  
           continue;  
   
         if (include_compiled != NULL &&  
             pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)  
           continue;  
         }  
   
1872        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1873        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
1874         else if (frc == 0 && rc == 1) rc = 0;         else if (frc == 0 && rc == 1) rc = 0;
# Line 1428  if ((sep = isdirectory(pathname)) != 0) Line 1880  if ((sep = isdirectory(pathname)) != 0)
1880    }    }
1881    
1882  /* If the file is not a directory and not a regular file, skip it if that's  /* If the file is not a directory and not a regular file, skip it if that's
1883  been requested. */  been requested. Otherwise, check for explicit include/exclude. */
1884    
1885  else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;  else if ((!isregfile(pathname) && DEE_action == DEE_SKIP) ||
1886              !test_incexc(lastcomp, include_patterns, exclude_patterns))
1887            return -1;
1888    
1889  /* Control reaches here if we have a regular file, or if we have a directory  /* Control reaches here if we have a regular file, or if we have a directory
1890  and recursion or skipping was not requested, or if we have anything else and  and recursion or skipping was not requested, or if we have anything else and
# Line 1438  skipping was not requested. The scan pro Line 1892  skipping was not requested. The scan pro
1892  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1893  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1894    
1895  pathlen = strlen(pathname);  #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1896    pathlen = (int)(strlen(pathname));
1897    #endif
1898    
1899  /* Open using zlib if it is supported and the file name ends with .gz. */  /* Open using zlib if it is supported and the file name ends with .gz. */
1900    
# Line 1478  an attempt to read a .bz2 file indicates Line 1934  an attempt to read a .bz2 file indicates
1934  PLAIN_FILE:  PLAIN_FILE:
1935  #endif  #endif
1936    {    {
1937    in = fopen(pathname, "r");    in = fopen(pathname, "rb");
1938    handle = (void *)in;    handle = (void *)in;
1939    frtype = FR_PLAIN;    frtype = FR_PLAIN;
1940    }    }
# Line 1495  if (handle == NULL) Line 1951  if (handle == NULL)
1951    
1952  /* Now grep the file */  /* Now grep the file */
1953    
1954  rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||  rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1955    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1956    
1957  /* Close in an appropriate manner. */  /* Close in an appropriate manner. */
# Line 1506  if (frtype == FR_LIBZ) Line 1962  if (frtype == FR_LIBZ)
1962  else  else
1963  #endif  #endif
1964    
1965  /* If it is a .bz2 file and the result is 2, it means that the first attempt to  /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1966  read failed. If the error indicates that the file isn't in fact bzipped, try  read failed. If the error indicates that the file isn't in fact bzipped, try
1967  again as a normal file. */  again as a normal file. */
1968    
1969  #ifdef SUPPORT_LIBBZ2  #ifdef SUPPORT_LIBBZ2
1970  if (frtype == FR_LIBBZ2)  if (frtype == FR_LIBBZ2)
1971    {    {
1972    if (rc == 2)    if (rc == 3)
1973      {      {
1974      int errnum;      int errnum;
1975      const char *err = BZ2_bzerror(inbz2, &errnum);      const char *err = BZ2_bzerror(inbz2, &errnum);
# Line 1525  if (frtype == FR_LIBBZ2) Line 1981  if (frtype == FR_LIBBZ2)
1981      else if (!silent)      else if (!silent)
1982        fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",        fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1983          pathname, err);          pathname, err);
1984        rc = 2;    /* The normal "something went wrong" code */
1985      }      }
1986    BZ2_bzclose(inbz2);    BZ2_bzclose(inbz2);
1987    }    }
# Line 1600  for (op = optionlist; op->one_char != 0; Line 2057  for (op = optionlist; op->one_char != 0;
2057    {    {
2058    int n;    int n;
2059    char s[4];    char s[4];
2060    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");  
2061    n = 30 - printf("  %s --%s", s, op->long_name);    /* Two options were accidentally implemented and documented with underscores
2062      instead of hyphens in their names, something that was not noticed for quite a
2063      few releases. When fixing this, I left the underscored versions in the list
2064      in case people were using them. However, we don't want to display them in the
2065      help data. There are no other options that contain underscores, and we do not
2066      expect ever to implement such options. Therefore, just omit any option that
2067      contains an underscore. */
2068    
2069      if (strchr(op->long_name, '_') != NULL) continue;
2070    
2071      if (op->one_char > 0 && (op->long_name)[0] == 0)
2072        n = 31 - printf("  -%c", op->one_char);
2073      else
2074        {
2075        if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
2076          else strcpy(s, "   ");
2077        n = 31 - printf("  %s --%s", s, op->long_name);
2078        }
2079    
2080    if (n < 1) n = 1;    if (n < 1) n = 1;
2081    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                           ", op->help_text);
2082    }    }
2083    
2084  printf("\nWhen reading patterns from a file instead of using a command line option,\n");  printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
2085  printf("trailing white space is removed and blank lines are ignored.\n");  printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
2086  printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  printf("When reading patterns or file names from a file, trailing white\n");
2087    printf("space is removed and blank lines are ignored.\n");
2088    printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
2089    
2090  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
2091  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
# Line 1627  handle_option(int letter, int options) Line 2104  handle_option(int letter, int options)
2104  switch(letter)  switch(letter)
2105    {    {
2106    case N_FOFFSETS: file_offsets = TRUE; break;    case N_FOFFSETS: file_offsets = TRUE; break;
2107    case N_HELP: help(); exit(0);    case N_HELP: help(); pcregrep_exit(0);
2108      case N_LBUFFER: line_buffered = TRUE; break;
2109    case N_LOFFSETS: line_offsets = number = TRUE; break;    case N_LOFFSETS: line_offsets = number = TRUE; break;
2110      case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2111      case 'a': binary_files = BIN_TEXT; break;
2112    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
2113    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
2114    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
2115      case 'I': binary_files = BIN_NOMATCH; break;
2116    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
2117    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
2118    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2119    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
2120    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2121    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
2122    case 'o': only_matching = TRUE; break;    case 'o': only_matching = 0; break;
2123    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
2124    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
2125    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
# Line 1648  switch(letter) Line 2129  switch(letter)
2129    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
2130    
2131    case 'V':    case 'V':
2132    fprintf(stderr, "pcregrep version %s\n", pcre_version());    fprintf(stdout, "pcregrep version %s\n", pcre_version());
2133    exit(0);    pcregrep_exit(0);
2134    break;    break;
2135    
2136    default:    default:
2137    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2138    exit(usage(2));    pcregrep_exit(usage(2));
2139    }    }
2140    
2141  return options;  return options;
# Line 1692  return buffer; Line 2173  return buffer;
2173  *          Compile a single pattern              *  *          Compile a single pattern              *
2174  *************************************************/  *************************************************/
2175    
2176  /* When the -F option has been used, this is called for each substring.  /* Do nothing if the pattern has already been compiled. This is the case for
2177  Otherwise it's called for each supplied pattern.  include/exclude patterns read from a file.
2178    
2179    When the -F option has been used, each "pattern" may be a list of strings,
2180    separated by line breaks. They will be matched literally. We split such a
2181    string and compile the first substring, inserting an additional block into the
2182    pattern chain.
2183    
2184  Arguments:  Arguments:
2185    pattern        the pattern string    p              points to the pattern block
2186    options        the PCRE options    options        the PCRE options
2187    filename       the file name, or NULL for a command-line pattern    popts          the processing options
2188      fromfile       TRUE if the pattern was read from a file
2189      fromtext       file name or identifying text (e.g. "include")
2190    count          0 if this is the only command line pattern, or    count          0 if this is the only command line pattern, or
2191                   number of the command line pattern, or                   number of the command line pattern, or
2192                   linenumber for a pattern from a file                   linenumber for a pattern from a file
# Line 1707  Returns:         TRUE on success, FALSE Line 2195  Returns:         TRUE on success, FALSE
2195  */  */
2196    
2197  static BOOL  static BOOL
2198  compile_single_pattern(char *pattern, int options, char *filename, int count)  compile_pattern(patstr *p, int options, int popts, int fromfile,
2199      const char *fromtext, int count)
2200  {  {
2201  char buffer[MBUFTHIRD + 16];  char buffer[PATBUFSIZE];
2202  const char *error;  const char *error;
2203    char *ps = p->string;
2204    int patlen = strlen(ps);
2205  int errptr;  int errptr;
2206    
2207  if (pattern_count >= MAX_PATTERN_COUNT)  if (p->compiled != NULL) return TRUE;
   {  
   fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",  
     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);  
   return FALSE;  
   }  
2208    
2209  sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,  if ((popts & PO_FIXED_STRINGS) != 0)
   suffix[process_options]);  
 pattern_list[pattern_count] =  
   pcre_compile(buffer, options, &error, &errptr, pcretables);  
 if (pattern_list[pattern_count] != NULL)  
2210    {    {
2211    pattern_count++;    int ellength;
2212    return TRUE;    char *eop = ps + patlen;
2213      char *pe = end_of_line(ps, eop, &ellength);
2214    
2215      if (ellength != 0)
2216        {
2217        if (add_pattern(pe, p) == NULL) return FALSE;
2218        patlen = (int)(pe - ps - ellength);
2219        }
2220    }    }
2221    
2222    sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2223    p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2224    if (p->compiled != NULL) return TRUE;
2225    
2226  /* Handle compile errors */  /* Handle compile errors */
2227    
2228  errptr -= (int)strlen(prefix[process_options]);  errptr -= (int)strlen(prefix[popts]);
2229  if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);  if (errptr > patlen) errptr = patlen;
2230    
2231  if (filename == NULL)  if (fromfile)
2232    {    {
2233    if (count == 0)    fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2234      fprintf(stderr, "pcregrep: Error in command-line regex "      "at offset %d: %s\n", count, fromtext, errptr, error);
       "at offset %d: %s\n", errptr, error);  
   else  
     fprintf(stderr, "pcregrep: Error in %s command-line regex "  
       "at offset %d: %s\n", ordin(count), errptr, error);  
2235    }    }
2236  else  else
2237    {    {
2238    fprintf(stderr, "pcregrep: Error in regex in line %d of %s "    if (count == 0)
2239      "at offset %d: %s\n", count, filename, errptr, error);      fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2240          fromtext, errptr, error);
2241      else
2242        fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2243          ordin(count), fromtext, errptr, error);
2244    }    }
2245    
2246  return FALSE;  return FALSE;
# Line 1756  return FALSE; Line 2249  return FALSE;
2249    
2250    
2251  /*************************************************  /*************************************************
2252  *           Compile one supplied pattern         *  *     Read and compile a file of patterns        *
2253  *************************************************/  *************************************************/
2254    
2255  /* When the -F option has been used, each string may be a list of strings,  /* This is used for --filelist, --include-from, and --exclude-from.
 separated by line breaks. They will be matched literally.  
2256    
2257  Arguments:  Arguments:
2258    pattern        the pattern string    name         the name of the file; "-" is stdin
2259    options        the PCRE options    patptr       pointer to the pattern chain anchor
2260    filename       the file name, or NULL for a command-line pattern    patlastptr   pointer to the last pattern pointer
2261    count          0 if this is the only command line pattern, or    popts        the process options to pass to compile_pattern()
                  number of the command line pattern, or  
                  linenumber for a pattern from a file  
2262    
2263  Returns:         TRUE on success, FALSE after an error  Returns:       TRUE if all went well
2264  */  */
2265    
2266  static BOOL  static BOOL
2267  compile_pattern(char *pattern, int options, char *filename, int count)  read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2268  {  {
2269  if ((process_options & PO_FIXED_STRINGS) != 0)  int linenumber = 0;
2270    FILE *f;
2271    char *filename;
2272    char buffer[PATBUFSIZE];
2273    
2274    if (strcmp(name, "-") == 0)
2275      {
2276      f = stdin;
2277      filename = stdin_name;
2278      }
2279    else
2280    {    {
2281    char *eop = pattern + strlen(pattern);    f = fopen(name, "r");
2282    char buffer[MBUFTHIRD];    if (f == NULL)
2283        {
2284        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2285        return FALSE;
2286        }
2287      filename = name;
2288      }
2289    
2290    while (fgets(buffer, PATBUFSIZE, f) != NULL)
2291      {
2292      char *s = buffer + (int)strlen(buffer);
2293      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2294      *s = 0;
2295      linenumber++;
2296      if (buffer[0] == 0) continue;   /* Skip blank lines */
2297    
2298      /* Note: this call to add_pattern() puts a pointer to the local variable
2299      "buffer" into the pattern chain. However, that pointer is used only when
2300      compiling the pattern, which happens immediately below, so we flatten it
2301      afterwards, as a precaution against any later code trying to use it. */
2302    
2303      *patlastptr = add_pattern(buffer, *patlastptr);
2304      if (*patlastptr == NULL) return FALSE;
2305      if (*patptr == NULL) *patptr = *patlastptr;
2306    
2307      /* This loop is needed because compiling a "pattern" when -F is set may add
2308      on additional literal patterns if the original contains a newline. In the
2309      common case, it never will, because fgets() stops at a newline. However,
2310      the -N option can be used to give pcregrep a different newline setting. */
2311    
2312    for(;;)    for(;;)
2313      {      {
2314      int ellength;      if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2315      char *p = end_of_line(pattern, eop, &ellength);          linenumber))
     if (ellength == 0)  
       return compile_single_pattern(pattern, options, filename, count);  
     sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);  
     pattern = p;  
     if (!compile_single_pattern(buffer, options, filename, count))  
2316        return FALSE;        return FALSE;
2317        (*patlastptr)->string = NULL;            /* Insurance */
2318        if ((*patlastptr)->next == NULL) break;
2319        *patlastptr = (*patlastptr)->next;
2320      }      }
2321    }    }
2322  else return compile_single_pattern(pattern, options, filename, count);  
2323    if (f != stdin) fclose(f);
2324    return TRUE;
2325  }  }
2326    
2327    
# Line 1808  main(int argc, char **argv) Line 2337  main(int argc, char **argv)
2337  {  {
2338  int i, j;  int i, j;
2339  int rc = 1;  int rc = 1;
 int pcre_options = 0;  
 int cmd_pattern_count = 0;  
 int hint_count = 0;  
 int errptr;  
2340  BOOL only_one_at_top;  BOOL only_one_at_top;
2341  char *patterns[MAX_PATTERN_COUNT];  patstr *cp;
2342    fnstr *fn;
2343  const char *locale_from = "--locale";  const char *locale_from = "--locale";
2344  const char *error;  const char *error;
2345    
2346    #ifdef SUPPORT_PCREGREP_JIT
2347    pcre_jit_stack *jit_stack = NULL;
2348    #endif
2349    
2350  /* Set the default line ending value from the default in the PCRE library;  /* Set the default line ending value from the default in the PCRE library;
2351  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2352  */  Note that the return values from pcre_config(), though derived from the ASCII
2353    codes, are the same in EBCDIC environments, so we must use the actual values
2354    rather than escapes such as '\r'. */
2355    
2356  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2357  switch(i)  switch(i)
2358    {    {
2359    default:                 newline = (char *)"lf"; break;    default:               newline = (char *)"lf"; break;
2360    case '\r':               newline = (char *)"cr"; break;    case 13:               newline = (char *)"cr"; break;
2361    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2362    case -1:                 newline = (char *)"any"; break;    case -1:               newline = (char *)"any"; break;
2363    case -2:                 newline = (char *)"anycrlf"; break;    case -2:               newline = (char *)"anycrlf"; break;
2364    }    }
2365    
2366  /* Process the options */  /* Process the options */
# Line 1847  for (i = 1; i < argc; i++) Line 2379  for (i = 1; i < argc; i++)
2379    
2380    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2381      {      {
2382      if (pattern_filename != NULL || pattern_count > 0) break;      if (pattern_files != NULL || patterns != NULL) break;
2383        else exit(usage(2));        else pcregrep_exit(usage(2));
2384      }      }
2385    
2386    /* Handle a long name option, or -- to terminate the options */    /* Handle a long name option, or -- to terminate the options */
# Line 1870  for (i = 1; i < argc; i++) Line 2402  for (i = 1; i < argc; i++)
2402      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2403      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2404      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2405      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
2406      these categories, fortunately. */      both these categories. */
2407    
2408      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2409        {        {
2410        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2411        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2412        if (opbra == NULL)     /* Not a (p) case */  
2413          /* Handle options with only one spelling of the name */
2414    
2415          if (opbra == NULL)     /* Does not contain '(' */
2416          {          {
2417          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2418            {            {
# Line 1885  for (i = 1; i < argc; i++) Line 2420  for (i = 1; i < argc; i++)
2420            }            }
2421          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2422            {            {
2423            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2424            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2425                (int)strlen(arg) : (int)(argequals - arg);
2426            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2427              {              {
2428              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1899  for (i = 1; i < argc; i++) Line 2435  for (i = 1; i < argc; i++)
2435              }              }
2436            }            }
2437          }          }
2438        else                   /* Special case xxxx(p) */  
2439          /* Handle options with an alternate spelling of the name */
2440    
2441          else
2442          {          {
2443          char buff1[24];          char buff1[24];
2444          char buff2[24];          char buff2[24];
2445          int baselen = opbra - op->long_name;  
2446            int baselen = (int)(opbra - op->long_name);
2447            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2448            int arglen = (argequals == NULL || equals == NULL)?
2449              (int)strlen(arg) : (int)(argequals - arg);
2450    
2451          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2452          sprintf(buff2, "%s%.*s", buff1,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2453            (int)strlen(op->long_name) - baselen - 2, opbra + 1);  
2454          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2455               strncmp(arg, buff2, arglen) == 0)
2456              {
2457              if (equals != NULL && argequals != NULL)
2458                {
2459                option_data = argequals;
2460                if (*option_data == '=')
2461                  {
2462                  option_data++;
2463                  longopwasequals = TRUE;
2464                  }
2465                }
2466            break;            break;
2467              }
2468          }          }
2469        }        }
2470    
2471      if (op->one_char == 0)      if (op->one_char == 0)
2472        {        {
2473        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2474        exit(usage(2));        pcregrep_exit(usage(2));
2475        }        }
2476      }      }
2477    
   
2478    /* Jeffrey Friedl's debugging harness uses these additional options which    /* Jeffrey Friedl's debugging harness uses these additional options which
2479    are not in the right form for putting in the option table because they use    are not in the right form for putting in the option table because they use
2480    only one hyphen, yet are more than one character long. By putting them    only one hyphen, yet are more than one character long. By putting them
# Line 1953  for (i = 1; i < argc; i++) Line 2508  for (i = 1; i < argc; i++)
2508      while (*s != 0)      while (*s != 0)
2509        {        {
2510        for (op = optionlist; op->one_char != 0; op++)        for (op = optionlist; op->one_char != 0; op++)
2511          { if (*s == op->one_char) break; }          {
2512            if (*s == op->one_char) break;
2513            }
2514        if (op->one_char == 0)        if (op->one_char == 0)
2515          {          {
2516          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2517            *s, argv[i]);            *s, argv[i]);
2518          exit(usage(2));          pcregrep_exit(usage(2));
2519            }
2520    
2521          /* Check for a single-character option that has data: OP_OP_NUMBER
2522          is used for one that either has a numerical value or defaults, i.e. the
2523          data is optional. If a digit follows, there is data; if not, carry on
2524          with other single-character options in the same string. */
2525    
2526          option_data = s+1;
2527          if (op->type == OP_OP_NUMBER)
2528            {
2529            if (isdigit((unsigned char)s[1])) break;
2530          }          }
2531        if (op->type != OP_NODATA || s[1] == 0)        else   /* Check for end or a dataless option */
2532          {          {
2533          option_data = s+1;          if (op->type != OP_NODATA || s[1] == 0) break;
         break;  
2534          }          }
2535    
2536          /* Handle a single-character option with no data, then loop for the
2537          next character in the string. */
2538    
2539        pcre_options = handle_option(*s++, pcre_options);        pcre_options = handle_option(*s++, pcre_options);
2540        }        }
2541      }      }
# Line 1981  for (i = 1; i < argc; i++) Line 2552  for (i = 1; i < argc; i++)
2552    
2553    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2554    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
2555    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r",
2556    Jeffrey Friedl's special -S debugging option. */    "only-matching", and Jeffrey Friedl's special -S debugging option. */
2557    
2558    if (*option_data == 0 &&    if (*option_data == 0 &&
2559        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
# Line 1992  for (i = 1; i < argc; i++) Line 2563  for (i = 1; i < argc; i++)
2563        case N_COLOUR:        case N_COLOUR:
2564        colour_option = (char *)"auto";        colour_option = (char *)"auto";
2565        break;        break;
2566    
2567          case 'o':
2568          only_matching = 0;
2569          break;
2570    
2571  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
2572        case 'S':        case 'S':
2573        S_arg = 0;        S_arg = 0;
# Line 2008  for (i = 1; i < argc; i++) Line 2584  for (i = 1; i < argc; i++)
2584      if (i >= argc - 1 || longopwasequals)      if (i >= argc - 1 || longopwasequals)
2585        {        {
2586        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2587        exit(usage(2));        pcregrep_exit(usage(2));
2588        }        }
2589      option_data = argv[++i];      option_data = argv[++i];
2590      }      }
2591    
2592    /* If the option type is OP_PATLIST, it's the -e option, which can be called    /* If the option type is OP_PATLIST, it's the -e option, or one of the
2593    multiple times to create a list of patterns. */    include/exclude options, which can be called multiple times to create lists
2594      of patterns. */
2595    
2596    if (op->type == OP_PATLIST)    if (op->type == OP_PATLIST)
2597         {
2598         patdatastr *pd = (patdatastr *)op->dataptr;
2599         *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2600         if (*(pd->lastptr) == NULL) goto EXIT2;
2601         if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2602         }
2603    
2604      /* If the option type is OP_FILELIST, it's one of the options that names a
2605      file. */
2606    
2607      else if (op->type == OP_FILELIST)
2608        {
2609        fndatastr *fd = (fndatastr *)op->dataptr;
2610        fn = (fnstr *)malloc(sizeof(fnstr));
2611        if (fn == NULL)
2612          {
2613          fprintf(stderr, "pcregrep: malloc failed\n");
2614          goto EXIT2;
2615          }
2616        fn->next = NULL;
2617        fn->name = option_data;
2618        if (*(fd->anchor) == NULL)
2619          *(fd->anchor) = fn;
2620        else
2621          (*(fd->lastptr))->next = fn;
2622        *(fd->lastptr) = fn;
2623        }
2624    
2625      /* Handle OP_BINFILES */
2626    
2627      else if (op->type == OP_BINFILES)
2628      {      {
2629      if (cmd_pattern_count >= MAX_PATTERN_COUNT)      if (strcmp(option_data, "binary") == 0)
2630          binary_files = BIN_BINARY;
2631        else if (strcmp(option_data, "without-match") == 0)
2632          binary_files = BIN_NOMATCH;
2633        else if (strcmp(option_data, "text") == 0)
2634          binary_files = BIN_TEXT;
2635        else
2636        {        {
2637        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",        fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2638          MAX_PATTERN_COUNT);          option_data);
2639        return 2;        pcregrep_exit(usage(2));
2640        }        }
     patterns[cmd_pattern_count++] = option_data;  
2641      }      }
2642    
2643    /* Otherwise, deal with single string or numeric data values. */    /* Otherwise, deal with single string or numeric data values. */
2644    
2645    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)    else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2646               op->type != OP_OP_NUMBER)
2647      {      {
2648      *((char **)op->dataptr) = option_data;      *((char **)op->dataptr) = option_data;
2649      }      }
2650    
2651      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2652      only for unpicking arguments, so just keep it simple. */
2653    
2654    else    else
2655      {      {
2656      char *endptr;      unsigned long int n = 0;
2657      int n = strtoul(option_data, &endptr, 10);      char *endptr = option_data;
2658        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2659        while (isdigit((unsigned char)(*endptr)))
2660          n = n * 10 + (int)(*endptr++ - '0');
2661        if (toupper(*endptr) == 'K')
2662          {
2663          n *= 1024;
2664          endptr++;
2665          }
2666        else if (toupper(*endptr) == 'M')
2667          {
2668          n *= 1024*1024;
2669          endptr++;
2670          }
2671      if (*endptr != 0)      if (*endptr != 0)
2672        {        {
2673        if (longop)        if (longop)
2674          {          {
2675          char *equals = strchr(op->long_name, '=');          char *equals = strchr(op->long_name, '=');
2676          int nlen = (equals == NULL)? (int)strlen(op->long_name) :          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2677            equals - op->long_name;            (int)(equals - op->long_name);
2678          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2679            option_data, nlen, op->long_name);            option_data, nlen, op->long_name);
2680          }          }
2681        else        else
2682          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2683            option_data, op->one_char);            option_data, op->one_char);
2684        exit(usage(2));        pcregrep_exit(usage(2));
2685        }        }
2686      *((int *)op->dataptr) = n;      if (op->type == OP_LONGNUMBER)
2687            *((unsigned long int *)op->dataptr) = n;
2688        else
2689            *((int *)op->dataptr) = n;
2690      }      }
2691    }    }
2692    
# Line 2066  if (both_context > 0) Line 2700  if (both_context > 0)
2700    }    }
2701    
2702  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2703  However, the latter two set the only_matching flag. */  However, the latter two set only_matching. */
2704    
2705  if ((only_matching && (file_offsets || line_offsets)) ||  if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2706      (file_offsets && line_offsets))      (file_offsets && line_offsets))
2707    {    {
2708    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2709      "and/or --line-offsets\n");      "and/or --line-offsets\n");
2710    exit(usage(2));    pcregrep_exit(usage(2));
2711    }    }
2712    
2713  if (file_offsets || line_offsets) only_matching = TRUE;  if (file_offsets || line_offsets) only_matching = 0;
2714    
2715  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2716  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
# Line 2200  if (jfriedl_XT != 0 || jfriedl_XR != 0) Line 2834  if (jfriedl_XT != 0 || jfriedl_XR != 0)
2834    }    }
2835  #endif  #endif
2836    
2837  /* Get memory to store the pattern and hints lists. */  /* Get memory for the main buffer. */
2838    
2839  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  bufsize = 3*bufthird;
2840  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  main_buffer = (char *)malloc(bufsize);
2841    
2842  if (pattern_list == NULL || hints_list == NULL)  if (main_buffer == NULL)
2843    {    {
2844    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
2845    goto EXIT2;    goto EXIT2;
2846    }    }
2847    
2848  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there are no files provided by -f,
2849  the first argument is the one and only pattern, and it must exist. */  the first argument is the one and only pattern, and it must exist. */
2850    
2851  if (cmd_pattern_count == 0 && pattern_filename == NULL)  if (patterns == NULL && pattern_files == NULL)
2852    {    {
2853    if (i >= argc) return usage(2);    if (i >= argc) return usage(2);
2854    patterns[cmd_pattern_count++] = argv[i++];    patterns = patterns_last = add_pattern(argv[i++], NULL);
2855      if (patterns == NULL) goto EXIT2;
2856    }    }
2857    
2858  /* Compile the patterns that were provided on the command line, either by  /* Compile the patterns that were provided on the command line, either by
2859  multiple uses of -e or as a single unkeyed pattern. */  multiple uses of -e or as a single unkeyed pattern. We cannot do this until
2860    after all the command-line options are read so that we know which PCRE options
2861    to use. When -F is used, compile_pattern() may add another block into the
2862    chain, so we must not access the next pointer till after the compile. */
2863    
2864  for (j = 0; j < cmd_pattern_count; j++)  for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
2865    {    {
2866    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
2867         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 1 && patterns->next == NULL)? 0 : j))
2868      goto EXIT2;      goto EXIT2;
2869    }    }
2870    
2871  /* Compile the regular expressions that are provided in a file. */  /* Read and compile the regular expressions that are provided in files. */
2872    
2873  if (pattern_filename != NULL)  for (fn = pattern_files; fn != NULL; fn = fn->next)
2874    {    {
2875    int linenumber = 0;    if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
2876    FILE *f;      goto EXIT2;
   char *filename;  
   char buffer[MBUFTHIRD];  
   
   if (strcmp(pattern_filename, "-") == 0)  
     {  
     f = stdin;  
     filename = stdin_name;  
     }  
   else  
     {  
     f = fopen(pattern_filename, "r");  
     if (f == NULL)  
       {  
       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,  
         strerror(errno));  
       goto EXIT2;  
       }  
     filename = pattern_filename;  
     }  
   
   while (fgets(buffer, MBUFTHIRD, f) != NULL)  
     {  
     char *s = buffer + (int)strlen(buffer);  
     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;  
     *s = 0;  
     linenumber++;  
     if (buffer[0] == 0) continue;   /* Skip blank lines */  
     if (!compile_pattern(buffer, pcre_options, filename, linenumber))  
       goto EXIT2;  
     }  
   
   if (f != stdin) fclose(f);  
2877    }    }
2878    
2879  /* Study the regular expressions, as we will be running them many times */  /* Study the regular expressions, as we will be running them many times. Unless
2880    JIT has been explicitly disabled, arrange a stack for it to use. */
2881    
2882  for (j = 0; j < pattern_count; j++)  #ifdef SUPPORT_PCREGREP_JIT
2883    if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2884      jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
2885    #endif
2886    
2887    for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
2888    {    {
2889    hints_list[j] = pcre_study(pattern_list[j], 0, &error);    cp->hint = pcre_study(cp->compiled, study_options, &error);
2890    if (error != NULL)    if (error != NULL)
2891      {      {
2892      char s[16];      char s[16];
2893      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
2894      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2895      goto EXIT2;      goto EXIT2;
2896      }      }
2897    hint_count++;  #ifdef SUPPORT_PCREGREP_JIT
2898      if (jit_stack != NULL && cp->hint != NULL)
2899        pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
2900    #endif
2901    }    }
2902    
2903  /* If there are include or exclude patterns, compile them. */  /* If --match-limit or --recursion-limit was set, put the value(s) into the
2904    pcre_extra block for each pattern. */
2905    
2906  if (exclude_pattern != NULL)  if (match_limit > 0 || match_limit_recursion > 0)
2907    {    {
2908    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,    for (cp = patterns; cp != NULL; cp = cp->next)
     pcretables);  
   if (exclude_compiled == NULL)  
2909      {      {
2910      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      if (cp->hint == NULL)
2911        errptr, error);        {
2912      goto EXIT2;        cp->hint = (pcre_extra *)malloc(sizeof(pcre_extra));
2913          if (cp->hint == NULL)
2914            {
2915            fprintf(stderr, "pcregrep: malloc failed\n");
2916            pcregrep_exit(2);
2917            }
2918          }
2919        if (match_limit > 0)
2920          {
2921          cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
2922          cp->hint->match_limit = match_limit;
2923          }
2924        if (match_limit_recursion > 0)
2925          {
2926          cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2927          cp->hint->match_limit_recursion = match_limit_recursion;
2928          }
2929      }      }
2930    }    }
2931    
2932  if (include_pattern != NULL)  /* If there are include or exclude patterns read from the command line, compile
2933    them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
2934    0. */
2935    
2936    for (j = 0; j < 4; j++)
2937    {    {
2938    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,    int k;
2939      pcretables);    for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
   if (include_compiled == NULL)  
2940      {      {
2941      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
2942        errptr, error);           (k == 1 && cp->next == NULL)? 0 : k))
2943      goto EXIT2;        goto EXIT2;
2944      }      }
2945    }    }
2946    
2947  if (exclude_dir_pattern != NULL)  /* Read and compile include/exclude patterns from files. */
2948    
2949    for (fn = include_from; fn != NULL; fn = fn->next)
2950    {    {
2951    exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,    if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
     pcretables);  
   if (exclude_dir_compiled == NULL)  
     {  
     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",  
       errptr, error);  
2952      goto EXIT2;      goto EXIT2;
     }  
2953    }    }
2954    
2955  if (include_dir_pattern != NULL)  for (fn = exclude_from; fn != NULL; fn = fn->next)
2956    {    {
2957    include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,    if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
     pcretables);  
   if (include_dir_compiled == NULL)  
     {  
     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",  
       errptr, error);  
2958      goto EXIT2;      goto EXIT2;
     }  
2959    }    }
2960    
2961  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no files that contain lists of files to search, and there are
2962    no file arguments, search stdin, and then exit. */
2963    
2964  if (i >= argc)  if (file_lists == NULL && i >= argc)
2965    {    {
2966    rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);    rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2967        (filenames > FN_DEFAULT)? stdin_name : NULL);
2968    goto EXIT;    goto EXIT;
2969    }    }
2970    
2971  /* Otherwise, work through the remaining arguments as files or directories.  /* If any files that contain lists of files to search have been specified,
2972  Pass in the fact that there is only one argument at top level - this suppresses  read them line by line and search the given files. */
 the file name if the argument is not a directory and filenames are not  
 otherwise forced. */  
2973    
2974  only_one_at_top = i == argc - 1;   /* Catch initial value of i */  for (fn = file_lists; fn != NULL; fn = fn->next)
2975      {
2976      char buffer[PATBUFSIZE];
2977      FILE *fl;
2978      if (strcmp(fn->name, "-") == 0) fl = stdin; else
2979        {
2980        fl = fopen(fn->name, "rb");
2981        if (fl == NULL)
2982          {
2983          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
2984            strerror(errno));
2985          goto EXIT2;
2986          }
2987        }
2988      while (fgets(buffer, PATBUFSIZE, fl) != NULL)
2989        {
2990        int frc;
2991        char *end = buffer + (int)strlen(buffer);
2992        while (end > buffer && isspace(end[-1])) end--;
2993        *end = 0;
2994        if (*buffer != 0)
2995          {
2996          frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
2997          if (frc > 1) rc = frc;
2998            else if (frc == 0 && rc == 1) rc = 0;
2999          }
3000        }
3001      if (fl != stdin) fclose(fl);
3002      }
3003    
3004    /* After handling file-list, work through remaining arguments. Pass in the fact
3005    that there is only one argument at top level - this suppresses the file name if
3006    the argument is not a directory and filenames are not otherwise forced. */
3007    
3008    only_one_at_top = i == argc - 1 && file_lists == NULL;
3009    
3010  for (; i < argc; i++)  for (; i < argc; i++)
3011    {    {
# Line 2359  for (; i < argc; i++) Line 3016  for (; i < argc; i++)
3016    }    }
3017    
3018  EXIT:  EXIT:
3019  if (pattern_list != NULL)  #ifdef SUPPORT_PCREGREP_JIT
3020    {  if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3021    for (i = 0; i < pattern_count; i++) free(pattern_list[i]);  #endif
3022    free(pattern_list);  
3023    }  if (main_buffer != NULL) free(main_buffer);
3024  if (hints_list != NULL)  
3025    {  free_pattern_chain(patterns);
3026    for (i = 0; i < hint_count; i++) free(hints_list[i]);  free_pattern_chain(include_patterns);
3027    free(hints_list);  free_pattern_chain(include_dir_patterns);
3028    }  free_pattern_chain(exclude_patterns);
3029  return rc;  free_pattern_chain(exclude_dir_patterns);
3030    
3031    free_file_chain(exclude_from);
3032    free_file_chain(include_from);
3033    free_file_chain(pattern_files);
3034    free_file_chain(file_lists);
3035    
3036    pcregrep_exit(rc);
3037    
3038  EXIT2:  EXIT2:
3039  rc = 2;  rc = 2;

Legend:
Removed from v.345  
changed lines
  Added in v.1004

  ViewVC Help
Powered by ViewVC 1.1.5