/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 421 by ph10, Fri Aug 14 15:43:27 2009 UTC revision 1033 by ph10, Mon Sep 10 11:02:48 2012 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2009 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 70  POSSIBILITY OF SUCH DAMAGE. Line 70  POSSIBILITY OF SUCH DAMAGE.
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define MAX_PATTERN_COUNT 100  
73  #define OFFSET_SIZE 99  #define OFFSET_SIZE 99
74    
75  #if BUFSIZ > 8192  #if BUFSIZ > 8192
76  #define MBUFTHIRD BUFSIZ  #define MAXPATLEN BUFSIZ
77  #else  #else
78  #define MBUFTHIRD 8192  #define MAXPATLEN 8192
79  #endif  #endif
80    
81    #define PATBUFSIZE (MAXPATLEN + 10)   /* Allows for prefix+suffix */
82    
83  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
84  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
85  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
# Line 104  enum { DEE_READ, DEE_SKIP }; Line 105  enum { DEE_READ, DEE_SKIP };
105    
106  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
107    
108    /* Binary file options */
109    
110    enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
111    
112    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
113    environments), a warning is issued if the value of fwrite() is ignored.
114    Unfortunately, casting to (void) does not suppress the warning. To get round
115    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
116    apply to fprintf(). */
117    
118    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
119    
120    
121    
122  /*************************************************  /*************************************************
# Line 127  static char *colour_string = (char *)"1; Line 140  static char *colour_string = (char *)"1;
140  static char *colour_option = NULL;  static char *colour_option = NULL;
141  static char *dee_option = NULL;  static char *dee_option = NULL;
142  static char *DEE_option = NULL;  static char *DEE_option = NULL;
143    static char *locale = NULL;
144    static char *main_buffer = NULL;
145  static char *newline = NULL;  static char *newline = NULL;
 static char *pattern_filename = NULL;  
146  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
 static char *locale = NULL;  
147    
148  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
149    
 static int  pattern_count = 0;  
 static pcre **pattern_list = NULL;  
 static pcre_extra **hints_list = NULL;  
   
 static char *include_pattern = NULL;  
 static char *exclude_pattern = NULL;  
 static char *include_dir_pattern = NULL;  
 static char *exclude_dir_pattern = NULL;  
   
 static pcre *include_compiled = NULL;  
 static pcre *exclude_compiled = NULL;  
 static pcre *include_dir_compiled = NULL;  
 static pcre *exclude_dir_compiled = NULL;  
   
150  static int after_context = 0;  static int after_context = 0;
151  static int before_context = 0;  static int before_context = 0;
152    static int binary_files = BIN_BINARY;
153  static int both_context = 0;  static int both_context = 0;
154    static int bufthird = PCREGREP_BUFSIZE;
155    static int bufsize = 3*PCREGREP_BUFSIZE;
156    
157    #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
158    static int dee_action = dee_SKIP;
159    #else
160  static int dee_action = dee_READ;  static int dee_action = dee_READ;
161    #endif
162    
163  static int DEE_action = DEE_READ;  static int DEE_action = DEE_READ;
164  static int error_count = 0;  static int error_count = 0;
165  static int filenames = FN_DEFAULT;  static int filenames = FN_DEFAULT;
166    static int only_matching = -1;
167    static int pcre_options = 0;
168  static int process_options = 0;  static int process_options = 0;
169    
170    #ifdef SUPPORT_PCREGREP_JIT
171    static int study_options = PCRE_STUDY_JIT_COMPILE;
172    #else
173    static int study_options = 0;
174    #endif
175    
176    static unsigned long int match_limit = 0;
177    static unsigned long int match_limit_recursion = 0;
178    
179  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
180  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
181  static BOOL file_offsets = FALSE;  static BOOL file_offsets = FALSE;
182  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
183  static BOOL invert = FALSE;  static BOOL invert = FALSE;
184    static BOOL line_buffered = FALSE;
185  static BOOL line_offsets = FALSE;  static BOOL line_offsets = FALSE;
186  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
187  static BOOL number = FALSE;  static BOOL number = FALSE;
188  static BOOL omit_zero_count = FALSE;  static BOOL omit_zero_count = FALSE;
189  static BOOL only_matching = FALSE;  static BOOL resource_error = FALSE;
190  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
191  static BOOL silent = FALSE;  static BOOL silent = FALSE;
192  static BOOL utf8 = FALSE;  static BOOL utf8 = FALSE;
193    
194    /* Structure for list of file names (for -f and --{in,ex}clude-from) */
195    
196    typedef struct fnstr {
197      struct fnstr *next;
198      char *name;
199    } fnstr;
200    
201    static fnstr *exclude_from = NULL;
202    static fnstr *exclude_from_last = NULL;
203    static fnstr *include_from = NULL;
204    static fnstr *include_from_last = NULL;
205    
206    static fnstr *file_lists = NULL;
207    static fnstr *file_lists_last = NULL;
208    static fnstr *pattern_files = NULL;
209    static fnstr *pattern_files_last = NULL;
210    
211    /* Structure for holding the two variables that describe a file name chain. */
212    
213    typedef struct fndatastr {
214      fnstr **anchor;
215      fnstr **lastptr;
216    } fndatastr;
217    
218    static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
219    static fndatastr include_from_data = { &include_from, &include_from_last };
220    static fndatastr file_lists_data = { &file_lists, &file_lists_last };
221    static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
222    
223    /* Structure for pattern and its compiled form; used for matching patterns and
224    also for include/exclude patterns. */
225    
226    typedef struct patstr {
227      struct patstr *next;
228      char *string;
229      pcre *compiled;
230      pcre_extra *hint;
231    } patstr;
232    
233    static patstr *patterns = NULL;
234    static patstr *patterns_last = NULL;
235    static patstr *include_patterns = NULL;
236    static patstr *include_patterns_last = NULL;
237    static patstr *exclude_patterns = NULL;
238    static patstr *exclude_patterns_last = NULL;
239    static patstr *include_dir_patterns = NULL;
240    static patstr *include_dir_patterns_last = NULL;
241    static patstr *exclude_dir_patterns = NULL;
242    static patstr *exclude_dir_patterns_last = NULL;
243    
244    /* Structure holding the two variables that describe a pattern chain. A pointer
245    to such structures is used for each appropriate option. */
246    
247    typedef struct patdatastr {
248      patstr **anchor;
249      patstr **lastptr;
250    } patdatastr;
251    
252    static patdatastr match_patdata = { &patterns, &patterns_last };
253    static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
254    static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
255    static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
256    static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
257    
258    static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
259                                     &include_dir_patterns, &exclude_dir_patterns };
260    
261    static const char *incexname[4] = { "--include", "--exclude",
262                                        "--include-dir", "--exclude-dir" };
263    
264  /* Structure for options and list of them */  /* Structure for options and list of them */
265    
266  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
267         OP_PATLIST };         OP_OP_NUMBER, OP_PATLIST, OP_FILELIST, OP_BINFILES };
268    
269  typedef struct option_item {  typedef struct option_item {
270    int type;    int type;
# Line 198  used to identify them. */ Line 288  used to identify them. */
288  #define N_NULL         (-9)  #define N_NULL         (-9)
289  #define N_LOFFSETS     (-10)  #define N_LOFFSETS     (-10)
290  #define N_FOFFSETS     (-11)  #define N_FOFFSETS     (-11)
291    #define N_LBUFFER      (-12)
292    #define N_M_LIMIT      (-13)
293    #define N_M_LIMIT_REC  (-14)
294    #define N_BUFSIZE      (-15)
295    #define N_NOJIT        (-16)
296    #define N_FILE_LIST    (-17)
297    #define N_BINARY_FILES (-18)
298    #define N_EXCLUDE_FROM (-19)
299    #define N_INCLUDE_FROM (-20)
300    
301  static option_item optionlist[] = {  static option_item optionlist[] = {
302    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
303    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
304    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
305    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
306    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
307    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },    { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
308    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
309    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
310    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
311    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
312    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
313    { OP_NODATA,    'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
314    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
315    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },    { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
316    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
317    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
318    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
319    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
320    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
321    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
322    { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },    { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
323    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
324    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },  #ifdef SUPPORT_PCREGREP_JIT
325    { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
326    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },  #else
327    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
328    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },  #endif
329    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
330    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
331    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
332    { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },    { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
333    { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },    { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
334      { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
335      { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
336      { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
337      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
338      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
339      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
340      { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
341      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
342      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
343      { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
344      { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
345      { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
346      { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
347      { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
348      { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
349    
350      /* These two were accidentally implemented with underscores instead of
351      hyphens in the option names. As this was not discovered for several releases,
352      the incorrect versions are left in the table for compatibility. However, the
353      --help function misses out any option that has an underscore in its name. */
354    
355      { OP_PATLIST,   N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
356      { OP_PATLIST,   N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
357    
358  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
359    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
360  #endif  #endif
# Line 247  static option_item optionlist[] = { Line 370  static option_item optionlist[] = {
370  /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F  /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
371  options. These set the 1, 2, and 4 bits in process_options, respectively. Note  options. These set the 1, 2, and 4 bits in process_options, respectively. Note
372  that the combination of -w and -x has the same effect as -x on its own, so we  that the combination of -w and -x has the same effect as -x on its own, so we
373  can treat them as the same. */  can treat them as the same. Note that the MAXPATLEN macro assumes the longest
374    prefix+suffix is 10 characters; if anything longer is added, it must be
375    adjusted. */
376    
377  static const char *prefix[] = {  static const char *prefix[] = {
378    "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };    "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
# Line 268  const char utf8_table4[] = { Line 393  const char utf8_table4[] = {
393    
394    
395  /*************************************************  /*************************************************
396    *          Add item to chain of patterns         *
397    *************************************************/
398    
399    /* Used to add an item onto a chain, or just return an unconnected item if the
400    "after" argument is NULL.
401    
402    Arguments:
403      s          pattern string to add
404      after      if not NULL points to item to insert after
405    
406    Returns:     new pattern block, or NULL after malloc failure
407    */
408    
409    static patstr *
410    add_pattern(char *s, patstr *after)
411    {
412    patstr *p = (patstr *)malloc(sizeof(patstr));
413    if (p == NULL)
414      {
415      fprintf(stderr, "pcregrep: malloc failed\n");
416      return NULL;
417      }
418    if (strlen(s) > MAXPATLEN)
419      {
420      fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
421        MAXPATLEN);
422      return NULL;
423      }
424    p->next = NULL;
425    p->string = s;
426    p->compiled = NULL;
427    p->hint = NULL;
428    
429    if (after != NULL)
430      {
431      p->next = after->next;
432      after->next = p;
433      }
434    return p;
435    }
436    
437    
438    /*************************************************
439    *           Free chain of patterns               *
440    *************************************************/
441    
442    /* Used for several chains of patterns.
443    
444    Argument: pointer to start of chain
445    Returns:  nothing
446    */
447    
448    static void
449    free_pattern_chain(patstr *pc)
450    {
451    while (pc != NULL)
452      {
453      patstr *p = pc;
454      pc = p->next;
455      if (p->hint != NULL) pcre_free_study(p->hint);
456      if (p->compiled != NULL) pcre_free(p->compiled);
457      free(p);
458      }
459    }
460    
461    
462    /*************************************************
463    *           Free chain of file names             *
464    *************************************************/
465    
466    /*
467    Argument: pointer to start of chain
468    Returns:  nothing
469    */
470    
471    static void
472    free_file_chain(fnstr *fn)
473    {
474    while (fn != NULL)
475      {
476      fnstr *f = fn;
477      fn = f->next;
478      free(f);
479      }
480    }
481    
482    
483    /*************************************************
484    *         Exit from the program                  *
485    *************************************************/
486    
487    /* If there has been a resource error, give a suitable message.
488    
489    Argument:  the return code
490    Returns:   does not return
491    */
492    
493    static void
494    pcregrep_exit(int rc)
495    {
496    if (resource_error)
497      {
498      fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
499        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
500        PCRE_ERROR_JIT_STACKLIMIT);
501      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
502      }
503    
504    exit(rc);
505    }
506    
507    
508    /*************************************************
509  *            OS-specific functions               *  *            OS-specific functions               *
510  *************************************************/  *************************************************/
511    
# Line 283  although at present the only ones are fo Line 521  although at present the only ones are fo
521  #include <dirent.h>  #include <dirent.h>
522    
523  typedef DIR directory_type;  typedef DIR directory_type;
524    #define FILESEP '/'
525    
526  static int  static int
527  isdirectory(char *filename)  isdirectory(char *filename)
# Line 290  isdirectory(char *filename) Line 529  isdirectory(char *filename)
529  struct stat statbuf;  struct stat statbuf;
530  if (stat(filename, &statbuf) < 0)  if (stat(filename, &statbuf) < 0)
531    return 0;        /* In the expectation that opening as a file will fail */    return 0;        /* In the expectation that opening as a file will fail */
532  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return (statbuf.st_mode & S_IFMT) == S_IFDIR;
533  }  }
534    
535  static directory_type *  static directory_type *
# Line 331  return (statbuf.st_mode & S_IFMT) == S_I Line 570  return (statbuf.st_mode & S_IFMT) == S_I
570  }  }
571    
572    
573  /************* Test stdout for being a terminal in Unix **********/  /************* Test for a terminal in Unix **********/
574    
575  static BOOL  static BOOL
576  is_stdout_tty(void)  is_stdout_tty(void)
# Line 339  is_stdout_tty(void) Line 578  is_stdout_tty(void)
578  return isatty(fileno(stdout));  return isatty(fileno(stdout));
579  }  }
580    
581    static BOOL
582    is_file_tty(FILE *f)
583    {
584    return isatty(fileno(f));
585    }
586    
587    
588  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
589    
# Line 346  return isatty(fileno(stdout)); Line 591  return isatty(fileno(stdout));
591  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
592  when it did not exist. David Byron added a patch that moved the #include of  when it did not exist. David Byron added a patch that moved the #include of
593  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
594  */  The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
595    undefined when it is indeed undefined. */
596    
597  #elif HAVE_WINDOWS_H  #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
598    
599  #ifndef STRICT  #ifndef STRICT
600  # define STRICT  # define STRICT
# Line 370  BOOL first; Line 616  BOOL first;
616  WIN32_FIND_DATA data;  WIN32_FIND_DATA data;
617  } directory_type;  } directory_type;
618    
619    #define FILESEP '/'
620    
621  int  int
622  isdirectory(char *filename)  isdirectory(char *filename)
623  {  {
624  DWORD attr = GetFileAttributes(filename);  DWORD attr = GetFileAttributes(filename);
625  if (attr == INVALID_FILE_ATTRIBUTES)  if (attr == INVALID_FILE_ATTRIBUTES)
626    return 0;    return 0;
627  return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;  return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
628  }  }
629    
630  directory_type *  directory_type *
# Line 387  char *pattern; Line 635  char *pattern;
635  directory_type *dir;  directory_type *dir;
636  DWORD err;  DWORD err;
637  len = strlen(filename);  len = strlen(filename);
638  pattern = (char *) malloc(len + 3);  pattern = (char *)malloc(len + 3);
639  dir = (directory_type *) malloc(sizeof(*dir));  dir = (directory_type *)malloc(sizeof(*dir));
640  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
641    {    {
642    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
643    exit(2);    pcregrep_exit(2);
644    }    }
645  memcpy(pattern, filename, len);  memcpy(pattern, filename, len);
646  memcpy(&(pattern[len]), "\\*", 3);  memcpy(&(pattern[len]), "\\*", 3);
# Line 451  return !isdirectory(filename); Line 699  return !isdirectory(filename);
699  }  }
700    
701    
702  /************* Test stdout for being a terminal in Win32 **********/  /************* Test for a terminal in Win32 **********/
703    
704  /* I don't know how to do this; assume never */  /* I don't know how to do this; assume never */
705    
# Line 461  is_stdout_tty(void) Line 709  is_stdout_tty(void)
709  return FALSE;  return FALSE;
710  }  }
711    
712    static BOOL
713    is_file_tty(FILE *f)
714    {
715    return FALSE;
716    }
717    
718    
719  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
720    
# Line 468  return FALSE; Line 722  return FALSE;
722    
723  #else  #else
724    
725    #define FILESEP 0
726  typedef void directory_type;  typedef void directory_type;
727    
728  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
# Line 483  void closedirectory(directory_type *dir) Line 738  void closedirectory(directory_type *dir)
738  int isregfile(char *filename) { return 1; }  int isregfile(char *filename) { return 1; }
739    
740    
741  /************* Test stdout for being a terminal when we can't do it **********/  /************* Test for a terminal when we can't do it **********/
742    
743  static BOOL  static BOOL
744  is_stdout_tty(void)  is_stdout_tty(void)
# Line 491  is_stdout_tty(void) Line 746  is_stdout_tty(void)
746  return FALSE;  return FALSE;
747  }  }
748    
749    static BOOL
750    is_file_tty(FILE *f)
751    {
752    return FALSE;
753    }
754    
755  #endif  #endif
756    
# Line 519  return sys_errlist[n]; Line 779  return sys_errlist[n];
779    
780    
781  /*************************************************  /*************************************************
782    *            Test exclude/includes               *
783    *************************************************/
784    
785    /* If any exclude pattern matches, the path is excluded. Otherwise, unless
786    there are no includes, the path must match an include pattern.
787    
788    Arguments:
789      path      the path to be matched
790      ip        the chain of include patterns
791      ep        the chain of exclude patterns
792    
793    Returns:    TRUE if the path is not excluded
794    */
795    
796    static BOOL
797    test_incexc(char *path, patstr *ip, patstr *ep)
798    {
799    int plen = strlen(path);
800    
801    for (; ep != NULL; ep = ep->next)
802      {
803      if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
804        return FALSE;
805      }
806    
807    if (ip == NULL) return TRUE;
808    
809    for (; ip != NULL; ip = ip->next)
810      {
811      if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
812        return TRUE;
813      }
814    
815    return FALSE;
816    }
817    
818    
819    
820    /*************************************************
821    *            Read one line of input              *
822    *************************************************/
823    
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Characters are copied one at a time with fgetc() until a '\n' has been stored,
the buffer limit is reached, or fgetc() returns EOF. No terminating zero is
added. The limit is tested before each character is fetched, so a length that
is zero or negative stores nothing and consumes no input.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;
while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = c;
  if (c == '\n') break;   /* The newline is kept as part of the line */
  }
return yield;
}
851    
852    
853    
854    /*************************************************
855  *             Find end of line                   *  *             Find end of line                   *
856  *************************************************/  *************************************************/
857    
# Line 530  Arguments: Line 863  Arguments:
863    endptr    end of available data    endptr    end of available data
864    lenptr    where to put the length of the eol sequence    lenptr    where to put the length of the eol sequence
865    
866  Returns:    pointer to the last byte of the line  Returns:    pointer after the last byte of the line,
867                including the newline byte(s)
868  */  */
869    
870  static char *  static char *
# Line 599  switch(endlinetype) Line 933  switch(endlinetype)
933    
934      switch (c)      switch (c)
935        {        {
936        case 0x0a:    /* LF */        case '\n':
937        *lenptr = 1;        *lenptr = 1;
938        return p;        return p;
939    
940        case 0x0d:    /* CR */        case '\r':
941        if (p < endptr && *p == 0x0a)        if (p < endptr && *p == '\n')
942          {          {
943          *lenptr = 2;          *lenptr = 2;
944          p++;          p++;
# Line 643  switch(endlinetype) Line 977  switch(endlinetype)
977    
978      switch (c)      switch (c)
979        {        {
980        case 0x0a:    /* LF */        case '\n':    /* LF */
981        case 0x0b:    /* VT */        case '\v':    /* VT */
982        case 0x0c:    /* FF */        case '\f':    /* FF */
983        *lenptr = 1;        *lenptr = 1;
984        return p;        return p;
985    
986        case 0x0d:    /* CR */        case '\r':    /* CR */
987        if (p < endptr && *p == 0x0a)        if (p < endptr && *p == '\n')
988          {          {
989          *lenptr = 2;          *lenptr = 2;
990          p++;          p++;
# Line 658  switch(endlinetype) Line 992  switch(endlinetype)
992        else *lenptr = 1;        else *lenptr = 1;
993        return p;        return p;
994    
995        case 0x85:    /* NEL */  #ifndef EBCDIC
996          case 0x85:    /* Unicode NEL */
997        *lenptr = utf8? 2 : 1;        *lenptr = utf8? 2 : 1;
998        return p;        return p;
999    
1000        case 0x2028:  /* LS */        case 0x2028:  /* Unicode LS */
1001        case 0x2029:  /* PS */        case 0x2029:  /* Unicode PS */
1002        *lenptr = 3;        *lenptr = 3;
1003        return p;        return p;
1004    #endif  /* Not EBCDIC */
1005    
1006        default:        default:
1007        break;        break;
# Line 749  switch(endlinetype) Line 1085  switch(endlinetype)
1085    
1086      if (endlinetype == EL_ANYCRLF) switch (c)      if (endlinetype == EL_ANYCRLF) switch (c)
1087        {        {
1088        case 0x0a:    /* LF */        case '\n':    /* LF */
1089        case 0x0d:    /* CR */        case '\r':    /* CR */
1090        return p;        return p;
1091    
1092        default:        default:
# Line 759  switch(endlinetype) Line 1095  switch(endlinetype)
1095    
1096      else switch (c)      else switch (c)
1097        {        {
1098        case 0x0a:    /* LF */        case '\n':    /* LF */
1099        case 0x0b:    /* VT */        case '\v':    /* VT */
1100        case 0x0c:    /* FF */        case '\f':    /* FF */
1101        case 0x0d:    /* CR */        case '\r':    /* CR */
1102        case 0x85:    /* NEL */  #ifndef EBCDIC
1103        case 0x2028:  /* LS */        case 0x85:    /* Unicode NEL */
1104        case 0x2029:  /* PS */        case 0x2028:  /* Unicode LS */
1105          case 0x2029:  /* Unicode PS */
1106    #endif  /* Not EBCDIC */
1107        return p;        return p;
1108    
1109        default:        default:
# Line 800  Arguments: Line 1138  Arguments:
1138  Returns:            nothing  Returns:            nothing
1139  */  */
1140    
1141  static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,  static void
1142    char *endptr, char *printname)  do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1143      char *printname)
1144  {  {
1145  if (after_context > 0 && lastmatchnumber > 0)  if (after_context > 0 && lastmatchnumber > 0)
1146    {    {
# Line 813  if (after_context > 0 && lastmatchnumber Line 1152  if (after_context > 0 && lastmatchnumber
1152      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
1153      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1154      pp = end_of_line(pp, endptr, &ellength);      pp = end_of_line(pp, endptr, &ellength);
1155      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1156      lastmatchrestart = pp;      lastmatchrestart = pp;
1157      }      }
1158    hyphenpending = TRUE;    hyphenpending = TRUE;
# Line 831  is used multiple times for the same subj Line 1170  is used multiple times for the same subj
1170  to find all possible matches.  to find all possible matches.
1171    
1172  Arguments:  Arguments:
1173    matchptr    the start of the subject    matchptr     the start of the subject
1174    length      the length of the subject to match    length       the length of the subject to match
1175    offsets     the offsets vector to fill in    startoffset  where to start matching
1176    mrc         address of where to put the result of pcre_exec()    offsets      the offsets vector to fill in
1177      mrc          address of where to put the result of pcre_exec()
1178    
1179  Returns:      TRUE if there was a match  Returns:      TRUE if there was a match
1180                FALSE if there was no match                FALSE if there was no match
# Line 842  Returns:      TRUE if there was a match Line 1182  Returns:      TRUE if there was a match
1182  */  */
1183    
1184  static BOOL  static BOOL
1185  match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)  match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
1186      int *mrc)
1187  {  {
1188  int i;  int i;
1189  for (i = 0; i < pattern_count; i++)  size_t slen = length;
1190    patstr *p = patterns;
1191    const char *msg = "this text:\n\n";
1192    
1193    if (slen > 200)
1194    {    {
1195    *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0,    slen = 200;
1196      PCRE_NOTEMPTY, offsets, OFFSET_SIZE);    msg = "text that starts:\n\n";
1197      }
1198    for (i = 1; p != NULL; p = p->next, i++)
1199      {
1200      *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1201        startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
1202    if (*mrc >= 0) return TRUE;    if (*mrc >= 0) return TRUE;
1203    if (*mrc == PCRE_ERROR_NOMATCH) continue;    if (*mrc == PCRE_ERROR_NOMATCH) continue;
1204    fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);    fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1205    if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);    if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1206    fprintf(stderr, "this text:\n");    fprintf(stderr, "%s", msg);
1207    fwrite(matchptr, 1, length, stderr);  /* In case binary zero included */    FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1208    fprintf(stderr, "\n");    fprintf(stderr, "\n\n");
1209    if (error_count == 0 &&    if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1210        (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))        *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1211      {      resource_error = TRUE;
     fprintf(stderr, "pcregrep: error %d means that a resource limit "  
       "was exceeded\n", *mrc);  
     fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
     }  
1212    if (error_count++ > 20)    if (error_count++ > 20)
1213      {      {
1214      fprintf(stderr, "pcregrep: too many errors - abandoned\n");      fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1215      exit(2);      pcregrep_exit(2);
1216      }      }
1217    return invert;    /* No more matching; don't show the line again */    return invert;    /* No more matching; don't show the line again */
1218    }    }
# Line 881  return FALSE;  /* No match, no errors */ Line 1227  return FALSE;  /* No match, no errors */
1227  *************************************************/  *************************************************/
1228    
1229  /* This is called from grep_or_recurse() below. It uses a buffer that is three  /* This is called from grep_or_recurse() below. It uses a buffer that is three
1230  times the value of MBUFTHIRD. The matching point is never allowed to stray into  times the value of bufthird. The matching point is never allowed to stray into
1231  the top third of the buffer, thus keeping more of the file available for  the top third of the buffer, thus keeping more of the file available for
1232  context printing or for multiline scanning. For large files, the pointer will  context printing or for multiline scanning. For large files, the pointer will
1233  be in the middle third most of the time, so the bottom third is available for  be in the middle third most of the time, so the bottom third is available for
# Line 892  Arguments: Line 1238  Arguments:
1238                 the gzFile pointer when reading is via libz                 the gzFile pointer when reading is via libz
1239                 the BZFILE pointer when reading is via libbz2                 the BZFILE pointer when reading is via libbz2
1240    frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2    frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1241      filename     the file name or NULL (for errors)
1242    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
1243                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
1244                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
1245    
1246  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
1247                 1 otherwise (no matches)                 1 otherwise (no matches)
1248                 2 if there is a read error on a .bz2 file                 2 if an overlong line is encountered
1249                   3 if there is a read error on a .bz2 file
1250  */  */
1251    
1252  static int  static int
1253  pcregrep(void *handle, int frtype, char *printname)  pcregrep(void *handle, int frtype, char *filename, char *printname)
1254  {  {
1255  int rc = 1;  int rc = 1;
1256  int linenumber = 1;  int linenumber = 1;
# Line 911  int count = 0; Line 1259  int count = 0;
1259  int filepos = 0;  int filepos = 0;
1260  int offsets[OFFSET_SIZE];  int offsets[OFFSET_SIZE];
1261  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
1262  char buffer[3*MBUFTHIRD];  char *ptr = main_buffer;
 char *ptr = buffer;  
1263  char *endptr;  char *endptr;
1264  size_t bufflength;  size_t bufflength;
1265    BOOL binary = FALSE;
1266  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
1267    BOOL input_line_buffered = line_buffered;
1268  FILE *in = NULL;                    /* Ensure initialized */  FILE *in = NULL;                    /* Ensure initialized */
1269    
1270  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
# Line 936  fail. */ Line 1285  fail. */
1285  if (frtype == FR_LIBZ)  if (frtype == FR_LIBZ)
1286    {    {
1287    ingz = (gzFile)handle;    ingz = (gzFile)handle;
1288    bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);    bufflength = gzread (ingz, main_buffer, bufsize);
1289    }    }
1290  else  else
1291  #endif  #endif
# Line 945  else Line 1294  else
1294  if (frtype == FR_LIBBZ2)  if (frtype == FR_LIBBZ2)
1295    {    {
1296    inbz2 = (BZFILE *)handle;    inbz2 = (BZFILE *)handle;
1297    bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);    bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1298    if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */    if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1299    }                                    /* without the cast it is unsigned. */    }                                    /* without the cast it is unsigned. */
1300  else  else
# Line 953  else Line 1302  else
1302    
1303    {    {
1304    in = (FILE *)handle;    in = (FILE *)handle;
1305    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);    if (is_file_tty(in)) input_line_buffered = TRUE;
1306      bufflength = input_line_buffered?
1307        read_one_line(main_buffer, bufsize, in) :
1308        fread(main_buffer, 1, bufsize, in);
1309    }    }
1310    
1311  endptr = buffer + bufflength;  endptr = main_buffer + bufflength;
1312    
1313    /* Unless binary-files=text, see if we have a binary file. This uses the same
1314    rule as GNU grep, namely, a search for a binary zero byte near the start of the
1315    file. */
1316    
1317    if (binary_files != BIN_TEXT)
1318      {
1319      binary =
1320        memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1321      if (binary && binary_files == BIN_NOMATCH) return 1;
1322      }
1323    
1324  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
1325  files, endptr will be at the end of the buffer when we are in the middle of the  files, endptr will be at the end of the buffer when we are in the middle of the
# Line 967  while (ptr < endptr) Line 1330  while (ptr < endptr)
1330    {    {
1331    int endlinelength;    int endlinelength;
1332    int mrc = 0;    int mrc = 0;
1333      int startoffset = 0;
1334    BOOL match;    BOOL match;
1335    char *matchptr = ptr;    char *matchptr = ptr;
1336    char *t = ptr;    char *t = ptr;
# Line 984  while (ptr < endptr) Line 1348  while (ptr < endptr)
1348    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
1349    length = multiline? (size_t)(endptr - ptr) : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
1350    
1351      /* Check to see if the line we are looking at extends right to the very end
1352      of the buffer without a line terminator. This means the line is too long to
1353      handle. */
1354    
1355      if (endlinelength == 0 && t == main_buffer + bufsize)
1356        {
1357        fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1358                        "pcregrep: check the --buffer-size option\n",
1359                        linenumber,
1360                        (filename == NULL)? "" : " of file ",
1361                        (filename == NULL)? "" : filename);
1362        return 2;
1363        }
1364    
1365    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
1366    
1367  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
# Line 1002  while (ptr < endptr) Line 1380  while (ptr < endptr)
1380            ptr = malloc(newlen + 1);            ptr = malloc(newlen + 1);
1381            if (!ptr) {            if (!ptr) {
1382                    printf("out of memory");                    printf("out of memory");
1383                    exit(2);                    pcregrep_exit(2);
1384            }            }
1385            endptr = ptr;            endptr = ptr;
1386            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
# Line 1019  while (ptr < endptr) Line 1397  while (ptr < endptr)
1397    
1398    
1399        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1400            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,            match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1401                PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);                PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1402    
1403        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
# Line 1043  while (ptr < endptr) Line 1421  while (ptr < endptr)
1421    than NOMATCH. This code is in a subroutine so that it can be re-used for    than NOMATCH. This code is in a subroutine so that it can be re-used for
1422    finding subsequent matches when colouring matched lines. */    finding subsequent matches when colouring matched lines. */
1423    
1424    match = match_patterns(matchptr, length, offsets, &mrc);    match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1425    
1426    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1427    
# Line 1059  while (ptr < endptr) Line 1437  while (ptr < endptr)
1437    
1438      if (count_only) count++;      if (count_only) count++;
1439    
1440        /* When handling a binary file and binary-files==binary, the "binary"
1441        variable will be set true (it's false in all other cases). In this
1442        situation we just want to output the file name. No need to scan further. */
1443    
1444        else if (binary)
1445          {
1446          fprintf(stdout, "Binary file %s matches\n", filename);
1447          return 0;
1448          }
1449    
1450      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1451      in the file. */      in the file. */
1452    
# Line 1072  while (ptr < endptr) Line 1460  while (ptr < endptr)
1460    
1461      else if (quiet) return 0;      else if (quiet) return 0;
1462    
1463      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, or a
1464      the --file-offsets and --line-offsets options output offsets for the      captured portion of it, as long as this string is not empty, and the
1465      matching substring (they both force --only-matching). None of these options      --file-offsets and --line-offsets options output offsets for the matching
1466      prints any context. Afterwards, adjust the start and length, and then jump      substring (they both force --only-matching = 0). None of these options
1467      back to look for further matches in the same line. If we are in invert      prints any context. Afterwards, adjust the start and then jump back to look
1468      mode, however, nothing is printed - this could be still useful because the      for further matches in the same line. If we are in invert mode, however,
1469      return code is set. */      nothing is printed and we do not restart - this could still be useful
1470        because the return code is set. */
1471    
1472      else if (only_matching)      else if (only_matching >= 0)
1473        {        {
1474        if (!invert)        if (!invert)
1475          {          {
1476          if (printname != NULL) fprintf(stdout, "%s:", printname);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1477          if (number) fprintf(stdout, "%d:", linenumber);          if (number) fprintf(stdout, "%d:", linenumber);
1478          if (line_offsets)          if (line_offsets)
1479            fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),            fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1480              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1481          else if (file_offsets)          else if (file_offsets)
1482            fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),            fprintf(stdout, "%d,%d\n",
1483                (int)(filepos + matchptr + offsets[0] - ptr),
1484              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1485          else          else if (only_matching < mrc)
1486            {            {
1487            if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);            int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1488            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);            if (plen > 0)
1489            if (do_colour) fprintf(stdout, "%c[00m", 0x1b);              {
1490                if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1491                FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1492                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1493                fprintf(stdout, "\n");
1494                }
1495            }            }
1496          fprintf(stdout, "\n");          else if (printname != NULL || number) fprintf(stdout, "\n");
         matchptr += offsets[1];  
         length -= offsets[1];  
1497          match = FALSE;          match = FALSE;
1498            if (line_buffered) fflush(stdout);
1499            rc = 0;                      /* Had some success */
1500            startoffset = offsets[1];    /* Restart after the match */
1501          goto ONLY_MATCHING_RESTART;          goto ONLY_MATCHING_RESTART;
1502          }          }
1503        }        }
# Line 1137  while (ptr < endptr) Line 1533  while (ptr < endptr)
1533            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1534            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1535            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1536            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1537            lastmatchrestart = pp;            lastmatchrestart = pp;
1538            }            }
1539          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
# Line 1160  while (ptr < endptr) Line 1556  while (ptr < endptr)
1556          int linecount = 0;          int linecount = 0;
1557          char *p = ptr;          char *p = ptr;
1558    
1559          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1560                 linecount < before_context)                 linecount < before_context)
1561            {            {
1562            linecount++;            linecount++;
1563            p = previous_line(p, buffer);            p = previous_line(p, main_buffer);
1564            }            }
1565    
1566          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 1177  while (ptr < endptr) Line 1573  while (ptr < endptr)
1573            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1574            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1575            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1576            fwrite(p, 1, pp - p, stdout);            FWRITE(p, 1, pp - p, stdout);
1577            p = pp;            p = pp;
1578            }            }
1579          }          }
# Line 1197  while (ptr < endptr) Line 1593  while (ptr < endptr)
1593        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1594        the match will always be before the first newline sequence. */        the match will always be before the first newline sequence. */
1595    
1596        if (multiline)        if (multiline & !invert)
1597          {          {
1598          int ellength;          char *endmatch = ptr + offsets[1];
1599          char *endmatch = ptr;          t = ptr;
1600          if (!invert)          while (t < endmatch)
1601            {            {
1602            endmatch += offsets[1];            t = end_of_line(t, endptr, &endlinelength);
1603            t = ptr;            if (t < endmatch) linenumber++; else break;
           while (t < endmatch)  
             {  
             t = end_of_line(t, endptr, &ellength);  
             if (t <= endmatch) linenumber++; else break;  
             }  
1604            }            }
1605          endmatch = end_of_line(endmatch, endptr, &ellength);          linelength = t - ptr - endlinelength;
         linelength = endmatch - ptr - ellength;  
1606          }          }
1607    
1608        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 1227  while (ptr < endptr) Line 1617  while (ptr < endptr)
1617          {          {
1618          int first = S_arg * 2;          int first = S_arg * 2;
1619          int last  = first + 1;          int last  = first + 1;
1620          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1621          fprintf(stdout, "X");          fprintf(stdout, "X");
1622          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1623          }          }
1624        else        else
1625  #endif  #endif
1626    
1627        /* We have to split the line(s) up if colouring, and search for further        /* We have to split the line(s) up if colouring, and search for further
1628        matches. */        matches, but not of course if the line is a non-match. */
1629    
1630        if (do_colour)        if (do_colour && !invert)
1631          {          {
1632          int last_offset = 0;          int plength;
1633          fwrite(ptr, 1, offsets[0], stdout);          FWRITE(ptr, 1, offsets[0], stdout);
1634          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1635          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1636          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1637          for (;;)          for (;;)
1638            {            {
1639            last_offset += offsets[1];            startoffset = offsets[1];
1640            matchptr += offsets[1];            if (startoffset >= (int)linelength + endlinelength ||
1641            length -= offsets[1];                !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1642            if (!match_patterns(matchptr, length, offsets, &mrc)) break;              break;
1643            fwrite(matchptr, 1, offsets[0], stdout);            FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1644            fprintf(stdout, "%c[%sm", 0x1b, colour_string);            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1645            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);            FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1646            fprintf(stdout, "%c[00m", 0x1b);            fprintf(stdout, "%c[00m", 0x1b);
1647            }            }
1648          fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,  
1649            stdout);          /* In multiline mode, we may have already printed the complete line
1650            and its line-ending characters (if they matched the pattern), so there
1651            may be no more to print. */
1652    
1653            plength = (int)((linelength + endlinelength) - startoffset);
1654            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1655          }          }
1656    
1657        /* Not colouring; no need to search for further matches */        /* Not colouring; no need to search for further matches */
1658    
1659        else fwrite(ptr, 1, linelength + endlinelength, stdout);        else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1660        }        }
1661    
1662      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match. If --line-buffered was
1663        given, flush the output. */
1664    
1665        if (line_buffered) fflush(stdout);
1666      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1667    
1668      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
# Line 1297  while (ptr < endptr) Line 1694  while (ptr < endptr)
1694    offset to the current line is maintained in filepos. */    offset to the current line is maintained in filepos. */
1695    
1696    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
1697    filepos += linelength + endlinelength;    filepos += (int)(linelength + endlinelength);
1698    linenumber++;    linenumber++;
1699    
1700      /* If input is line buffered, and the buffer is not yet full, read another
1701      line and add it into the buffer. */
1702    
1703      if (input_line_buffered && bufflength < (size_t)bufsize)
1704        {
1705        int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1706        bufflength += add;
1707        endptr += add;
1708        }
1709    
1710    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1711    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1712    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
1713    about to be lost, print them. */    about to be lost, print them. */
1714    
1715    if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)    if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1716      {      {
1717      if (after_context > 0 &&      if (after_context > 0 &&
1718          lastmatchnumber > 0 &&          lastmatchnumber > 0 &&
1719          lastmatchrestart < buffer + MBUFTHIRD)          lastmatchrestart < main_buffer + bufthird)
1720        {        {
1721        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1722        lastmatchnumber = 0;        lastmatchnumber = 0;
# Line 1317  while (ptr < endptr) Line 1724  while (ptr < endptr)
1724    
1725      /* Now do the shuffle */      /* Now do the shuffle */
1726    
1727      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1728      ptr -= MBUFTHIRD;      ptr -= bufthird;
1729    
1730  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
1731      if (frtype == FR_LIBZ)      if (frtype == FR_LIBZ)
1732        bufflength = 2*MBUFTHIRD +        bufflength = 2*bufthird +
1733          gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);          gzread (ingz, main_buffer + 2*bufthird, bufthird);
1734      else      else
1735  #endif  #endif
1736    
1737  #ifdef SUPPORT_LIBBZ2  #ifdef SUPPORT_LIBBZ2
1738      if (frtype == FR_LIBBZ2)      if (frtype == FR_LIBBZ2)
1739        bufflength = 2*MBUFTHIRD +        bufflength = 2*bufthird +
1740          BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);          BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1741      else      else
1742  #endif  #endif
1743    
1744      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);      bufflength = 2*bufthird +
1745          (input_line_buffered?
1746      endptr = buffer + bufflength;         read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1747           fread(main_buffer + 2*bufthird, 1, bufthird, in));
1748        endptr = main_buffer + bufflength;
1749    
1750      /* Adjust any last match point */      /* Adjust any last match point */
1751    
1752      if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;      if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1753      }      }
1754    }     /* Loop through the whole file */    }     /* Loop through the whole file */
1755    
1756  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
1757  hyphenpending if it prints something. */  hyphenpending if it prints something. */
1758    
1759  if (!only_matching && !count_only)  if (only_matching < 0 && !count_only)
1760    {    {
1761    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1762    hyphenpending |= endhyphenpending;    hyphenpending |= endhyphenpending;
# Line 1367  if (filenames == FN_NOMATCH_ONLY) Line 1776  if (filenames == FN_NOMATCH_ONLY)
1776  if (count_only)  if (count_only)
1777    {    {
1778    if (count > 0 || !omit_zero_count)    if (count > 0 || !omit_zero_count)
1779      {      {
1780      if (printname != NULL && filenames != FN_NONE)      if (printname != NULL && filenames != FN_NONE)
1781        fprintf(stdout, "%s:", printname);        fprintf(stdout, "%s:", printname);
1782      fprintf(stdout, "%d\n", count);      fprintf(stdout, "%d\n", count);
1783      }      }
1784    }    }
1785    
1786  return rc;  return rc;
# Line 1391  Arguments: Line 1800  Arguments:
1800    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1801    only_one_at_top   TRUE if the path is the only one at toplevel    only_one_at_top   TRUE if the path is the only one at toplevel
1802    
1803  Returns:   0 if there was at least one match  Returns:  -1 the file/directory was skipped
1804               0 if there was at least one match
1805             1 if there were no matches             1 if there were no matches
1806             2 there was some kind of error             2 there was some kind of error
1807    
# Line 1402  static int Line 1812  static int
1812  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1813  {  {
1814  int rc = 1;  int rc = 1;
 int sep;  
1815  int frtype;  int frtype;
 int pathlen;  
1816  void *handle;  void *handle;
1817    char *lastcomp;
1818  FILE *in = NULL;           /* Ensure initialized */  FILE *in = NULL;           /* Ensure initialized */
1819    
1820  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
# Line 1416  gzFile ingz = NULL; Line 1825  gzFile ingz = NULL;
1825  BZFILE *inbz2 = NULL;  BZFILE *inbz2 = NULL;
1826  #endif  #endif
1827    
1828    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1829    int pathlen;
1830    #endif
1831    
1832  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1833    
1834  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1835    {    {
1836    return pcregrep(stdin, FR_PLAIN,    return pcregrep(stdin, FR_PLAIN, stdin_name,
1837      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1838        stdin_name : NULL);        stdin_name : NULL);
1839    }    }
1840    
1841  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
1842  each file and directory within it, subject to any include or exclude patterns  directories, whereas --include and --exclude apply to everything else. The test
1843  that were set. The scanning code is localized so it can be made  is against the final component of the path. */
1844  system-specific. */  
1845    lastcomp = strrchr(pathname, FILESEP);
1846    lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
1847    
1848    /* If the file is a directory, skip if not recursing or if explicitly excluded.
1849    Otherwise, scan the directory and recurse for each path within it. The scanning
1850    code is localized so it can be made system-specific. */
1851    
1852    if (isdirectory(pathname))
1853      {
1854      if (dee_action == dee_SKIP ||
1855          !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
1856        return -1;
1857    
 if ((sep = isdirectory(pathname)) != 0)  
   {  
   if (dee_action == dee_SKIP) return 1;  
1858    if (dee_action == dee_RECURSE)    if (dee_action == dee_RECURSE)
1859      {      {
1860      char buffer[1024];      char buffer[1024];
# Line 1449  if ((sep = isdirectory(pathname)) != 0) Line 1871  if ((sep = isdirectory(pathname)) != 0)
1871    
1872      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
1873        {        {
1874        int frc, nflen;        int frc;
1875        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
       nflen = strlen(nextfile);  
   
       if (isdirectory(buffer))  
         {  
         if (exclude_dir_compiled != NULL &&  
             pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)  
           continue;  
   
         if (include_dir_compiled != NULL &&  
             pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)  
           continue;  
         }  
       else  
         {  
         if (exclude_compiled != NULL &&  
             pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)  
           continue;  
   
         if (include_compiled != NULL &&  
             pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)  
           continue;  
         }  
   
1876        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1877        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
1878         else if (frc == 0 && rc == 1) rc = 0;         else if (frc == 0 && rc == 1) rc = 0;
# Line 1485  if ((sep = isdirectory(pathname)) != 0) Line 1884  if ((sep = isdirectory(pathname)) != 0)
1884    }    }
1885    
1886  /* If the file is not a directory and not a regular file, skip it if that's  /* If the file is not a directory and not a regular file, skip it if that's
1887  been requested. */  been requested. Otherwise, check for explicit include/exclude. */
1888    
1889  else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;  else if ((!isregfile(pathname) && DEE_action == DEE_SKIP) ||
1890              !test_incexc(lastcomp, include_patterns, exclude_patterns))
1891            return -1;
1892    
1893  /* Control reaches here if we have a regular file, or if we have a directory  /* Control reaches here if we have a regular file, or if we have a directory
1894  and recursion or skipping was not requested, or if we have anything else and  and recursion or skipping was not requested, or if we have anything else and
# Line 1495  skipping was not requested. The scan pro Line 1896  skipping was not requested. The scan pro
1896  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1897  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1898    
1899  pathlen = strlen(pathname);  #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1900    pathlen = (int)(strlen(pathname));
1901    #endif
1902    
1903  /* Open using zlib if it is supported and the file name ends with .gz. */  /* Open using zlib if it is supported and the file name ends with .gz. */
1904    
# Line 1552  if (handle == NULL) Line 1955  if (handle == NULL)
1955    
1956  /* Now grep the file */  /* Now grep the file */
1957    
1958  rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||  rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1959    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1960    
1961  /* Close in an appropriate manner. */  /* Close in an appropriate manner. */
# Line 1563  if (frtype == FR_LIBZ) Line 1966  if (frtype == FR_LIBZ)
1966  else  else
1967  #endif  #endif
1968    
1969  /* If it is a .bz2 file and the result is 2, it means that the first attempt to  /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1970  read failed. If the error indicates that the file isn't in fact bzipped, try  read failed. If the error indicates that the file isn't in fact bzipped, try
1971  again as a normal file. */  again as a normal file. */
1972    
1973  #ifdef SUPPORT_LIBBZ2  #ifdef SUPPORT_LIBBZ2
1974  if (frtype == FR_LIBBZ2)  if (frtype == FR_LIBBZ2)
1975    {    {
1976    if (rc == 2)    if (rc == 3)
1977      {      {
1978      int errnum;      int errnum;
1979      const char *err = BZ2_bzerror(inbz2, &errnum);      const char *err = BZ2_bzerror(inbz2, &errnum);
# Line 1582  if (frtype == FR_LIBBZ2) Line 1985  if (frtype == FR_LIBBZ2)
1985      else if (!silent)      else if (!silent)
1986        fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",        fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1987          pathname, err);          pathname, err);
1988        rc = 2;    /* The normal "something went wrong" code */
1989      }      }
1990    BZ2_bzclose(inbz2);    BZ2_bzclose(inbz2);
1991    }    }
# Line 1657  for (op = optionlist; op->one_char != 0; Line 2061  for (op = optionlist; op->one_char != 0;
2061    {    {
2062    int n;    int n;
2063    char s[4];    char s[4];
2064    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");  
2065    n = 30 - printf("  %s --%s", s, op->long_name);    /* Two options were accidentally implemented and documented with underscores
2066      instead of hyphens in their names, something that was not noticed for quite a
2067      few releases. When fixing this, I left the underscored versions in the list
2068      in case people were using them. However, we don't want to display them in the
2069      help data. There are no other options that contain underscores, and we do not
2070      expect ever to implement such options. Therefore, just omit any option that
2071      contains an underscore. */
2072    
2073      if (strchr(op->long_name, '_') != NULL) continue;
2074    
2075      if (op->one_char > 0 && (op->long_name)[0] == 0)
2076        n = 31 - printf("  -%c", op->one_char);
2077      else
2078        {
2079        if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
2080          else strcpy(s, "   ");
2081        n = 31 - printf("  %s --%s", s, op->long_name);
2082        }
2083    
2084    if (n < 1) n = 1;    if (n < 1) n = 1;
2085    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                           ", op->help_text);
2086    }    }
2087    
2088  printf("\nWhen reading patterns from a file instead of using a command line option,\n");  printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
2089  printf("trailing white space is removed and blank lines are ignored.\n");  printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
2090  printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  printf("When reading patterns or file names from a file, trailing white\n");
2091    printf("space is removed and blank lines are ignored.\n");
2092    printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
2093    
2094  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
2095  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
# Line 1684  handle_option(int letter, int options) Line 2108  handle_option(int letter, int options)
2108  switch(letter)  switch(letter)
2109    {    {
2110    case N_FOFFSETS: file_offsets = TRUE; break;    case N_FOFFSETS: file_offsets = TRUE; break;
2111    case N_HELP: help(); exit(0);    case N_HELP: help(); pcregrep_exit(0);
2112      case N_LBUFFER: line_buffered = TRUE; break;
2113    case N_LOFFSETS: line_offsets = number = TRUE; break;    case N_LOFFSETS: line_offsets = number = TRUE; break;
2114      case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2115      case 'a': binary_files = BIN_TEXT; break;
2116    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
2117    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
2118    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
2119      case 'I': binary_files = BIN_NOMATCH; break;
2120    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
2121    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
2122    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2123    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
2124    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2125    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
2126    case 'o': only_matching = TRUE; break;    case 'o': only_matching = 0; break;
2127    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
2128    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
2129    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
# Line 1705  switch(letter) Line 2133  switch(letter)
2133    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
2134    
2135    case 'V':    case 'V':
2136    fprintf(stderr, "pcregrep version %s\n", pcre_version());    fprintf(stdout, "pcregrep version %s\n", pcre_version());
2137    exit(0);    pcregrep_exit(0);
2138    break;    break;
2139    
2140    default:    default:
2141    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2142    exit(usage(2));    pcregrep_exit(usage(2));
2143    }    }
2144    
2145  return options;  return options;
# Line 1749  return buffer; Line 2177  return buffer;
2177  *          Compile a single pattern              *  *          Compile a single pattern              *
2178  *************************************************/  *************************************************/
2179    
2180  /* When the -F option has been used, this is called for each substring.  /* Do nothing if the pattern has already been compiled. This is the case for
2181  Otherwise it's called for each supplied pattern.  include/exclude patterns read from a file.
2182    
2183    When the -F option has been used, each "pattern" may be a list of strings,
2184    separated by line breaks. They will be matched literally. We split such a
2185    string and compile the first substring, inserting an additional block into the
2186    pattern chain.
2187    
2188  Arguments:  Arguments:
2189    pattern        the pattern string    p              points to the pattern block
2190    options        the PCRE options    options        the PCRE options
2191    filename       the file name, or NULL for a command-line pattern    popts          the processing options
2192      fromfile       TRUE if the pattern was read from a file
2193      fromtext       file name or identifying text (e.g. "include")
2194    count          0 if this is the only command line pattern, or    count          0 if this is the only command line pattern, or
2195                   number of the command line pattern, or                   number of the command line pattern, or
2196                   linenumber for a pattern from a file                   linenumber for a pattern from a file
# Line 1764  Returns:         TRUE on success, FALSE Line 2199  Returns:         TRUE on success, FALSE
2199  */  */
2200    
2201  static BOOL  static BOOL
2202  compile_single_pattern(char *pattern, int options, char *filename, int count)  compile_pattern(patstr *p, int options, int popts, int fromfile,
2203      const char *fromtext, int count)
2204  {  {
2205  char buffer[MBUFTHIRD + 16];  char buffer[PATBUFSIZE];
2206  const char *error;  const char *error;
2207    char *ps = p->string;
2208    int patlen = strlen(ps);
2209  int errptr;  int errptr;
2210    
2211  if (pattern_count >= MAX_PATTERN_COUNT)  if (p->compiled != NULL) return TRUE;
   {  
   fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",  
     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);  
   return FALSE;  
   }  
2212    
2213  sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,  if ((popts & PO_FIXED_STRINGS) != 0)
   suffix[process_options]);  
 pattern_list[pattern_count] =  
   pcre_compile(buffer, options, &error, &errptr, pcretables);  
 if (pattern_list[pattern_count] != NULL)  
2214    {    {
2215    pattern_count++;    int ellength;
2216    return TRUE;    char *eop = ps + patlen;
2217      char *pe = end_of_line(ps, eop, &ellength);
2218    
2219      if (ellength != 0)
2220        {
2221        if (add_pattern(pe, p) == NULL) return FALSE;
2222        patlen = (int)(pe - ps - ellength);
2223        }
2224    }    }
2225    
2226    sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2227    p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2228    if (p->compiled != NULL) return TRUE;
2229    
2230  /* Handle compile errors */  /* Handle compile errors */
2231    
2232  errptr -= (int)strlen(prefix[process_options]);  errptr -= (int)strlen(prefix[popts]);
2233  if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);  if (errptr > patlen) errptr = patlen;
2234    
2235  if (filename == NULL)  if (fromfile)
2236    {    {
2237    if (count == 0)    fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2238      fprintf(stderr, "pcregrep: Error in command-line regex "      "at offset %d: %s\n", count, fromtext, errptr, error);
       "at offset %d: %s\n", errptr, error);  
   else  
     fprintf(stderr, "pcregrep: Error in %s command-line regex "  
       "at offset %d: %s\n", ordin(count), errptr, error);  
2239    }    }
2240  else  else
2241    {    {
2242    fprintf(stderr, "pcregrep: Error in regex in line %d of %s "    if (count == 0)
2243      "at offset %d: %s\n", count, filename, errptr, error);      fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2244          fromtext, errptr, error);
2245      else
2246        fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2247          ordin(count), fromtext, errptr, error);
2248    }    }
2249    
2250  return FALSE;  return FALSE;
# Line 1813  return FALSE; Line 2253  return FALSE;
2253    
2254    
2255  /*************************************************  /*************************************************
2256  *           Compile one supplied pattern         *  *     Read and compile a file of patterns        *
2257  *************************************************/  *************************************************/
2258    
2259  /* When the -F option has been used, each string may be a list of strings,  /* This is used for --filelist, --include-from, and --exclude-from.
 separated by line breaks. They will be matched literally.  
2260    
2261  Arguments:  Arguments:
2262    pattern        the pattern string    name         the name of the file; "-" is stdin
2263    options        the PCRE options    patptr       pointer to the pattern chain anchor
2264    filename       the file name, or NULL for a command-line pattern    patlastptr   pointer to the last pattern pointer
2265    count          0 if this is the only command line pattern, or    popts        the process options to pass to compile_pattern()
                  number of the command line pattern, or  
                  linenumber for a pattern from a file  
2266    
2267  Returns:         TRUE on success, FALSE after an error  Returns:       TRUE if all went well
2268  */  */
2269    
2270  static BOOL  static BOOL
2271  compile_pattern(char *pattern, int options, char *filename, int count)  read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2272  {  {
2273  if ((process_options & PO_FIXED_STRINGS) != 0)  int linenumber = 0;
2274    FILE *f;
2275    char *filename;
2276    char buffer[PATBUFSIZE];
2277    
2278    if (strcmp(name, "-") == 0)
2279    {    {
2280    char *eop = pattern + strlen(pattern);    f = stdin;
2281    char buffer[MBUFTHIRD];    filename = stdin_name;
2282      }
2283    else
2284      {
2285      f = fopen(name, "r");
2286      if (f == NULL)
2287        {
2288        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2289        return FALSE;
2290        }
2291      filename = name;
2292      }
2293    
2294    while (fgets(buffer, PATBUFSIZE, f) != NULL)
2295      {
2296      char *s = buffer + (int)strlen(buffer);
2297      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2298      *s = 0;
2299      linenumber++;
2300      if (buffer[0] == 0) continue;   /* Skip blank lines */
2301    
2302      /* Note: this call to add_pattern() puts a pointer to the local variable
2303      "buffer" into the pattern chain. However, that pointer is used only when
2304      compiling the pattern, which happens immediately below, so we flatten it
2305      afterwards, as a precaution against any later code trying to use it. */
2306    
2307      *patlastptr = add_pattern(buffer, *patlastptr);
2308      if (*patlastptr == NULL) return FALSE;
2309      if (*patptr == NULL) *patptr = *patlastptr;
2310    
2311      /* This loop is needed because compiling a "pattern" when -F is set may add
2312      on additional literal patterns if the original contains a newline. In the
2313      common case, it never will, because fgets() stops at a newline. However,
2314      the -N option can be used to give pcregrep a different newline setting. */
2315    
2316    for(;;)    for(;;)
2317      {      {
2318      int ellength;      if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2319      char *p = end_of_line(pattern, eop, &ellength);          linenumber))
     if (ellength == 0)  
       return compile_single_pattern(pattern, options, filename, count);  
     sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);  
     pattern = p;  
     if (!compile_single_pattern(buffer, options, filename, count))  
2320        return FALSE;        return FALSE;
2321        (*patlastptr)->string = NULL;            /* Insurance */
2322        if ((*patlastptr)->next == NULL) break;
2323        *patlastptr = (*patlastptr)->next;
2324      }      }
2325    }    }
2326  else return compile_single_pattern(pattern, options, filename, count);  
2327    if (f != stdin) fclose(f);
2328    return TRUE;
2329  }  }
2330    
2331    
# Line 1865  main(int argc, char **argv) Line 2341  main(int argc, char **argv)
2341  {  {
2342  int i, j;  int i, j;
2343  int rc = 1;  int rc = 1;
 int pcre_options = 0;  
 int cmd_pattern_count = 0;  
 int hint_count = 0;  
 int errptr;  
2344  BOOL only_one_at_top;  BOOL only_one_at_top;
2345  char *patterns[MAX_PATTERN_COUNT];  patstr *cp;
2346    fnstr *fn;
2347  const char *locale_from = "--locale";  const char *locale_from = "--locale";
2348  const char *error;  const char *error;
2349    
2350    #ifdef SUPPORT_PCREGREP_JIT
2351    pcre_jit_stack *jit_stack = NULL;
2352    #endif
2353    
2354  /* Set the default line ending value from the default in the PCRE library;  /* Set the default line ending value from the default in the PCRE library;
2355  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2356  Note that the return values from pcre_config(), though derived from the ASCII  Note that the return values from pcre_config(), though derived from the ASCII
# Line 1906  for (i = 1; i < argc; i++) Line 2383  for (i = 1; i < argc; i++)
2383    
2384    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2385      {      {
2386      if (pattern_filename != NULL || pattern_count > 0) break;      if (pattern_files != NULL || patterns != NULL) break;
2387        else exit(usage(2));        else pcregrep_exit(usage(2));
2388      }      }
2389    
2390    /* Handle a long name option, or -- to terminate the options */    /* Handle a long name option, or -- to terminate the options */
# Line 1929  for (i = 1; i < argc; i++) Line 2406  for (i = 1; i < argc; i++)
2406      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2407      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2408      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2409      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
2410      these categories, fortunately. */      both these categories. */
2411    
2412      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2413        {        {
2414        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2415        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2416        if (opbra == NULL)     /* Not a (p) case */  
2417          /* Handle options with only one spelling of the name */
2418    
2419          if (opbra == NULL)     /* Does not contain '(' */
2420          {          {
2421          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2422            {            {
# Line 1944  for (i = 1; i < argc; i++) Line 2424  for (i = 1; i < argc; i++)
2424            }            }
2425          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2426            {            {
2427            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2428            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2429                (int)strlen(arg) : (int)(argequals - arg);
2430            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2431              {              {
2432              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1958  for (i = 1; i < argc; i++) Line 2439  for (i = 1; i < argc; i++)
2439              }              }
2440            }            }
2441          }          }
2442        else                   /* Special case xxxx(p) */  
2443          /* Handle options with an alternate spelling of the name */
2444    
2445          else
2446          {          {
2447          char buff1[24];          char buff1[24];
2448          char buff2[24];          char buff2[24];
2449          int baselen = opbra - op->long_name;  
2450            int baselen = (int)(opbra - op->long_name);
2451            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2452            int arglen = (argequals == NULL || equals == NULL)?
2453              (int)strlen(arg) : (int)(argequals - arg);
2454    
2455          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2456          sprintf(buff2, "%s%.*s", buff1,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2457            (int)strlen(op->long_name) - baselen - 2, opbra + 1);  
2458          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2459               strncmp(arg, buff2, arglen) == 0)
2460              {
2461              if (equals != NULL && argequals != NULL)
2462                {
2463                option_data = argequals;
2464                if (*option_data == '=')
2465                  {
2466                  option_data++;
2467                  longopwasequals = TRUE;
2468                  }
2469                }
2470            break;            break;
2471              }
2472          }          }
2473        }        }
2474    
2475      if (op->one_char == 0)      if (op->one_char == 0)
2476        {        {
2477        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2478        exit(usage(2));        pcregrep_exit(usage(2));
2479        }        }
2480      }      }
2481    
   
2482    /* Jeffrey Friedl's debugging harness uses these additional options which    /* Jeffrey Friedl's debugging harness uses these additional options which
2483    are not in the right form for putting in the option table because they use    are not in the right form for putting in the option table because they use
2484    only one hyphen, yet are more than one character long. By putting them    only one hyphen, yet are more than one character long. By putting them
# Line 2012  for (i = 1; i < argc; i++) Line 2512  for (i = 1; i < argc; i++)
2512      while (*s != 0)      while (*s != 0)
2513        {        {
2514        for (op = optionlist; op->one_char != 0; op++)        for (op = optionlist; op->one_char != 0; op++)
2515          { if (*s == op->one_char) break; }          {
2516            if (*s == op->one_char) break;
2517            }
2518        if (op->one_char == 0)        if (op->one_char == 0)
2519          {          {
2520          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2521            *s, argv[i]);            *s, argv[i]);
2522          exit(usage(2));          pcregrep_exit(usage(2));
2523            }
2524    
2525          /* Check for a single-character option that has data: OP_OP_NUMBER
2526          is used for one that either has a numerical value or defaults, i.e. the
2527          data is optional. If a digit follows, there is data; if not, carry on
2528          with other single-character options in the same string. */
2529    
2530          option_data = s+1;
2531          if (op->type == OP_OP_NUMBER)
2532            {
2533            if (isdigit((unsigned char)s[1])) break;
2534          }          }
2535        if (op->type != OP_NODATA || s[1] == 0)        else   /* Check for end or a dataless option */
2536          {          {
2537          option_data = s+1;          if (op->type != OP_NODATA || s[1] == 0) break;
         break;  
2538          }          }
2539    
2540          /* Handle a single-character option with no data, then loop for the
2541          next character in the string. */
2542    
2543        pcre_options = handle_option(*s++, pcre_options);        pcre_options = handle_option(*s++, pcre_options);
2544        }        }
2545      }      }
# Line 2040  for (i = 1; i < argc; i++) Line 2556  for (i = 1; i < argc; i++)
2556    
2557    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2558    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
2559    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r",
2560    Jeffrey Friedl's special -S debugging option. */    "only-matching", and Jeffrey Friedl's special -S debugging option. */
2561    
2562    if (*option_data == 0 &&    if (*option_data == 0 &&
2563        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
# Line 2051  for (i = 1; i < argc; i++) Line 2567  for (i = 1; i < argc; i++)
2567        case N_COLOUR:        case N_COLOUR:
2568        colour_option = (char *)"auto";        colour_option = (char *)"auto";
2569        break;        break;
2570    
2571          case 'o':
2572          only_matching = 0;
2573          break;
2574    
2575  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
2576        case 'S':        case 'S':
2577        S_arg = 0;        S_arg = 0;
# Line 2067  for (i = 1; i < argc; i++) Line 2588  for (i = 1; i < argc; i++)
2588      if (i >= argc - 1 || longopwasequals)      if (i >= argc - 1 || longopwasequals)
2589        {        {
2590        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2591        exit(usage(2));        pcregrep_exit(usage(2));
2592        }        }
2593      option_data = argv[++i];      option_data = argv[++i];
2594      }      }
2595    
2596    /* If the option type is OP_PATLIST, it's the -e option, which can be called    /* If the option type is OP_PATLIST, it's the -e option, or one of the
2597    multiple times to create a list of patterns. */    include/exclude options, which can be called multiple times to create lists
2598      of patterns. */
2599    
2600    if (op->type == OP_PATLIST)    if (op->type == OP_PATLIST)
2601         {
2602         patdatastr *pd = (patdatastr *)op->dataptr;
2603         *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2604         if (*(pd->lastptr) == NULL) goto EXIT2;
2605         if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2606         }
2607    
2608      /* If the option type is OP_FILELIST, it's one of the options that names a
2609      file. */
2610    
2611      else if (op->type == OP_FILELIST)
2612        {
2613        fndatastr *fd = (fndatastr *)op->dataptr;
2614        fn = (fnstr *)malloc(sizeof(fnstr));
2615        if (fn == NULL)
2616          {
2617          fprintf(stderr, "pcregrep: malloc failed\n");
2618          goto EXIT2;
2619          }
2620        fn->next = NULL;
2621        fn->name = option_data;
2622        if (*(fd->anchor) == NULL)
2623          *(fd->anchor) = fn;
2624        else
2625          (*(fd->lastptr))->next = fn;
2626        *(fd->lastptr) = fn;
2627        }
2628    
2629      /* Handle OP_BINFILES */
2630    
2631      else if (op->type == OP_BINFILES)
2632      {      {
2633      if (cmd_pattern_count >= MAX_PATTERN_COUNT)      if (strcmp(option_data, "binary") == 0)
2634          binary_files = BIN_BINARY;
2635        else if (strcmp(option_data, "without-match") == 0)
2636          binary_files = BIN_NOMATCH;
2637        else if (strcmp(option_data, "text") == 0)
2638          binary_files = BIN_TEXT;
2639        else
2640        {        {
2641        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",        fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2642          MAX_PATTERN_COUNT);          option_data);
2643        return 2;        pcregrep_exit(usage(2));
2644        }        }
     patterns[cmd_pattern_count++] = option_data;  
2645      }      }
2646    
2647    /* Otherwise, deal with single string or numeric data values. */    /* Otherwise, deal with single string or numeric data values. */
2648    
2649    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)    else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2650               op->type != OP_OP_NUMBER)
2651      {      {
2652      *((char **)op->dataptr) = option_data;      *((char **)op->dataptr) = option_data;
2653      }      }
2654    
2655      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2656      only for unpicking arguments, so just keep it simple. */
2657    
2658    else    else
2659      {      {
2660      char *endptr;      unsigned long int n = 0;
2661      int n = strtoul(option_data, &endptr, 10);      char *endptr = option_data;
2662        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2663        while (isdigit((unsigned char)(*endptr)))
2664          n = n * 10 + (int)(*endptr++ - '0');
2665        if (toupper(*endptr) == 'K')
2666          {
2667          n *= 1024;
2668          endptr++;
2669          }
2670        else if (toupper(*endptr) == 'M')
2671          {
2672          n *= 1024*1024;
2673          endptr++;
2674          }
2675      if (*endptr != 0)      if (*endptr != 0)
2676        {        {
2677        if (longop)        if (longop)
2678          {          {
2679          char *equals = strchr(op->long_name, '=');          char *equals = strchr(op->long_name, '=');
2680          int nlen = (equals == NULL)? (int)strlen(op->long_name) :          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2681            equals - op->long_name;            (int)(equals - op->long_name);
2682          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2683            option_data, nlen, op->long_name);            option_data, nlen, op->long_name);
2684          }          }
2685        else        else
2686          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2687            option_data, op->one_char);            option_data, op->one_char);
2688        exit(usage(2));        pcregrep_exit(usage(2));
2689        }        }
2690      *((int *)op->dataptr) = n;      if (op->type == OP_LONGNUMBER)
2691            *((unsigned long int *)op->dataptr) = n;
2692        else
2693            *((int *)op->dataptr) = n;
2694      }      }
2695    }    }
2696    
# Line 2125  if (both_context > 0) Line 2704  if (both_context > 0)
2704    }    }
2705    
2706  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2707  However, the latter two set the only_matching flag. */  However, the latter two set only_matching. */
2708    
2709  if ((only_matching && (file_offsets || line_offsets)) ||  if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2710      (file_offsets && line_offsets))      (file_offsets && line_offsets))
2711    {    {
2712    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2713      "and/or --line-offsets\n");      "and/or --line-offsets\n");
2714    exit(usage(2));    pcregrep_exit(usage(2));
2715    }    }
2716    
2717  if (file_offsets || line_offsets) only_matching = TRUE;  if (file_offsets || line_offsets) only_matching = 0;
2718    
2719  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2720  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
# Line 2259  if (jfriedl_XT != 0 || jfriedl_XR != 0) Line 2838  if (jfriedl_XT != 0 || jfriedl_XR != 0)
2838    }    }
2839  #endif  #endif
2840    
2841  /* Get memory to store the pattern and hints lists. */  /* Get memory for the main buffer. */
2842    
2843  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  bufsize = 3*bufthird;
2844  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  main_buffer = (char *)malloc(bufsize);
2845    
2846  if (pattern_list == NULL || hints_list == NULL)  if (main_buffer == NULL)
2847    {    {
2848    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
2849    goto EXIT2;    goto EXIT2;
2850    }    }
2851    
2852  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there are no files provided by -f,
2853  the first argument is the one and only pattern, and it must exist. */  the first argument is the one and only pattern, and it must exist. */
2854    
2855  if (cmd_pattern_count == 0 && pattern_filename == NULL)  if (patterns == NULL && pattern_files == NULL)
2856    {    {
2857    if (i >= argc) return usage(2);    if (i >= argc) return usage(2);
2858    patterns[cmd_pattern_count++] = argv[i++];    patterns = patterns_last = add_pattern(argv[i++], NULL);
2859      if (patterns == NULL) goto EXIT2;
2860    }    }
2861    
2862  /* Compile the patterns that were provided on the command line, either by  /* Compile the patterns that were provided on the command line, either by
2863  multiple uses of -e or as a single unkeyed pattern. */  multiple uses of -e or as a single unkeyed pattern. We cannot do this until
2864    after all the command-line options are read so that we know which PCRE options
2865    to use. When -F is used, compile_pattern() may add another block into the
2866    chain, so we must not access the next pointer till after the compile. */
2867    
2868  for (j = 0; j < cmd_pattern_count; j++)  for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
2869    {    {
2870    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
2871         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 1 && patterns->next == NULL)? 0 : j))
2872      goto EXIT2;      goto EXIT2;
2873    }    }
2874    
2875  /* Compile the regular expressions that are provided in a file. */  /* Read and compile the regular expressions that are provided in files. */
2876    
2877  if (pattern_filename != NULL)  for (fn = pattern_files; fn != NULL; fn = fn->next)
2878    {    {
2879    int linenumber = 0;    if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
2880    FILE *f;      goto EXIT2;
   char *filename;  
   char buffer[MBUFTHIRD];  
   
   if (strcmp(pattern_filename, "-") == 0)  
     {  
     f = stdin;  
     filename = stdin_name;  
     }  
   else  
     {  
     f = fopen(pattern_filename, "r");  
     if (f == NULL)  
       {  
       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,  
         strerror(errno));  
       goto EXIT2;  
       }  
     filename = pattern_filename;  
     }  
   
   while (fgets(buffer, MBUFTHIRD, f) != NULL)  
     {  
     char *s = buffer + (int)strlen(buffer);  
     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;  
     *s = 0;  
     linenumber++;  
     if (buffer[0] == 0) continue;   /* Skip blank lines */  
     if (!compile_pattern(buffer, pcre_options, filename, linenumber))  
       goto EXIT2;  
     }  
   
   if (f != stdin) fclose(f);  
2881    }    }
2882    
2883  /* Study the regular expressions, as we will be running them many times */  /* Study the regular expressions, as we will be running them many times. Unless
2884    JIT has been explicitly disabled, arrange a stack for it to use. */
2885    
2886  for (j = 0; j < pattern_count; j++)  #ifdef SUPPORT_PCREGREP_JIT
2887    if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2888      jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
2889    #endif
2890    
2891    for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
2892    {    {
2893    hints_list[j] = pcre_study(pattern_list[j], 0, &error);    cp->hint = pcre_study(cp->compiled, study_options, &error);
2894    if (error != NULL)    if (error != NULL)
2895      {      {
2896      char s[16];      char s[16];
2897      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
2898      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2899      goto EXIT2;      goto EXIT2;
2900      }      }
2901    hint_count++;  #ifdef SUPPORT_PCREGREP_JIT
2902      if (jit_stack != NULL && cp->hint != NULL)
2903        pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
2904    #endif
2905    }    }
2906    
2907  /* If there are include or exclude patterns, compile them. */  /* If --match-limit or --recursion-limit was set, put the value(s) into the
2908    pcre_extra block for each pattern. */
2909    
2910  if (exclude_pattern != NULL)  if (match_limit > 0 || match_limit_recursion > 0)
2911    {    {
2912    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,    for (cp = patterns; cp != NULL; cp = cp->next)
     pcretables);  
   if (exclude_compiled == NULL)  
2913      {      {
2914      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      if (cp->hint == NULL)
2915        errptr, error);        {
2916      goto EXIT2;        cp->hint = (pcre_extra *)malloc(sizeof(pcre_extra));
2917          if (cp->hint == NULL)
2918            {
2919            fprintf(stderr, "pcregrep: malloc failed\n");
2920            pcregrep_exit(2);
2921            }
2922          }
2923        if (match_limit > 0)
2924          {
2925          cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
2926          cp->hint->match_limit = match_limit;
2927          }
2928        if (match_limit_recursion > 0)
2929          {
2930          cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2931          cp->hint->match_limit_recursion = match_limit_recursion;
2932          }
2933      }      }
2934    }    }
2935    
2936  if (include_pattern != NULL)  /* If there are include or exclude patterns read from the command line, compile
2937    them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
2938    0. */
2939    
2940    for (j = 0; j < 4; j++)
2941    {    {
2942    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,    int k;
2943      pcretables);    for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
   if (include_compiled == NULL)  
2944      {      {
2945      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
2946        errptr, error);           (k == 1 && cp->next == NULL)? 0 : k))
2947      goto EXIT2;        goto EXIT2;
2948      }      }
2949    }    }
2950    
2951  if (exclude_dir_pattern != NULL)  /* Read and compile include/exclude patterns from files. */
2952    
2953    for (fn = include_from; fn != NULL; fn = fn->next)
2954    {    {
2955    exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,    if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
     pcretables);  
   if (exclude_dir_compiled == NULL)  
     {  
     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",  
       errptr, error);  
2956      goto EXIT2;      goto EXIT2;
     }  
2957    }    }
2958    
2959  if (include_dir_pattern != NULL)  for (fn = exclude_from; fn != NULL; fn = fn->next)
2960    {    {
2961    include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,    if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
     pcretables);  
   if (include_dir_compiled == NULL)  
     {  
     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",  
       errptr, error);  
2962      goto EXIT2;      goto EXIT2;
     }  
2963    }    }
2964    
2965  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no files that contain lists of files to search, and there are
2966    no file arguments, search stdin, and then exit. */
2967    
2968  if (i >= argc)  if (file_lists == NULL && i >= argc)
2969    {    {
2970    rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);    rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2971        (filenames > FN_DEFAULT)? stdin_name : NULL);
2972    goto EXIT;    goto EXIT;
2973    }    }
2974    
2975  /* Otherwise, work through the remaining arguments as files or directories.  /* If any files that contain a list of files to search have been specified,
2976  Pass in the fact that there is only one argument at top level - this suppresses  read them line by line and search the given files. */
2977  the file name if the argument is not a directory and filenames are not  
2978  otherwise forced. */  for (fn = file_lists; fn != NULL; fn = fn->next)
2979      {
2980      char buffer[PATBUFSIZE];
2981      FILE *fl;
2982      if (strcmp(fn->name, "-") == 0) fl = stdin; else
2983        {
2984        fl = fopen(fn->name, "rb");
2985        if (fl == NULL)
2986          {
2987          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
2988            strerror(errno));
2989          goto EXIT2;
2990          }
2991        }
2992      while (fgets(buffer, PATBUFSIZE, fl) != NULL)
2993        {
2994        int frc;
2995        char *end = buffer + (int)strlen(buffer);
2996        while (end > buffer && isspace(end[-1])) end--;
2997        *end = 0;
2998        if (*buffer != 0)
2999          {
3000          frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3001          if (frc > 1) rc = frc;
3002            else if (frc == 0 && rc == 1) rc = 0;
3003          }
3004        }
3005      if (fl != stdin) fclose(fl);
3006      }
3007    
3008    /* After handling file-list, work through remaining arguments. Pass in the fact
3009    that there is only one argument at top level - this suppresses the file name if
3010    the argument is not a directory and filenames are not otherwise forced. */
3011    
3012  only_one_at_top = i == argc - 1;   /* Catch initial value of i */  only_one_at_top = i == argc - 1 && file_lists == NULL;
3013    
3014  for (; i < argc; i++)  for (; i < argc; i++)
3015    {    {
# Line 2418  for (; i < argc; i++) Line 3020  for (; i < argc; i++)
3020    }    }
3021    
3022  EXIT:  EXIT:
3023  if (pattern_list != NULL)  #ifdef SUPPORT_PCREGREP_JIT
3024    {  if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3025    for (i = 0; i < pattern_count; i++) free(pattern_list[i]);  #endif
3026    free(pattern_list);  
3027    }  if (main_buffer != NULL) free(main_buffer);
3028  if (hints_list != NULL)  
3029    {  free_pattern_chain(patterns);
3030    for (i = 0; i < hint_count; i++) free(hints_list[i]);  free_pattern_chain(include_patterns);
3031    free(hints_list);  free_pattern_chain(include_dir_patterns);
3032    }  free_pattern_chain(exclude_patterns);
3033  return rc;  free_pattern_chain(exclude_dir_patterns);
3034    
3035    free_file_chain(exclude_from);
3036    free_file_chain(include_from);
3037    free_file_chain(pattern_files);
3038    free_file_chain(file_lists);
3039    
3040    pcregrep_exit(rc);
3041    
3042  EXIT2:  EXIT2:
3043  rc = 2;  rc = 2;

Legend:
Removed from v.421  
changed lines
  Added in v.1033

  ViewVC Help
Powered by ViewVC 1.1.5