/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 835 by ph10, Wed Dec 28 16:10:09 2011 UTC revision 1033 by ph10, Mon Sep 10 11:02:48 2012 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2011 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 70  POSSIBILITY OF SUCH DAMAGE. Line 70  POSSIBILITY OF SUCH DAMAGE.
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define MAX_PATTERN_COUNT 100  
73  #define OFFSET_SIZE 99  #define OFFSET_SIZE 99
74    
75  #if BUFSIZ > 8192  #if BUFSIZ > 8192
76  #define PATBUFSIZE BUFSIZ  #define MAXPATLEN BUFSIZ
77  #else  #else
78  #define PATBUFSIZE 8192  #define MAXPATLEN 8192
79  #endif  #endif
80    
81    #define PATBUFSIZE (MAXPATLEN + 10)   /* Allows for prefix+suffix */
82    
83  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
84  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
85  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
# Line 104  enum { DEE_READ, DEE_SKIP }; Line 105  enum { DEE_READ, DEE_SKIP };
105    
106  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
107    
108    /* Binary file options */
109    
110    enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
111    
112  /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some  /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
113  environments), a warning is issued if the value of fwrite() is ignored.  environments), a warning is issued if the value of fwrite() is ignored.
114  Unfortunately, casting to (void) does not suppress the warning. To get round  Unfortunately, casting to (void) does not suppress the warning. To get round
# Line 135  static char *colour_string = (char *)"1; Line 140  static char *colour_string = (char *)"1;
140  static char *colour_option = NULL;  static char *colour_option = NULL;
141  static char *dee_option = NULL;  static char *dee_option = NULL;
142  static char *DEE_option = NULL;  static char *DEE_option = NULL;
143    static char *locale = NULL;
144  static char *main_buffer = NULL;  static char *main_buffer = NULL;
145  static char *newline = NULL;  static char *newline = NULL;
 static char *pattern_filename = NULL;  
146  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
 static char *locale = NULL;  
147    
148  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
149    
 static int  pattern_count = 0;  
 static pcre **pattern_list = NULL;  
 static pcre_extra **hints_list = NULL;  
   
 static char *include_pattern = NULL;  
 static char *exclude_pattern = NULL;  
 static char *include_dir_pattern = NULL;  
 static char *exclude_dir_pattern = NULL;  
   
 static pcre *include_compiled = NULL;  
 static pcre *exclude_compiled = NULL;  
 static pcre *include_dir_compiled = NULL;  
 static pcre *exclude_dir_compiled = NULL;  
   
150  static int after_context = 0;  static int after_context = 0;
151  static int before_context = 0;  static int before_context = 0;
152    static int binary_files = BIN_BINARY;
153  static int both_context = 0;  static int both_context = 0;
154  static int bufthird = PCREGREP_BUFSIZE;  static int bufthird = PCREGREP_BUFSIZE;
155  static int bufsize = 3*PCREGREP_BUFSIZE;  static int bufsize = 3*PCREGREP_BUFSIZE;
156    
157    #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
158    static int dee_action = dee_SKIP;
159    #else
160  static int dee_action = dee_READ;  static int dee_action = dee_READ;
161    #endif
162    
163  static int DEE_action = DEE_READ;  static int DEE_action = DEE_READ;
164  static int error_count = 0;  static int error_count = 0;
165  static int filenames = FN_DEFAULT;  static int filenames = FN_DEFAULT;
166  static int only_matching = -1;  static int only_matching = -1;
167    static int pcre_options = 0;
168  static int process_options = 0;  static int process_options = 0;
169    
170  #ifdef SUPPORT_PCREGREP_JIT  #ifdef SUPPORT_PCREGREP_JIT
# Line 193  static BOOL quiet = FALSE; Line 191  static BOOL quiet = FALSE;
191  static BOOL silent = FALSE;  static BOOL silent = FALSE;
192  static BOOL utf8 = FALSE;  static BOOL utf8 = FALSE;
193    
194    /* Structure for list of file names (for -f and --{in,ex}clude-from) */
195    
196    typedef struct fnstr {
197      struct fnstr *next;   /* next item in the chain; NULL ends the chain */
198      char *name;           /* the file name held by this item */
199    } fnstr;
200    
201    static fnstr *exclude_from = NULL;
202    static fnstr *exclude_from_last = NULL;
203    static fnstr *include_from = NULL;
204    static fnstr *include_from_last = NULL;
205    
206    static fnstr *file_lists = NULL;
207    static fnstr *file_lists_last = NULL;
208    static fnstr *pattern_files = NULL;
209    static fnstr *pattern_files_last = NULL;
210    
211    /* Structure for holding the two variables that describe a file name chain. */
212    
213    typedef struct fndatastr {
214      fnstr **anchor;       /* address of the variable that points to the first item */
215      fnstr **lastptr;      /* address of the matching "..._last" variable */
216    } fndatastr;
217    
218    static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
219    static fndatastr include_from_data = { &include_from, &include_from_last };
220    static fndatastr file_lists_data = { &file_lists, &file_lists_last };
221    static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
222    
223    /* Structure for pattern and its compiled form; used for matching patterns and
224    also for include/exclude patterns. */
225    
226    typedef struct patstr {
227      struct patstr *next;  /* next item in the chain; NULL ends the chain */
228      char *string;         /* the pattern text */
229      pcre *compiled;       /* compiled pattern; NULL until set */
230      pcre_extra *hint;     /* extra data passed to pcre_exec(); NULL until set */
231    } patstr;
232    
233    static patstr *patterns = NULL;
234    static patstr *patterns_last = NULL;
235    static patstr *include_patterns = NULL;
236    static patstr *include_patterns_last = NULL;
237    static patstr *exclude_patterns = NULL;
238    static patstr *exclude_patterns_last = NULL;
239    static patstr *include_dir_patterns = NULL;
240    static patstr *include_dir_patterns_last = NULL;
241    static patstr *exclude_dir_patterns = NULL;
242    static patstr *exclude_dir_patterns_last = NULL;
243    
244    /* Structure holding the two variables that describe a pattern chain. A pointer
245    to such structures is used for each appropriate option. */
246    
247    typedef struct patdatastr {
248      patstr **anchor;      /* address of the variable that points to the first item */
249      patstr **lastptr;     /* address of the matching "..._last" variable */
250    } patdatastr;
251    
252    static patdatastr match_patdata = { &patterns, &patterns_last };
253    static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
254    static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
255    static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
256    static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
257    
258    static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
259                                     &include_dir_patterns, &exclude_dir_patterns };
260    
261    static const char *incexname[4] = { "--include", "--exclude",
262                                        "--include-dir", "--exclude-dir" };
263    
264  /* Structure for options and list of them */  /* Structure for options and list of them */
265    
266  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
267         OP_OP_NUMBER, OP_PATLIST };         OP_OP_NUMBER, OP_PATLIST, OP_FILELIST, OP_BINFILES };
268    
269  typedef struct option_item {  typedef struct option_item {
270    int type;    int type;
# Line 225  used to identify them. */ Line 293  used to identify them. */
293  #define N_M_LIMIT_REC  (-14)  #define N_M_LIMIT_REC  (-14)
294  #define N_BUFSIZE      (-15)  #define N_BUFSIZE      (-15)
295  #define N_NOJIT        (-16)  #define N_NOJIT        (-16)
296    #define N_FILE_LIST    (-17)
297    #define N_BINARY_FILES (-18)
298    #define N_EXCLUDE_FROM (-19)
299    #define N_INCLUDE_FROM (-20)
300    
301  static option_item optionlist[] = {  static option_item optionlist[] = {
302    { OP_NODATA,     N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
303    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
304    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
305      { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
306    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
307      { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
308    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
309    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
310    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
# Line 238  static option_item optionlist[] = { Line 312  static option_item optionlist[] = {
312    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
313    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
314    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
315    { OP_PATLIST,    'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },    { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
316    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
317    { OP_STRING,     'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
318      { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
319    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
320    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
321    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
322      { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
323    { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
324  #ifdef SUPPORT_PCREGREP_JIT  #ifdef SUPPORT_PCREGREP_JIT
325    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
# Line 264  static option_item optionlist[] = { Line 340  static option_item optionlist[] = {
340    { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },    { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
341    { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
342    { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
343    { OP_STRING,     N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
344    { OP_STRING,     N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
345    { OP_STRING,     N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },    { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
346    { OP_STRING,     N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },    { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
347      { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
348      { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
349    
350    /* These two were accidentally implemented with underscores instead of    /* These two were accidentally implemented with underscores instead of
351    hyphens in the option names. As this was not discovered for several releases,    hyphens in the option names. As this was not discovered for several releases,
352    the incorrect versions are left in the table for compatibility. However, the    the incorrect versions are left in the table for compatibility. However, the
353    --help function misses out any option that has an underscore in its name. */    --help function misses out any option that has an underscore in its name. */
354    
355    { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },    { OP_PATLIST,   N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
356    { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },    { OP_PATLIST,   N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
357    
358  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
359    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
# Line 292  static option_item optionlist[] = { Line 370  static option_item optionlist[] = {
370  /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F  /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
371  options. These set the 1, 2, and 4 bits in process_options, respectively. Note  options. These set the 1, 2, and 4 bits in process_options, respectively. Note
372  that the combination of -w and -x has the same effect as -x on its own, so we  that the combination of -w and -x has the same effect as -x on its own, so we
373  can treat them as the same. */  can treat them as the same. Note that the PATBUFSIZE macro assumes the longest
374    prefix+suffix is 10 characters; if anything longer is added, it must be
375    adjusted. */
376    
377  static const char *prefix[] = {  static const char *prefix[] = {
378    "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };    "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
# Line 313  const char utf8_table4[] = { Line 393  const char utf8_table4[] = {
393    
394    
395  /*************************************************  /*************************************************
396    *          Add item to chain of patterns         *
397    *************************************************/
398    
399    /* Used to add an item onto a chain, or just return an unconnected item if the
400    "after" argument is NULL.
401    
402    Arguments:
403      s          pattern string to add
404      after      if not NULL points to item to insert after
405    
406    Returns:     new pattern block, or NULL after malloc failure
407    */
408    
409    static patstr *
410    add_pattern(char *s, patstr *after)
411    {
412    patstr *p = (patstr *)malloc(sizeof(patstr));
413    if (p == NULL)
414      {
415      fprintf(stderr, "pcregrep: malloc failed\n");
416      return NULL;
417      }
418    if (strlen(s) > MAXPATLEN)
419      {
420      fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
421        MAXPATLEN);
422      return NULL;
423      }
424    p->next = NULL;
425    p->string = s;
426    p->compiled = NULL;
427    p->hint = NULL;
428    
429    if (after != NULL)
430      {
431      p->next = after->next;
432      after->next = p;
433      }
434    return p;
435    }
436    
437    
438    /*************************************************
439    *           Free chain of patterns               *
440    *************************************************/
441    
442    /* Used for several chains of patterns.
443    
444    Argument: pointer to start of chain
445    Returns:  nothing
446    */
447    
448    static void
449    free_pattern_chain(patstr *pc)
450    {
451    while (pc != NULL)
452      {
453      patstr *p = pc;
454      pc = p->next;
455      if (p->hint != NULL) pcre_free_study(p->hint);
456      if (p->compiled != NULL) pcre_free(p->compiled);
457      free(p);
458      }
459    }
460    
461    
462    /*************************************************
463    *           Free chain of file names             *
464    *************************************************/
465    
466    /*
467    Argument: pointer to start of chain
468    Returns:  nothing
469    */
470    
471    static void
472    free_file_chain(fnstr *fn)
473    {
474    while (fn != NULL)
475      {
476      fnstr *f = fn;
477      fn = f->next;
478      free(f);
479      }
480    }
481    
482    
483    /*************************************************
484  *         Exit from the program                  *  *         Exit from the program                  *
485  *************************************************/  *************************************************/
486    
# Line 353  although at present the only ones are fo Line 521  although at present the only ones are fo
521  #include <dirent.h>  #include <dirent.h>
522    
523  typedef DIR directory_type;  typedef DIR directory_type;
524    #define FILESEP '/'
525    
526  static int  static int
527  isdirectory(char *filename)  isdirectory(char *filename)
# Line 360  isdirectory(char *filename) Line 529  isdirectory(char *filename)
529  struct stat statbuf;  struct stat statbuf;
530  if (stat(filename, &statbuf) < 0)  if (stat(filename, &statbuf) < 0)
531    return 0;        /* In the expectation that opening as a file will fail */    return 0;        /* In the expectation that opening as a file will fail */
532  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return (statbuf.st_mode & S_IFMT) == S_IFDIR;
533  }  }
534    
535  static directory_type *  static directory_type *
# Line 447  BOOL first; Line 616  BOOL first;
616  WIN32_FIND_DATA data;  WIN32_FIND_DATA data;
617  } directory_type;  } directory_type;
618    
619    #define FILESEP '/'
620    
621  int  int
622  isdirectory(char *filename)  isdirectory(char *filename)
623  {  {
624  DWORD attr = GetFileAttributes(filename);  DWORD attr = GetFileAttributes(filename);
625  if (attr == INVALID_FILE_ATTRIBUTES)  if (attr == INVALID_FILE_ATTRIBUTES)
626    return 0;    return 0;
627  return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;  return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
628  }  }
629    
630  directory_type *  directory_type *
# Line 464  char *pattern; Line 635  char *pattern;
635  directory_type *dir;  directory_type *dir;
636  DWORD err;  DWORD err;
637  len = strlen(filename);  len = strlen(filename);
638  pattern = (char *) malloc(len + 3);  pattern = (char *)malloc(len + 3);
639  dir = (directory_type *) malloc(sizeof(*dir));  dir = (directory_type *)malloc(sizeof(*dir));
640  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
641    {    {
642    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
# Line 551  return FALSE; Line 722  return FALSE;
722    
723  #else  #else
724    
725    #define FILESEP 0
726  typedef void directory_type;  typedef void directory_type;
727    
728  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
# Line 607  return sys_errlist[n]; Line 779  return sys_errlist[n];
779    
780    
781  /*************************************************  /*************************************************
782    *            Test exclude/includes               *
783    *************************************************/
784    
785    /* If any exclude pattern matches, the path is excluded. Otherwise, unless
786    there are no includes, the path must match an include pattern.
787    
788    Arguments:
789      path      the path to be matched
790      ip        the chain of include patterns
791      ep        the chain of exclude patterns
792    
793    Returns:    TRUE if the path is not excluded
794    */
795    
796    static BOOL
797    test_incexc(char *path, patstr *ip, patstr *ep)
798    {
799    int plen = strlen(path);
800    
801    for (; ep != NULL; ep = ep->next)
802      {
803      if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
804        return FALSE;
805      }
806    
807    if (ip == NULL) return TRUE;
808    
809    for (; ip != NULL; ip = ip->next)
810      {
811      if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
812        return TRUE;
813      }
814    
815    return FALSE;
816    }
817    
818    
819    
820    /*************************************************
821  *            Read one line of input              *  *            Read one line of input              *
822  *************************************************/  *************************************************/
823    
# Line 625  Arguments: Line 836  Arguments:
836  Returns:     the number of characters read, zero at end of file  Returns:     the number of characters read, zero at end of file
837  */  */
838    
839  static int  static unsigned int
840  read_one_line(char *buffer, int length, FILE *f)  read_one_line(char *buffer, int length, FILE *f)
841  {  {
842  int c;  int c;
# Line 722  switch(endlinetype) Line 933  switch(endlinetype)
933    
934      switch (c)      switch (c)
935        {        {
936        case 0x0a:    /* LF */        case '\n':
937        *lenptr = 1;        *lenptr = 1;
938        return p;        return p;
939    
940        case 0x0d:    /* CR */        case '\r':
941        if (p < endptr && *p == 0x0a)        if (p < endptr && *p == '\n')
942          {          {
943          *lenptr = 2;          *lenptr = 2;
944          p++;          p++;
# Line 766  switch(endlinetype) Line 977  switch(endlinetype)
977    
978      switch (c)      switch (c)
979        {        {
980        case 0x0a:    /* LF */        case '\n':    /* LF */
981        case 0x0b:    /* VT */        case '\v':    /* VT */
982        case 0x0c:    /* FF */        case '\f':    /* FF */
983        *lenptr = 1;        *lenptr = 1;
984        return p;        return p;
985    
986        case 0x0d:    /* CR */        case '\r':    /* CR */
987        if (p < endptr && *p == 0x0a)        if (p < endptr && *p == '\n')
988          {          {
989          *lenptr = 2;          *lenptr = 2;
990          p++;          p++;
# Line 781  switch(endlinetype) Line 992  switch(endlinetype)
992        else *lenptr = 1;        else *lenptr = 1;
993        return p;        return p;
994    
995        case 0x85:    /* NEL */  #ifndef EBCDIC
996          case 0x85:    /* Unicode NEL */
997        *lenptr = utf8? 2 : 1;        *lenptr = utf8? 2 : 1;
998        return p;        return p;
999    
1000        case 0x2028:  /* LS */        case 0x2028:  /* Unicode LS */
1001        case 0x2029:  /* PS */        case 0x2029:  /* Unicode PS */
1002        *lenptr = 3;        *lenptr = 3;
1003        return p;        return p;
1004    #endif  /* Not EBCDIC */
1005    
1006        default:        default:
1007        break;        break;
# Line 872  switch(endlinetype) Line 1085  switch(endlinetype)
1085    
1086      if (endlinetype == EL_ANYCRLF) switch (c)      if (endlinetype == EL_ANYCRLF) switch (c)
1087        {        {
1088        case 0x0a:    /* LF */        case '\n':    /* LF */
1089        case 0x0d:    /* CR */        case '\r':    /* CR */
1090        return p;        return p;
1091    
1092        default:        default:
# Line 882  switch(endlinetype) Line 1095  switch(endlinetype)
1095    
1096      else switch (c)      else switch (c)
1097        {        {
1098        case 0x0a:    /* LF */        case '\n':    /* LF */
1099        case 0x0b:    /* VT */        case '\v':    /* VT */
1100        case 0x0c:    /* FF */        case '\f':    /* FF */
1101        case 0x0d:    /* CR */        case '\r':    /* CR */
1102        case 0x85:    /* NEL */  #ifndef EBCDIC
1103        case 0x2028:  /* LS */        case 0x85:    /* Unicode NEL */
1104        case 0x2029:  /* PS */        case 0x2028:  /* Unicode LS */
1105          case 0x2029:  /* Unicode PS */
1106    #endif  /* Not EBCDIC */
1107        return p;        return p;
1108    
1109        default:        default:
# Line 923  Arguments: Line 1138  Arguments:
1138  Returns:            nothing  Returns:            nothing
1139  */  */
1140    
1141  static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,  static void
1142    char *endptr, char *printname)  do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1143      char *printname)
1144  {  {
1145  if (after_context > 0 && lastmatchnumber > 0)  if (after_context > 0 && lastmatchnumber > 0)
1146    {    {
# Line 971  match_patterns(char *matchptr, size_t le Line 1187  match_patterns(char *matchptr, size_t le
1187  {  {
1188  int i;  int i;
1189  size_t slen = length;  size_t slen = length;
1190    patstr *p = patterns;
1191  const char *msg = "this text:\n\n";  const char *msg = "this text:\n\n";
1192    
1193  if (slen > 200)  if (slen > 200)
1194    {    {
1195    slen = 200;    slen = 200;
1196    msg = "text that starts:\n\n";    msg = "text that starts:\n\n";
1197    }    }
1198  for (i = 0; i < pattern_count; i++)  for (i = 1; p != NULL; p = p->next, i++)
1199    {    {
1200    *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,    *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1201      startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);      startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
1202    if (*mrc >= 0) return TRUE;    if (*mrc >= 0) return TRUE;
1203    if (*mrc == PCRE_ERROR_NOMATCH) continue;    if (*mrc == PCRE_ERROR_NOMATCH) continue;
1204    fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);    fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1205    if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);    if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1206    fprintf(stderr, "%s", msg);    fprintf(stderr, "%s", msg);
1207    FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */    FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1208    fprintf(stderr, "\n\n");    fprintf(stderr, "\n\n");
# Line 1044  char *lastmatchrestart = NULL; Line 1262  char *lastmatchrestart = NULL;
1262  char *ptr = main_buffer;  char *ptr = main_buffer;
1263  char *endptr;  char *endptr;
1264  size_t bufflength;  size_t bufflength;
1265    BOOL binary = FALSE;
1266  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
1267  BOOL input_line_buffered = line_buffered;  BOOL input_line_buffered = line_buffered;
1268  FILE *in = NULL;                    /* Ensure initialized */  FILE *in = NULL;                    /* Ensure initialized */
# Line 1091  else Line 1310  else
1310    
1311  endptr = main_buffer + bufflength;  endptr = main_buffer + bufflength;
1312    
1313    /* Unless binary-files=text, see if we have a binary file. This uses the same
1314    rule as GNU grep, namely, a search for a binary zero byte near the start of the
1315    file. */
1316    
1317    if (binary_files != BIN_TEXT)
1318      {
1319      binary =
1320        memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1321      if (binary && binary_files == BIN_NOMATCH) return 1;
1322      }
1323    
1324  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
1325  files, endptr will be at the end of the buffer when we are in the middle of the  files, endptr will be at the end of the buffer when we are in the middle of the
1326  file, but ptr will never get there, because as soon as it gets over 2/3 of the  file, but ptr will never get there, because as soon as it gets over 2/3 of the
# Line 1167  while (ptr < endptr) Line 1397  while (ptr < endptr)
1397    
1398    
1399        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1400            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,            match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1401                PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);                PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1402    
1403        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
# Line 1207  while (ptr < endptr) Line 1437  while (ptr < endptr)
1437    
1438      if (count_only) count++;      if (count_only) count++;
1439    
1440        /* When handling a binary file and binary-files==binary, the "binary"
1441        variable will be set true (it's false in all other cases). In this
1442        situation we just want to output the file name. No need to scan further. */
1443    
1444        else if (binary)
1445          {
1446          fprintf(stdout, "Binary file %s matches\n", filename);
1447          return 0;
1448          }
1449    
1450      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1451      in the file. */      in the file. */
1452    
# Line 1410  while (ptr < endptr) Line 1650  while (ptr < endptr)
1650          and its line-ending characters (if they matched the pattern), so there          and its line-ending characters (if they matched the pattern), so there
1651          may be no more to print. */          may be no more to print. */
1652    
1653          plength = (linelength + endlinelength) - startoffset;          plength = (int)((linelength + endlinelength) - startoffset);
1654          if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);          if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1655          }          }
1656    
# Line 1462  while (ptr < endptr) Line 1702  while (ptr < endptr)
1702    
1703    if (input_line_buffered && bufflength < (size_t)bufsize)    if (input_line_buffered && bufflength < (size_t)bufsize)
1704      {      {
1705      int add = read_one_line(ptr, bufsize - (ptr - main_buffer), in);      int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1706      bufflength += add;      bufflength += add;
1707      endptr += add;      endptr += add;
1708      }      }
# Line 1560  Arguments: Line 1800  Arguments:
1800    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1801    only_one_at_top   TRUE if the path is the only one at toplevel    only_one_at_top   TRUE if the path is the only one at toplevel
1802    
1803  Returns:   0 if there was at least one match  Returns:  -1 the file/directory was skipped
1804               0 if there was at least one match
1805             1 if there were no matches             1 if there were no matches
1806             2 there was some kind of error             2 there was some kind of error
1807    
# Line 1571  static int Line 1812  static int
1812  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1813  {  {
1814  int rc = 1;  int rc = 1;
 int sep;  
1815  int frtype;  int frtype;
 int pathlen;  
1816  void *handle;  void *handle;
1817    char *lastcomp;
1818  FILE *in = NULL;           /* Ensure initialized */  FILE *in = NULL;           /* Ensure initialized */
1819    
1820  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
# Line 1585  gzFile ingz = NULL; Line 1825  gzFile ingz = NULL;
1825  BZFILE *inbz2 = NULL;  BZFILE *inbz2 = NULL;
1826  #endif  #endif
1827    
1828    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1829    int pathlen;
1830    #endif
1831    
1832  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1833    
1834  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
# Line 1594  if (strcmp(pathname, "-") == 0) Line 1838  if (strcmp(pathname, "-") == 0)
1838        stdin_name : NULL);        stdin_name : NULL);
1839    }    }
1840    
1841  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
1842  each file and directory within it, subject to any include or exclude patterns  directories, whereas --include and --exclude apply to everything else. The test
1843  that were set. The scanning code is localized so it can be made  is against the final component of the path. */
1844  system-specific. */  
1845    lastcomp = strrchr(pathname, FILESEP);
1846    lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
1847    
1848    /* If the file is a directory, skip if not recursing or if explicitly excluded.
1849    Otherwise, scan the directory and recurse for each path within it. The scanning
1850    code is localized so it can be made system-specific. */
1851    
1852    if (isdirectory(pathname))
1853      {
1854      if (dee_action == dee_SKIP ||
1855          !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
1856        return -1;
1857    
 if ((sep = isdirectory(pathname)) != 0)  
   {  
   if (dee_action == dee_SKIP) return 1;  
1858    if (dee_action == dee_RECURSE)    if (dee_action == dee_RECURSE)
1859      {      {
1860      char buffer[1024];      char buffer[1024];
# Line 1618  if ((sep = isdirectory(pathname)) != 0) Line 1871  if ((sep = isdirectory(pathname)) != 0)
1871    
1872      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
1873        {        {
1874        int frc, nflen;        int frc;
1875        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
       nflen = (int)(strlen(nextfile));  
   
       if (isdirectory(buffer))  
         {  
         if (exclude_dir_compiled != NULL &&  
             pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)  
           continue;  
   
         if (include_dir_compiled != NULL &&  
             pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)  
           continue;  
         }  
       else  
         {  
         if (exclude_compiled != NULL &&  
             pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)  
           continue;  
   
         if (include_compiled != NULL &&  
             pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)  
           continue;  
         }  
   
1876        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1877        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
1878         else if (frc == 0 && rc == 1) rc = 0;         else if (frc == 0 && rc == 1) rc = 0;
# Line 1654  if ((sep = isdirectory(pathname)) != 0) Line 1884  if ((sep = isdirectory(pathname)) != 0)
1884    }    }
1885    
1886  /* If the file is not a directory and not a regular file, skip it if that's  /* If the file is not a directory and not a regular file, skip it if that's
1887  been requested. */  been requested. Otherwise, check for explicit include/exclude. */
1888    
1889  else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;  else if ((!isregfile(pathname) && DEE_action == DEE_SKIP) ||
1890              !test_incexc(lastcomp, include_patterns, exclude_patterns))
1891            return -1;
1892    
1893  /* Control reaches here if we have a regular file, or if we have a directory  /* Control reaches here if we have a regular file, or if we have a directory
1894  and recursion or skipping was not requested, or if we have anything else and  and recursion or skipping was not requested, or if we have anything else and
# Line 1664  skipping was not requested. The scan pro Line 1896  skipping was not requested. The scan pro
1896  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1897  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1898    
1899    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1900  pathlen = (int)(strlen(pathname));  pathlen = (int)(strlen(pathname));
1901    #endif
1902    
1903  /* Open using zlib if it is supported and the file name ends with .gz. */  /* Open using zlib if it is supported and the file name ends with .gz. */
1904    
# Line 1838  for (op = optionlist; op->one_char != 0; Line 2072  for (op = optionlist; op->one_char != 0;
2072    
2073    if (strchr(op->long_name, '_') != NULL) continue;    if (strchr(op->long_name, '_') != NULL) continue;
2074    
2075    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0 && (op->long_name)[0] == 0)
2076    n = 31 - printf("  %s --%s", s, op->long_name);      n = 31 - printf("  -%c", op->one_char);
2077      else
2078        {
2079        if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
2080          else strcpy(s, "   ");
2081        n = 31 - printf("  %s --%s", s, op->long_name);
2082        }
2083    
2084    if (n < 1) n = 1;    if (n < 1) n = 1;
2085    printf("%.*s%s\n", n, "                     ", op->help_text);    printf("%.*s%s\n", n, "                           ", op->help_text);
2086    }    }
2087    
2088  printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");  printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
2089  printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);  printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
2090  printf("When reading patterns from a file instead of using a command line option,\n");  printf("When reading patterns or file names from a file, trailing white\n");
2091  printf("trailing white space is removed and blank lines are ignored.\n");  printf("space is removed and blank lines are ignored.\n");
2092  printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",  printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
   MAX_PATTERN_COUNT, PATBUFSIZE);  
2093    
2094  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
2095  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
# Line 1872  switch(letter) Line 2112  switch(letter)
2112    case N_LBUFFER: line_buffered = TRUE; break;    case N_LBUFFER: line_buffered = TRUE; break;
2113    case N_LOFFSETS: line_offsets = number = TRUE; break;    case N_LOFFSETS: line_offsets = number = TRUE; break;
2114    case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;    case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2115      case 'a': binary_files = BIN_TEXT; break;
2116    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
2117    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
2118    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
2119      case 'I': binary_files = BIN_NOMATCH; break;
2120    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
2121    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
2122    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
# Line 1891  switch(letter) Line 2133  switch(letter)
2133    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
2134    
2135    case 'V':    case 'V':
2136    fprintf(stderr, "pcregrep version %s\n", pcre_version());    fprintf(stdout, "pcregrep version %s\n", pcre_version());
2137    pcregrep_exit(0);    pcregrep_exit(0);
2138    break;    break;
2139    
# Line 1935  return buffer; Line 2177  return buffer;
2177  *          Compile a single pattern              *  *          Compile a single pattern              *
2178  *************************************************/  *************************************************/
2179    
2180  /* When the -F option has been used, this is called for each substring.  /* Do nothing if the pattern has already been compiled. This is the case for
2181  Otherwise it's called for each supplied pattern.  include/exclude patterns read from a file.
2182    
2183    When the -F option has been used, each "pattern" may be a list of strings,
2184    separated by line breaks. They will be matched literally. We split such a
2185    string and compile the first substring, inserting an additional block into the
2186    pattern chain.
2187    
2188  Arguments:  Arguments:
2189    pattern        the pattern string    p              points to the pattern block
2190    options        the PCRE options    options        the PCRE options
2191    filename       the file name, or NULL for a command-line pattern    popts          the processing options
2192      fromfile       TRUE if the pattern was read from a file
2193      fromtext       file name or identifying text (e.g. "include")
2194    count          0 if this is the only command line pattern, or    count          0 if this is the only command line pattern, or
2195                   number of the command line pattern, or                   number of the command line pattern, or
2196                   linenumber for a pattern from a file                   linenumber for a pattern from a file
# Line 1950  Returns:         TRUE on success, FALSE Line 2199  Returns:         TRUE on success, FALSE
2199  */  */
2200    
2201  static BOOL  static BOOL
2202  compile_single_pattern(char *pattern, int options, char *filename, int count)  compile_pattern(patstr *p, int options, int popts, int fromfile,
2203      const char *fromtext, int count)
2204  {  {
2205  char buffer[PATBUFSIZE];  char buffer[PATBUFSIZE];
2206  const char *error;  const char *error;
2207    char *ps = p->string;
2208    int patlen = strlen(ps);
2209  int errptr;  int errptr;
2210    
2211  if (pattern_count >= MAX_PATTERN_COUNT)  if (p->compiled != NULL) return TRUE;
   {  
   fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",  
     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);  
   return FALSE;  
   }  
2212    
2213  sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,  if ((popts & PO_FIXED_STRINGS) != 0)
   suffix[process_options]);  
 pattern_list[pattern_count] =  
   pcre_compile(buffer, options, &error, &errptr, pcretables);  
 if (pattern_list[pattern_count] != NULL)  
2214    {    {
2215    pattern_count++;    int ellength;
2216    return TRUE;    char *eop = ps + patlen;
2217      char *pe = end_of_line(ps, eop, &ellength);
2218    
2219      if (ellength != 0)
2220        {
2221        if (add_pattern(pe, p) == NULL) return FALSE;
2222        patlen = (int)(pe - ps - ellength);
2223        }
2224    }    }
2225    
2226    sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2227    p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2228    if (p->compiled != NULL) return TRUE;
2229    
2230  /* Handle compile errors */  /* Handle compile errors */
2231    
2232  errptr -= (int)strlen(prefix[process_options]);  errptr -= (int)strlen(prefix[popts]);
2233  if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);  if (errptr > patlen) errptr = patlen;
2234    
2235  if (filename == NULL)  if (fromfile)
2236    {    {
2237    if (count == 0)    fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2238      fprintf(stderr, "pcregrep: Error in command-line regex "      "at offset %d: %s\n", count, fromtext, errptr, error);
       "at offset %d: %s\n", errptr, error);  
   else  
     fprintf(stderr, "pcregrep: Error in %s command-line regex "  
       "at offset %d: %s\n", ordin(count), errptr, error);  
2239    }    }
2240  else  else
2241    {    {
2242    fprintf(stderr, "pcregrep: Error in regex in line %d of %s "    if (count == 0)
2243      "at offset %d: %s\n", count, filename, errptr, error);      fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2244          fromtext, errptr, error);
2245      else
2246        fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2247          ordin(count), fromtext, errptr, error);
2248    }    }
2249    
2250  return FALSE;  return FALSE;
# Line 1999  return FALSE; Line 2253  return FALSE;
2253    
2254    
2255  /*************************************************  /*************************************************
2256  *           Compile one supplied pattern         *  *     Read and compile a file of patterns        *
2257  *************************************************/  *************************************************/
2258    
2259  /* When the -F option has been used, each string may be a list of strings,  /* This is used for pattern files (-f), --include-from, and --exclude-from.
 separated by line breaks. They will be matched literally.  
2260    
2261  Arguments:  Arguments:
2262    pattern        the pattern string    name         the name of the file; "-" is stdin
2263    options        the PCRE options    patptr       pointer to the pattern chain anchor
2264    filename       the file name, or NULL for a command-line pattern    patlastptr   pointer to the last pattern pointer
2265    count          0 if this is the only command line pattern, or    popts        the process options to pass to compile_pattern()
                  number of the command line pattern, or  
                  linenumber for a pattern from a file  
2266    
2267  Returns:         TRUE on success, FALSE after an error  Returns:       TRUE if all went well
2268  */  */
2269    
2270  static BOOL  static BOOL
2271  compile_pattern(char *pattern, int options, char *filename, int count)  read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2272  {  {
2273  if ((process_options & PO_FIXED_STRINGS) != 0)  int linenumber = 0;
2274    FILE *f;
2275    char *filename;
2276    char buffer[PATBUFSIZE];
2277    
2278    if (strcmp(name, "-") == 0)
2279    {    {
2280    char *eop = pattern + strlen(pattern);    f = stdin;
2281    char buffer[PATBUFSIZE];    filename = stdin_name;
2282      }
2283    else
2284      {
2285      f = fopen(name, "r");
2286      if (f == NULL)
2287        {
2288        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2289        return FALSE;
2290        }
2291      filename = name;
2292      }
2293    
2294    while (fgets(buffer, PATBUFSIZE, f) != NULL)
2295      {
2296      char *s = buffer + (int)strlen(buffer);
2297      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2298      *s = 0;
2299      linenumber++;
2300      if (buffer[0] == 0) continue;   /* Skip blank lines */
2301    
2302      /* Note: this call to add_pattern() puts a pointer to the local variable
2303      "buffer" into the pattern chain. However, that pointer is used only when
2304      compiling the pattern, which happens immediately below, so we flatten it
2305      afterwards, as a precaution against any later code trying to use it. */
2306    
2307      *patlastptr = add_pattern(buffer, *patlastptr);
2308      if (*patlastptr == NULL) return FALSE;
2309      if (*patptr == NULL) *patptr = *patlastptr;
2310    
2311      /* This loop is needed because compiling a "pattern" when -F is set may add
2312      on additional literal patterns if the original contains a newline. In the
2313      common case, it never will, because fgets() stops at a newline. However,
2314      the -N option can be used to give pcregrep a different newline setting. */
2315    
2316    for(;;)    for(;;)
2317      {      {
2318      int ellength;      if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2319      char *p = end_of_line(pattern, eop, &ellength);          linenumber))
     if (ellength == 0)  
       return compile_single_pattern(pattern, options, filename, count);  
     sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);  
     pattern = p;  
     if (!compile_single_pattern(buffer, options, filename, count))  
2320        return FALSE;        return FALSE;
2321        (*patlastptr)->string = NULL;            /* Insurance */
2322        if ((*patlastptr)->next == NULL) break;
2323        *patlastptr = (*patlastptr)->next;
2324      }      }
2325    }    }
2326  else return compile_single_pattern(pattern, options, filename, count);  
2327    if (f != stdin) fclose(f);
2328    return TRUE;
2329  }  }
2330    
2331    
# Line 2051  main(int argc, char **argv) Line 2341  main(int argc, char **argv)
2341  {  {
2342  int i, j;  int i, j;
2343  int rc = 1;  int rc = 1;
 int pcre_options = 0;  
 int cmd_pattern_count = 0;  
 int hint_count = 0;  
 int errptr;  
2344  BOOL only_one_at_top;  BOOL only_one_at_top;
2345  char *patterns[MAX_PATTERN_COUNT];  patstr *cp;
2346    fnstr *fn;
2347  const char *locale_from = "--locale";  const char *locale_from = "--locale";
2348  const char *error;  const char *error;
2349    
# Line 2096  for (i = 1; i < argc; i++) Line 2383  for (i = 1; i < argc; i++)
2383    
2384    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2385      {      {
2386      if (pattern_filename != NULL || pattern_count > 0) break;      if (pattern_files != NULL || patterns != NULL) break;
2387        else pcregrep_exit(usage(2));        else pcregrep_exit(usage(2));
2388      }      }
2389    
# Line 2306  for (i = 1; i < argc; i++) Line 2593  for (i = 1; i < argc; i++)
2593      option_data = argv[++i];      option_data = argv[++i];
2594      }      }
2595    
2596    /* If the option type is OP_PATLIST, it's the -e option, which can be called    /* If the option type is OP_PATLIST, it's the -e option, or one of the
2597    multiple times to create a list of patterns. */    include/exclude options, which can be called multiple times to create lists
2598      of patterns. */
2599    
2600    if (op->type == OP_PATLIST)    if (op->type == OP_PATLIST)
2601         {
2602         patdatastr *pd = (patdatastr *)op->dataptr;
2603         *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2604         if (*(pd->lastptr) == NULL) goto EXIT2;
2605         if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2606         }
2607    
2608      /* If the option type is OP_FILELIST, it's one of the options that names a
2609      file. */
2610    
2611      else if (op->type == OP_FILELIST)
2612        {
2613        fndatastr *fd = (fndatastr *)op->dataptr;
2614        fn = (fnstr *)malloc(sizeof(fnstr));
2615        if (fn == NULL)
2616          {
2617          fprintf(stderr, "pcregrep: malloc failed\n");
2618          goto EXIT2;
2619          }
2620        fn->next = NULL;
2621        fn->name = option_data;
2622        if (*(fd->anchor) == NULL)
2623          *(fd->anchor) = fn;
2624        else
2625          (*(fd->lastptr))->next = fn;
2626        *(fd->lastptr) = fn;
2627        }
2628    
2629      /* Handle OP_BINFILES: the value given for binary-files */
2630    
2631      else if (op->type == OP_BINFILES)
2632      {      {
2633      if (cmd_pattern_count >= MAX_PATTERN_COUNT)      if (strcmp(option_data, "binary") == 0)
2634          binary_files = BIN_BINARY;
2635        else if (strcmp(option_data, "without-match") == 0)
2636          binary_files = BIN_NOMATCH;
2637        else if (strcmp(option_data, "text") == 0)
2638          binary_files = BIN_TEXT;
2639        else
2640        {        {
2641        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",        fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2642          MAX_PATTERN_COUNT);          option_data);
2643        return 2;        pcregrep_exit(usage(2));
2644        }        }
     patterns[cmd_pattern_count++] = option_data;  
2645      }      }
2646    
2647    /* Otherwise, deal with single string or numeric data values. */    /* Otherwise, deal with single string or numeric data values. */
# Line 2514  if (jfriedl_XT != 0 || jfriedl_XR != 0) Line 2838  if (jfriedl_XT != 0 || jfriedl_XR != 0)
2838    }    }
2839  #endif  #endif
2840    
2841  /* Get memory for the main buffer, and to store the pattern and hints lists. */  /* Get memory for the main buffer. */
2842    
2843  bufsize = 3*bufthird;  bufsize = 3*bufthird;
2844  main_buffer = (char *)malloc(bufsize);  main_buffer = (char *)malloc(bufsize);
 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  
 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  
2845    
2846  if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)  if (main_buffer == NULL)
2847    {    {
2848    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
2849    goto EXIT2;    goto EXIT2;
2850    }    }
2851    
2852  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there are no files provided by -f,
2853  the first argument is the one and only pattern, and it must exist. */  the first argument is the one and only pattern, and it must exist. */
2854    
2855  if (cmd_pattern_count == 0 && pattern_filename == NULL)  if (patterns == NULL && pattern_files == NULL)
2856    {    {
2857    if (i >= argc) return usage(2);    if (i >= argc) return usage(2);
2858    patterns[cmd_pattern_count++] = argv[i++];    patterns = patterns_last = add_pattern(argv[i++], NULL);
2859      if (patterns == NULL) goto EXIT2;
2860    }    }
2861    
2862  /* Compile the patterns that were provided on the command line, either by  /* Compile the patterns that were provided on the command line, either by
2863  multiple uses of -e or as a single unkeyed pattern. */  multiple uses of -e or as a single unkeyed pattern. We cannot do this until
2864    after all the command-line options are read so that we know which PCRE options
2865    to use. When -F is used, compile_pattern() may add another block into the
2866    chain, so we must not access the next pointer till after the compile. */
2867    
2868  for (j = 0; j < cmd_pattern_count; j++)  for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
2869    {    {
2870    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
2871         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 1 && patterns->next == NULL)? 0 : j))
2872      goto EXIT2;      goto EXIT2;
2873    }    }
2874    
2875  /* Compile the regular expressions that are provided in a file. */  /* Read and compile the regular expressions that are provided in files. */
2876    
2877  if (pattern_filename != NULL)  for (fn = pattern_files; fn != NULL; fn = fn->next)
2878    {    {
2879    int linenumber = 0;    if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
2880    FILE *f;      goto EXIT2;
   char *filename;  
   char buffer[PATBUFSIZE];  
   
   if (strcmp(pattern_filename, "-") == 0)  
     {  
     f = stdin;  
     filename = stdin_name;  
     }  
   else  
     {  
     f = fopen(pattern_filename, "r");  
     if (f == NULL)  
       {  
       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,  
         strerror(errno));  
       goto EXIT2;  
       }  
     filename = pattern_filename;  
     }  
   
   while (fgets(buffer, PATBUFSIZE, f) != NULL)  
     {  
     char *s = buffer + (int)strlen(buffer);  
     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;  
     *s = 0;  
     linenumber++;  
     if (buffer[0] == 0) continue;   /* Skip blank lines */  
     if (!compile_pattern(buffer, pcre_options, filename, linenumber))  
       goto EXIT2;  
     }  
   
   if (f != stdin) fclose(f);  
2881    }    }
2882    
2883  /* Study the regular expressions, as we will be running them many times. Unless  /* Study the regular expressions, as we will be running them many times. Unless
# Line 2594  if ((study_options & PCRE_STUDY_JIT_COMP Line 2888  if ((study_options & PCRE_STUDY_JIT_COMP
2888    jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);    jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
2889  #endif  #endif
2890    
2891  for (j = 0; j < pattern_count; j++)  for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
2892    {    {
2893    hints_list[j] = pcre_study(pattern_list[j], study_options, &error);    cp->hint = pcre_study(cp->compiled, study_options, &error);
2894    if (error != NULL)    if (error != NULL)
2895      {      {
2896      char s[16];      char s[16];
2897      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
2898      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2899      goto EXIT2;      goto EXIT2;
2900      }      }
   hint_count++;  
2901  #ifdef SUPPORT_PCREGREP_JIT  #ifdef SUPPORT_PCREGREP_JIT
2902    if (jit_stack != NULL && hints_list[j] != NULL)    if (jit_stack != NULL && cp->hint != NULL)
2903      pcre_assign_jit_stack(hints_list[j], NULL, jit_stack);      pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
2904  #endif  #endif
2905    }    }
2906    
# Line 2616  pcre_extra block for each pattern. */ Line 2909  pcre_extra block for each pattern. */
2909    
2910  if (match_limit > 0 || match_limit_recursion > 0)  if (match_limit > 0 || match_limit_recursion > 0)
2911    {    {
2912    for (j = 0; j < pattern_count; j++)    for (cp = patterns; cp != NULL; cp = cp->next)
2913      {      {
2914      if (hints_list[j] == NULL)      if (cp->hint == NULL)
2915        {        {
2916        hints_list[j] = malloc(sizeof(pcre_extra));        cp->hint = (pcre_extra *)malloc(sizeof(pcre_extra));
2917        if (hints_list[j] == NULL)        if (cp->hint == NULL)
2918          {          {
2919          fprintf(stderr, "pcregrep: malloc failed\n");          fprintf(stderr, "pcregrep: malloc failed\n");
2920          pcregrep_exit(2);          pcregrep_exit(2);
# Line 2629  if (match_limit > 0 || match_limit_recur Line 2922  if (match_limit > 0 || match_limit_recur
2922        }        }
2923      if (match_limit > 0)      if (match_limit > 0)
2924        {        {
2925        hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;        cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
2926        hints_list[j]->match_limit = match_limit;        cp->hint->match_limit = match_limit;
2927        }        }
2928      if (match_limit_recursion > 0)      if (match_limit_recursion > 0)
2929        {        {
2930        hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;        cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2931        hints_list[j]->match_limit_recursion = match_limit_recursion;        cp->hint->match_limit_recursion = match_limit_recursion;
2932        }        }
2933      }      }
2934    }    }
2935    
2936  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns read from the command line, compile
2937    them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
2938    0. */
2939    
2940  if (exclude_pattern != NULL)  for (j = 0; j < 4; j++)
2941    {    {
2942    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,    int k;
2943      pcretables);    for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
   if (exclude_compiled == NULL)  
2944      {      {
2945      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
2946        errptr, error);           (k == 1 && cp->next == NULL)? 0 : k))
2947      goto EXIT2;        goto EXIT2;
2948      }      }
2949    }    }
2950    
2951  if (include_pattern != NULL)  /* Read and compile include/exclude patterns from files. */
   {  
   include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,  
     pcretables);  
   if (include_compiled == NULL)  
     {  
     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",  
       errptr, error);  
     goto EXIT2;  
     }  
   }  
2952    
2953  if (exclude_dir_pattern != NULL)  for (fn = include_from; fn != NULL; fn = fn->next)
2954    {    {
2955    exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,    if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
     pcretables);  
   if (exclude_dir_compiled == NULL)  
     {  
     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",  
       errptr, error);  
2956      goto EXIT2;      goto EXIT2;
     }  
2957    }    }
2958    
2959  if (include_dir_pattern != NULL)  for (fn = exclude_from; fn != NULL; fn = fn->next)
2960    {    {
2961    include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,    if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
     pcretables);  
   if (include_dir_compiled == NULL)  
     {  
     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",  
       errptr, error);  
2962      goto EXIT2;      goto EXIT2;
     }  
2963    }    }
2964    
2965  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no files that contain lists of files to search, and there are
2966    no file arguments, search stdin, and then exit. */
2967    
2968  if (i >= argc)  if (file_lists == NULL && i >= argc)
2969    {    {
2970    rc = pcregrep(stdin, FR_PLAIN, stdin_name,    rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2971      (filenames > FN_DEFAULT)? stdin_name : NULL);      (filenames > FN_DEFAULT)? stdin_name : NULL);
2972    goto EXIT;    goto EXIT;
2973    }    }
2974    
2975  /* Otherwise, work through the remaining arguments as files or directories.  /* If any files that contain a list of files to search have been specified,
2976  Pass in the fact that there is only one argument at top level - this suppresses  read them line by line and search the given files. */
 the file name if the argument is not a directory and filenames are not  
 otherwise forced. */  
2977    
2978  only_one_at_top = i == argc - 1;   /* Catch initial value of i */  for (fn = file_lists; fn != NULL; fn = fn->next)
2979      {
2980      char buffer[PATBUFSIZE];
2981      FILE *fl;
2982      if (strcmp(fn->name, "-") == 0) fl = stdin; else
2983        {
2984        fl = fopen(fn->name, "rb");
2985        if (fl == NULL)
2986          {
2987          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
2988            strerror(errno));
2989          goto EXIT2;
2990          }
2991        }
2992      while (fgets(buffer, PATBUFSIZE, fl) != NULL)
2993        {
2994        int frc;
2995        char *end = buffer + (int)strlen(buffer);
2996        while (end > buffer && isspace(end[-1])) end--;
2997        *end = 0;
2998        if (*buffer != 0)
2999          {
3000          frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3001          if (frc > 1) rc = frc;
3002            else if (frc == 0 && rc == 1) rc = 0;
3003          }
3004        }
3005      if (fl != stdin) fclose(fl);
3006      }
3007    
3008    /* After handling file-list, work through remaining arguments. Pass in the fact
3009    that there is only one argument at top level - this suppresses the file name if
3010    the argument is not a directory and filenames are not otherwise forced. */
3011    
3012    only_one_at_top = i == argc - 1 && file_lists == NULL;
3013    
3014  for (; i < argc; i++)  for (; i < argc; i++)
3015    {    {
# Line 2718  EXIT: Line 3023  EXIT:
3023  #ifdef SUPPORT_PCREGREP_JIT  #ifdef SUPPORT_PCREGREP_JIT
3024  if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);  if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3025  #endif  #endif
3026    
3027  if (main_buffer != NULL) free(main_buffer);  if (main_buffer != NULL) free(main_buffer);
3028  if (pattern_list != NULL)  
3029    {  free_pattern_chain(patterns);
3030    for (i = 0; i < pattern_count; i++) free(pattern_list[i]);  free_pattern_chain(include_patterns);
3031    free(pattern_list);  free_pattern_chain(include_dir_patterns);
3032    }  free_pattern_chain(exclude_patterns);
3033  if (hints_list != NULL)  free_pattern_chain(exclude_dir_patterns);
3034    {  
3035    for (i = 0; i < hint_count; i++)  free_file_chain(exclude_from);
3036      {  free_file_chain(include_from);
3037      if (hints_list[i] != NULL) pcre_free_study(hints_list[i]);  free_file_chain(pattern_files);
3038      }  free_file_chain(file_lists);
3039    free(hints_list);  
   }  
3040  pcregrep_exit(rc);  pcregrep_exit(rc);
3041    
3042  EXIT2:  EXIT2:

Legend:
Removed from v.835  
changed lines
  Added in v.1033

  ViewVC Help
Powered by ViewVC 1.1.5