/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 971 by ph10, Fri Jun 1 16:29:43 2012 UTC revision 1096 by chpe, Tue Oct 16 15:56:09 2012 UTC
# Line 70  POSSIBILITY OF SUCH DAMAGE. Line 70  POSSIBILITY OF SUCH DAMAGE.
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define MAX_PATTERN_COUNT 100  
73  #define OFFSET_SIZE 99  #define OFFSET_SIZE 99
74    
75  #if BUFSIZ > 8192  #if BUFSIZ > 8192
76  #define PATBUFSIZE BUFSIZ  #define MAXPATLEN BUFSIZ
77  #else  #else
78  #define PATBUFSIZE 8192  #define MAXPATLEN 8192
79  #endif  #endif
80    
81    #define PATBUFSIZE (MAXPATLEN + 10)   /* Allows for prefix+suffix */
82    
83  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
84  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
85  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
# Line 139  static char *colour_string = (char *)"1; Line 140  static char *colour_string = (char *)"1;
140  static char *colour_option = NULL;  static char *colour_option = NULL;
141  static char *dee_option = NULL;  static char *dee_option = NULL;
142  static char *DEE_option = NULL;  static char *DEE_option = NULL;
143    static char *locale = NULL;
144  static char *main_buffer = NULL;  static char *main_buffer = NULL;
145  static char *newline = NULL;  static char *newline = NULL;
146  static char *pattern_filename = NULL;  static char *om_separator = (char *)"";
147  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
 static char *locale = NULL;  
148    
149  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
150    
 static int  pattern_count = 0;  
 static pcre **pattern_list = NULL;  
 static pcre_extra **hints_list = NULL;  
   
 static char *file_list = NULL;  
 static char *include_pattern = NULL;  
 static char *exclude_pattern = NULL;  
 static char *include_dir_pattern = NULL;  
 static char *exclude_dir_pattern = NULL;  
   
 static pcre *include_compiled = NULL;  
 static pcre *exclude_compiled = NULL;  
 static pcre *include_dir_compiled = NULL;  
 static pcre *exclude_dir_compiled = NULL;  
   
151  static int after_context = 0;  static int after_context = 0;
152  static int before_context = 0;  static int before_context = 0;
153  static int binary_files = BIN_BINARY;  static int binary_files = BIN_BINARY;
154  static int both_context = 0;  static int both_context = 0;
155  static int bufthird = PCREGREP_BUFSIZE;  static int bufthird = PCREGREP_BUFSIZE;
156  static int bufsize = 3*PCREGREP_BUFSIZE;  static int bufsize = 3*PCREGREP_BUFSIZE;
157    
158    #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
159    static int dee_action = dee_SKIP;
160    #else
161  static int dee_action = dee_READ;  static int dee_action = dee_READ;
162    #endif
163    
164  static int DEE_action = DEE_READ;  static int DEE_action = DEE_READ;
165  static int error_count = 0;  static int error_count = 0;
166  static int filenames = FN_DEFAULT;  static int filenames = FN_DEFAULT;
167  static int only_matching = -1;  static int pcre_options = 0;
168  static int process_options = 0;  static int process_options = 0;
169    
170  #ifdef SUPPORT_PCREGREP_JIT  #ifdef SUPPORT_PCREGREP_JIT
# Line 196  static BOOL number = FALSE; Line 188  static BOOL number = FALSE;
188  static BOOL omit_zero_count = FALSE;  static BOOL omit_zero_count = FALSE;
189  static BOOL resource_error = FALSE;  static BOOL resource_error = FALSE;
190  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
191    static BOOL show_only_matching = FALSE;
192  static BOOL silent = FALSE;  static BOOL silent = FALSE;
193  static BOOL utf8 = FALSE;  static BOOL utf8 = FALSE;
194    
195    /* Structure for list of --only-matching capturing numbers. */
196    
197    typedef struct omstr {
198      struct omstr *next;
199      int groupnum;
200    } omstr;
201    
202    static omstr *only_matching = NULL;
203    static omstr *only_matching_last = NULL;
204    
205    /* Structure for holding the two variables that describe a number chain. */
206    
207    typedef struct omdatastr {
208      omstr **anchor;
209      omstr **lastptr;
210    } omdatastr;
211    
212    static omdatastr only_matching_data = { &only_matching, &only_matching_last };
213    
214    /* Structure for list of file names (for -f and --{in,ex}clude-from) */
215    
216    typedef struct fnstr {
217      struct fnstr *next;
218      char *name;
219    } fnstr;
220    
221    static fnstr *exclude_from = NULL;
222    static fnstr *exclude_from_last = NULL;
223    static fnstr *include_from = NULL;
224    static fnstr *include_from_last = NULL;
225    
226    static fnstr *file_lists = NULL;
227    static fnstr *file_lists_last = NULL;
228    static fnstr *pattern_files = NULL;
229    static fnstr *pattern_files_last = NULL;
230    
231    /* Structure for holding the two variables that describe a file name chain. */
232    
233    typedef struct fndatastr {
234      fnstr **anchor;
235      fnstr **lastptr;
236    } fndatastr;
237    
238    static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
239    static fndatastr include_from_data = { &include_from, &include_from_last };
240    static fndatastr file_lists_data = { &file_lists, &file_lists_last };
241    static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
242    
243    /* Structure for pattern and its compiled form; used for matching patterns and
244    also for include/exclude patterns. */
245    
246    typedef struct patstr {
247      struct patstr *next;
248      char *string;
249      pcre *compiled;
250      pcre_extra *hint;
251    } patstr;
252    
253    static patstr *patterns = NULL;
254    static patstr *patterns_last = NULL;
255    static patstr *include_patterns = NULL;
256    static patstr *include_patterns_last = NULL;
257    static patstr *exclude_patterns = NULL;
258    static patstr *exclude_patterns_last = NULL;
259    static patstr *include_dir_patterns = NULL;
260    static patstr *include_dir_patterns_last = NULL;
261    static patstr *exclude_dir_patterns = NULL;
262    static patstr *exclude_dir_patterns_last = NULL;
263    
264    /* Structure holding the two variables that describe a pattern chain. A pointer
265    to such structures is used for each appropriate option. */
266    
267    typedef struct patdatastr {
268      patstr **anchor;
269      patstr **lastptr;
270    } patdatastr;
271    
272    static patdatastr match_patdata = { &patterns, &patterns_last };
273    static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
274    static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
275    static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
276    static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
277    
278    static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
279                                     &include_dir_patterns, &exclude_dir_patterns };
280    
281    static const char *incexname[4] = { "--include", "--exclude",
282                                        "--include-dir", "--exclude-dir" };
283    
284  /* Structure for options and list of them */  /* Structure for options and list of them */
285    
286  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
287         OP_OP_NUMBER, OP_PATLIST, OP_BINFILES };         OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
288    
289  typedef struct option_item {  typedef struct option_item {
290    int type;    int type;
# Line 233  used to identify them. */ Line 315  used to identify them. */
315  #define N_NOJIT        (-16)  #define N_NOJIT        (-16)
316  #define N_FILE_LIST    (-17)  #define N_FILE_LIST    (-17)
317  #define N_BINARY_FILES (-18)  #define N_BINARY_FILES (-18)
318    #define N_EXCLUDE_FROM (-19)
319    #define N_INCLUDE_FROM (-20)
320    #define N_OM_SEPARATOR (-21)
321    
322  static option_item optionlist[] = {  static option_item optionlist[] = {
323    { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },    { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
# Line 248  static option_item optionlist[] = { Line 333  static option_item optionlist[] = {
333    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
334    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
335    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
336    { OP_PATLIST,    'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },    { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
337    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
338    { OP_STRING,     'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
339    { OP_STRING,     N_FILE_LIST, &file_list,     "file-list=path","read files to search from file" },    { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
340    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
341    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
342    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
# Line 273  static option_item optionlist[] = { Line 358  static option_item optionlist[] = {
358    { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
359    { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },    { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
360    { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
361    { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },    { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
362      { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
363    { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
364    { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
365    { OP_STRING,     N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
366    { OP_STRING,     N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
367    { OP_STRING,     N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },    { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
368    { OP_STRING,     N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },    { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
369      { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
370      { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
371    
372    /* These two were accidentally implemented with underscores instead of    /* These two were accidentally implemented with underscores instead of
373    hyphens in the option names. As this was not discovered for several releases,    hyphens in the option names. As this was not discovered for several releases,
374    the incorrect versions are left in the table for compatibility. However, the    the incorrect versions are left in the table for compatibility. However, the
375    --help function misses out any option that has an underscore in its name. */    --help function misses out any option that has an underscore in its name. */
376    
377    { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },    { OP_PATLIST,   N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
378    { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },    { OP_PATLIST,   N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
379    
380  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
381    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
# Line 304  static option_item optionlist[] = { Line 392  static option_item optionlist[] = {
392  /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F  /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
393  options. These set the 1, 2, and 4 bits in process_options, respectively. Note  options. These set the 1, 2, and 4 bits in process_options, respectively. Note
394  that the combination of -w and -x has the same effect as -x on its own, so we  that the combination of -w and -x has the same effect as -x on its own, so we
395  can treat them as the same. */  can treat them as the same. Note that the MAXPATLEN macro assumes the longest
396    prefix+suffix is 10 characters; if anything longer is added, it must be
397    adjusted. */
398    
399  static const char *prefix[] = {  static const char *prefix[] = {
400    "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };    "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
# Line 344  if (resource_error) Line 434  if (resource_error)
434      PCRE_ERROR_JIT_STACKLIMIT);      PCRE_ERROR_JIT_STACKLIMIT);
435    fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");    fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
436    }    }
   
437  exit(rc);  exit(rc);
438  }  }
439    
440    
441  /*************************************************  /*************************************************
442    *          Add item to chain of patterns         *
443    *************************************************/
444    
445    /* Used to add an item onto a chain, or just return an unconnected item if the
446    "after" argument is NULL.
447    
448    Arguments:
449      s          pattern string to add
450      after      if not NULL points to item to insert after
451    
452    Returns:     new pattern block
453    */
454    
455    static patstr *
456    add_pattern(char *s, patstr *after)
457    {
458    patstr *p = (patstr *)malloc(sizeof(patstr));
459    if (p == NULL)
460      {
461      fprintf(stderr, "pcregrep: malloc failed\n");
462      pcregrep_exit(2);
463      }
464    if (strlen(s) > MAXPATLEN)
465      {
466      fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
467        MAXPATLEN);
468      return NULL;
469      }
470    p->next = NULL;
471    p->string = s;
472    p->compiled = NULL;
473    p->hint = NULL;
474    
475    if (after != NULL)
476      {
477      p->next = after->next;
478      after->next = p;
479      }
480    return p;
481    }
482    
483    
484    /*************************************************
485    *           Free chain of patterns               *
486    *************************************************/
487    
488    /* Used for several chains of patterns.
489    
490    Argument: pointer to start of chain
491    Returns:  nothing
492    */
493    
494    static void
495    free_pattern_chain(patstr *pc)
496    {
497    while (pc != NULL)
498      {
499      patstr *p = pc;
500      pc = p->next;
501      if (p->hint != NULL) pcre_free_study(p->hint);
502      if (p->compiled != NULL) pcre_free(p->compiled);
503      free(p);
504      }
505    }
506    
507    
508    /*************************************************
509    *           Free chain of file names             *
510    *************************************************/
511    
512    /*
513    Argument: pointer to start of chain
514    Returns:  nothing
515    */
516    
517    static void
518    free_file_chain(fnstr *fn)
519    {
520    while (fn != NULL)
521      {
522      fnstr *f = fn;
523      fn = f->next;
524      free(f);
525      }
526    }
527    
528    
529    /*************************************************
530  *            OS-specific functions               *  *            OS-specific functions               *
531  *************************************************/  *************************************************/
532    
# Line 365  although at present the only ones are fo Line 542  although at present the only ones are fo
542  #include <dirent.h>  #include <dirent.h>
543    
544  typedef DIR directory_type;  typedef DIR directory_type;
545    #define FILESEP '/'
546    
547  static int  static int
548  isdirectory(char *filename)  isdirectory(char *filename)
# Line 372  isdirectory(char *filename) Line 550  isdirectory(char *filename)
550  struct stat statbuf;  struct stat statbuf;
551  if (stat(filename, &statbuf) < 0)  if (stat(filename, &statbuf) < 0)
552    return 0;        /* In the expectation that opening as a file will fail */    return 0;        /* In the expectation that opening as a file will fail */
553  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return (statbuf.st_mode & S_IFMT) == S_IFDIR;
554  }  }
555    
556  static directory_type *  static directory_type *
# Line 459  BOOL first; Line 637  BOOL first;
637  WIN32_FIND_DATA data;  WIN32_FIND_DATA data;
638  } directory_type;  } directory_type;
639    
640    #define FILESEP '/'
641    
642  int  int
643  isdirectory(char *filename)  isdirectory(char *filename)
644  {  {
645  DWORD attr = GetFileAttributes(filename);  DWORD attr = GetFileAttributes(filename);
646  if (attr == INVALID_FILE_ATTRIBUTES)  if (attr == INVALID_FILE_ATTRIBUTES)
647    return 0;    return 0;
648  return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;  return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
649  }  }
650    
651  directory_type *  directory_type *
# Line 476  char *pattern; Line 656  char *pattern;
656  directory_type *dir;  directory_type *dir;
657  DWORD err;  DWORD err;
658  len = strlen(filename);  len = strlen(filename);
659  pattern = (char *) malloc(len + 3);  pattern = (char *)malloc(len + 3);
660  dir = (directory_type *) malloc(sizeof(*dir));  dir = (directory_type *)malloc(sizeof(*dir));
661  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
662    {    {
663    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
# Line 563  return FALSE; Line 743  return FALSE;
743    
744  #else  #else
745    
746    #define FILESEP 0
747  typedef void directory_type;  typedef void directory_type;
748    
749  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
# Line 619  return sys_errlist[n]; Line 800  return sys_errlist[n];
800    
801    
802  /*************************************************  /*************************************************
803    *                Usage function                  *
804    *************************************************/
805    
806    static int
807    usage(int rc)
808    {
809    option_item *op;
810    fprintf(stderr, "Usage: pcregrep [-");
811    for (op = optionlist; op->one_char != 0; op++)
812      {
813      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
814      }
815    fprintf(stderr, "] [long options] [pattern] [files]\n");
816    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
817      "options.\n");
818    return rc;
819    }
820    
821    
822    
823    /*************************************************
824    *                Help function                   *
825    *************************************************/
826    
827    static void
828    help(void)
829    {
830    option_item *op;
831    
832    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
833    printf("Search for PATTERN in each FILE or standard input.\n");
834    printf("PATTERN must be present if neither -e nor -f is used.\n");
835    printf("\"-\" can be used as a file name to mean STDIN.\n");
836    
837    #ifdef SUPPORT_LIBZ
838    printf("Files whose names end in .gz are read using zlib.\n");
839    #endif
840    
841    #ifdef SUPPORT_LIBBZ2
842    printf("Files whose names end in .bz2 are read using bzlib2.\n");
843    #endif
844    
845    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
846    printf("Other files and the standard input are read as plain files.\n\n");
847    #else
848    printf("All files are read as plain files, without any interpretation.\n\n");
849    #endif
850    
851    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
852    printf("Options:\n");
853    
854    for (op = optionlist; op->one_char != 0; op++)
855      {
856      int n;
857      char s[4];
858    
859      /* Two options were accidentally implemented and documented with underscores
860      instead of hyphens in their names, something that was not noticed for quite a
861      few releases. When fixing this, I left the underscored versions in the list
862      in case people were using them. However, we don't want to display them in the
863      help data. There are no other options that contain underscores, and we do not
864      expect ever to implement such options. Therefore, just omit any option that
865      contains an underscore. */
866    
867      if (strchr(op->long_name, '_') != NULL) continue;
868    
869      if (op->one_char > 0 && (op->long_name)[0] == 0)
870        n = 31 - printf("  -%c", op->one_char);
871      else
872        {
873        if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
874          else strcpy(s, "   ");
875        n = 31 - printf("  %s --%s", s, op->long_name);
876        }
877    
878      if (n < 1) n = 1;
879      printf("%.*s%s\n", n, "                           ", op->help_text);
880      }
881    
882    printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
883    printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
884    printf("When reading patterns or file names from a file, trailing white\n");
885    printf("space is removed and blank lines are ignored.\n");
886    printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
887    
888    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
889    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
890    }
891    
892    
893    
894    /*************************************************
895    *            Test exclude/includes               *
896    *************************************************/
897    
898    /* If any exclude pattern matches, the path is excluded. Otherwise, unless
899    there are no includes, the path must match an include pattern.
900    
901    Arguments:
902      path      the path to be matched
903      ip        the chain of include patterns
904      ep        the chain of exclude patterns
905    
906    Returns:    TRUE if the path is not excluded
907    */
908    
909    static BOOL
910    test_incexc(char *path, patstr *ip, patstr *ep)
911    {
912    int plen = strlen(path);
913    
914    for (; ep != NULL; ep = ep->next)
915      {
916      if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
917        return FALSE;
918      }
919    
920    if (ip == NULL) return TRUE;
921    
922    for (; ip != NULL; ip = ip->next)
923      {
924      if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
925        return TRUE;
926      }
927    
928    return FALSE;
929    }
930    
931    
932    
933    /*************************************************
934    *         Decode integer argument value          *
935    *************************************************/
936    
937    /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
938    because SunOS4 doesn't have it. This is used only for unpicking arguments, so
939    just keep it simple.
940    
941    Arguments:
942      option_data   the option data string
943      op            the option item (for error messages)
944      longop        TRUE if option given in long form
945    
946    Returns:        a long integer
947    */
948    
949    static long int
950    decode_number(char *option_data, option_item *op, BOOL longop)
951    {
952    unsigned long int n = 0;
953    char *endptr = option_data;
954    while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
955    while (isdigit((unsigned char)(*endptr)))
956      n = n * 10 + (int)(*endptr++ - '0');
957    if (toupper(*endptr) == 'K')
958      {
959      n *= 1024;
960      endptr++;
961      }
962    else if (toupper(*endptr) == 'M')
963      {
964      n *= 1024*1024;
965      endptr++;
966      }
967    
968    if (*endptr != 0)   /* Error */
969      {
970      if (longop)
971        {
972        char *equals = strchr(op->long_name, '=');
973        int nlen = (equals == NULL)? (int)strlen(op->long_name) :
974          (int)(equals - op->long_name);
975        fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
976          option_data, nlen, op->long_name);
977        }
978      else
979        fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
980          option_data, op->one_char);
981      pcregrep_exit(usage(2));
982      }
983    
984    return n;
985    }
986    
987    
988    
989    /*************************************************
990    *       Add item to a chain of numbers           *
991    *************************************************/
992    
993    /* Used to add an item onto a chain, or just return an unconnected item if the
994    "after" argument is NULL.
995    
996    Arguments:
997      n          the number to add
998      after      if not NULL points to item to insert after
999    
1000    Returns:     new number block
1001    */
1002    
1003    static omstr *
1004    add_number(int n, omstr *after)
1005    {
1006    omstr *om = (omstr *)malloc(sizeof(omstr));
1007    
1008    if (om == NULL)
1009      {
1010      fprintf(stderr, "pcregrep: malloc failed\n");
1011      pcregrep_exit(2);
1012      }
1013    om->next = NULL;
1014    om->groupnum = n;
1015    
1016    if (after != NULL)
1017      {
1018      om->next = after->next;
1019      after->next = om;
1020      }
1021    return om;
1022    }
1023    
1024    
1025    
1026    /*************************************************
1027  *            Read one line of input              *  *            Read one line of input              *
1028  *************************************************/  *************************************************/
1029    
# Line 734  switch(endlinetype) Line 1139  switch(endlinetype)
1139    
1140      switch (c)      switch (c)
1141        {        {
1142        case 0x0a:    /* LF */        case '\n':
1143        *lenptr = 1;        *lenptr = 1;
1144        return p;        return p;
1145    
1146        case 0x0d:    /* CR */        case '\r':
1147        if (p < endptr && *p == 0x0a)        if (p < endptr && *p == '\n')
1148          {          {
1149          *lenptr = 2;          *lenptr = 2;
1150          p++;          p++;
# Line 778  switch(endlinetype) Line 1183  switch(endlinetype)
1183    
1184      switch (c)      switch (c)
1185        {        {
1186        case 0x0a:    /* LF */        case '\n':    /* LF */
1187        case 0x0b:    /* VT */        case '\v':    /* VT */
1188        case 0x0c:    /* FF */        case '\f':    /* FF */
1189        *lenptr = 1;        *lenptr = 1;
1190        return p;        return p;
1191    
1192        case 0x0d:    /* CR */        case '\r':    /* CR */
1193        if (p < endptr && *p == 0x0a)        if (p < endptr && *p == '\n')
1194          {          {
1195          *lenptr = 2;          *lenptr = 2;
1196          p++;          p++;
# Line 793  switch(endlinetype) Line 1198  switch(endlinetype)
1198        else *lenptr = 1;        else *lenptr = 1;
1199        return p;        return p;
1200    
1201        case 0x85:    /* NEL */  #ifndef EBCDIC
1202          case 0x85:    /* Unicode NEL */
1203        *lenptr = utf8? 2 : 1;        *lenptr = utf8? 2 : 1;
1204        return p;        return p;
1205    
1206        case 0x2028:  /* LS */        case 0x2028:  /* Unicode LS */
1207        case 0x2029:  /* PS */        case 0x2029:  /* Unicode PS */
1208        *lenptr = 3;        *lenptr = 3;
1209        return p;        return p;
1210    #endif  /* Not EBCDIC */
1211    
1212        default:        default:
1213        break;        break;
# Line 859  switch(endlinetype) Line 1266  switch(endlinetype)
1266    
1267    while (p > startptr)    while (p > startptr)
1268      {      {
1269      register int c;      register unsigned int c;
1270      char *pp = p - 1;      char *pp = p - 1;
1271    
1272      if (utf8)      if (utf8)
# Line 884  switch(endlinetype) Line 1291  switch(endlinetype)
1291    
1292      if (endlinetype == EL_ANYCRLF) switch (c)      if (endlinetype == EL_ANYCRLF) switch (c)
1293        {        {
1294        case 0x0a:    /* LF */        case '\n':    /* LF */
1295        case 0x0d:    /* CR */        case '\r':    /* CR */
1296        return p;        return p;
1297    
1298        default:        default:
# Line 894  switch(endlinetype) Line 1301  switch(endlinetype)
1301    
1302      else switch (c)      else switch (c)
1303        {        {
1304        case 0x0a:    /* LF */        case '\n':    /* LF */
1305        case 0x0b:    /* VT */        case '\v':    /* VT */
1306        case 0x0c:    /* FF */        case '\f':    /* FF */
1307        case 0x0d:    /* CR */        case '\r':    /* CR */
1308        case 0x85:    /* NEL */  #ifndef EBCDIC
1309        case 0x2028:  /* LS */        case 0x85:    /* Unicode NEL */
1310        case 0x2029:  /* PS */        case 0x2028:  /* Unicode LS */
1311          case 0x2029:  /* Unicode PS */
1312    #endif  /* Not EBCDIC */
1313        return p;        return p;
1314    
1315        default:        default:
# Line 935  Arguments: Line 1344  Arguments:
1344  Returns:            nothing  Returns:            nothing
1345  */  */
1346    
1347  static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,  static void
1348    char *endptr, char *printname)  do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1349      char *printname)
1350  {  {
1351  if (after_context > 0 && lastmatchnumber > 0)  if (after_context > 0 && lastmatchnumber > 0)
1352    {    {
# Line 983  match_patterns(char *matchptr, size_t le Line 1393  match_patterns(char *matchptr, size_t le
1393  {  {
1394  int i;  int i;
1395  size_t slen = length;  size_t slen = length;
1396    patstr *p = patterns;
1397  const char *msg = "this text:\n\n";  const char *msg = "this text:\n\n";
1398    
1399  if (slen > 200)  if (slen > 200)
1400    {    {
1401    slen = 200;    slen = 200;
1402    msg = "text that starts:\n\n";    msg = "text that starts:\n\n";
1403    }    }
1404  for (i = 0; i < pattern_count; i++)  for (i = 1; p != NULL; p = p->next, i++)
1405    {    {
1406    *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,    *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1407      startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);      startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
1408    if (*mrc >= 0) return TRUE;    if (*mrc >= 0) return TRUE;
1409    if (*mrc == PCRE_ERROR_NOMATCH) continue;    if (*mrc == PCRE_ERROR_NOMATCH) continue;
1410    fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);    fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1411    if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);    if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1412    fprintf(stderr, "%s", msg);    fprintf(stderr, "%s", msg);
1413    FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */    FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1414    fprintf(stderr, "\n\n");    fprintf(stderr, "\n\n");
# Line 1105  else Line 1517  else
1517  endptr = main_buffer + bufflength;  endptr = main_buffer + bufflength;
1518    
1519  /* Unless binary-files=text, see if we have a binary file. This uses the same  /* Unless binary-files=text, see if we have a binary file. This uses the same
1520  rule as GNU grep, namely, a search for a binary zero byte near the start of the  rule as GNU grep, namely, a search for a binary zero byte near the start of the
1521  file. */  file. */
1522    
1523  if (binary_files != BIN_TEXT)  if (binary_files != BIN_TEXT)
1524    {    {
1525    binary =    binary =
1526      memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;      memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1527    if (binary && binary_files == BIN_NOMATCH) return 1;    if (binary && binary_files == BIN_NOMATCH) return 1;
1528    }    }
1529    
1530  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
1531  files, endptr will be at the end of the buffer when we are in the middle of the  files, endptr will be at the end of the buffer when we are in the middle of the
# Line 1191  while (ptr < endptr) Line 1603  while (ptr < endptr)
1603    
1604    
1605        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1606            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,            match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1607                PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);                PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1608    
1609        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
# Line 1206  while (ptr < endptr) Line 1618  while (ptr < endptr)
1618    }    }
1619  #endif  #endif
1620    
1621    /* We come back here after a match when the -o option (only_matching) is set,    /* We come back here after a match when show_only_matching is set, in order
1622    in order to find any further matches in the same line. */    to find any further matches in the same line. This applies to
1623      --only-matching, --file-offsets, and --line-offsets. */
1624    
1625    ONLY_MATCHING_RESTART:    ONLY_MATCHING_RESTART:
1626    
# Line 1230  while (ptr < endptr) Line 1643  while (ptr < endptr)
1643      /* Just count if just counting is wanted. */      /* Just count if just counting is wanted. */
1644    
1645      if (count_only) count++;      if (count_only) count++;
1646    
1647      /* When handling a binary file and binary-files==binary, the "binary"      /* When handling a binary file and binary-files==binary, the "binary"
1648      variable will be set true (it's false in all other cases). In this      variable will be set true (it's false in all other cases). In this
1649      situation we just want to output the file name. No need to scan further. */      situation we just want to output the file name. No need to scan further. */
1650    
1651      else if (binary)      else if (binary)
1652        {        {
1653        fprintf(stdout, "Binary file %s matches\n", filename);        fprintf(stdout, "Binary file %s matches\n", filename);
1654        return 0;        return 0;
1655        }        }
1656    
1657      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1658      in the file. */      in the file. */
# Line 1254  while (ptr < endptr) Line 1667  while (ptr < endptr)
1667    
1668      else if (quiet) return 0;      else if (quiet) return 0;
1669    
1670      /* The --only-matching option prints just the substring that matched, or a      /* The --only-matching option prints just the substring that matched,
1671      captured portion of it, as long as this string is not empty, and the      and/or one or more captured portions of it, as long as these strings are
1672      --file-offsets and --line-offsets options output offsets for the matching      not empty. The --file-offsets and --line-offsets options output offsets for
1673      substring (they both force --only-matching = 0). None of these options      the matching substring (all three set show_only_matching). None of these
1674      prints any context. Afterwards, adjust the start and then jump back to look      mutually exclusive options prints any context. Afterwards, adjust the start
1675      for further matches in the same line. If we are in invert mode, however,      and then jump back to look for further matches in the same line. If we are
1676      nothing is printed and we do not restart - this could still be useful      in invert mode, however, nothing is printed and we do not restart - this
1677      because the return code is set. */      could still be useful because the return code is set. */
1678    
1679      else if (only_matching >= 0)      else if (show_only_matching)
1680        {        {
1681        if (!invert)        if (!invert)
1682          {          {
1683          if (printname != NULL) fprintf(stdout, "%s:", printname);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1684          if (number) fprintf(stdout, "%d:", linenumber);          if (number) fprintf(stdout, "%d:", linenumber);
1685    
1686            /* Handle --line-offsets */
1687    
1688          if (line_offsets)          if (line_offsets)
1689            fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),            fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1690              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1691    
1692            /* Handle --file-offsets */
1693    
1694          else if (file_offsets)          else if (file_offsets)
1695            fprintf(stdout, "%d,%d\n",            fprintf(stdout, "%d,%d\n",
1696              (int)(filepos + matchptr + offsets[0] - ptr),              (int)(filepos + matchptr + offsets[0] - ptr),
1697              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1698          else if (only_matching < mrc)  
1699            /* Handle --only-matching, which may occur many times */
1700    
1701            else
1702            {            {
1703            int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];            BOOL printed = FALSE;
1704            if (plen > 0)            omstr *om;
1705    
1706              for (om = only_matching; om != NULL; om = om->next)
1707              {              {
1708              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);              int n = om->groupnum;
1709              FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);              if (n < mrc)
1710              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);                {
1711              fprintf(stdout, "\n");                int plen = offsets[2*n + 1] - offsets[2*n];
1712                  if (plen > 0)
1713                    {
1714                    if (printed) fprintf(stdout, "%s", om_separator);
1715                    if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1716                    FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
1717                    if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1718                    printed = TRUE;
1719                    }
1720                  }
1721              }              }
1722    
1723              if (printed || printname != NULL || number) fprintf(stdout, "\n");
1724            }            }
1725          else if (printname != NULL || number) fprintf(stdout, "\n");  
1726            /* Prepare to repeat to find the next match */
1727    
1728          match = FALSE;          match = FALSE;
1729          if (line_buffered) fflush(stdout);          if (line_buffered) fflush(stdout);
1730          rc = 0;                      /* Had some success */          rc = 0;                      /* Had some success */
# Line 1550  while (ptr < endptr) Line 1987  while (ptr < endptr)
1987  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
1988  hyphenpending if it prints something. */  hyphenpending if it prints something. */
1989    
1990  if (only_matching < 0 && !count_only)  if (!show_only_matching && !count_only)
1991    {    {
1992    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1993    hyphenpending |= endhyphenpending;    hyphenpending |= endhyphenpending;
# Line 1594  Arguments: Line 2031  Arguments:
2031    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
2032    only_one_at_top   TRUE if the path is the only one at toplevel    only_one_at_top   TRUE if the path is the only one at toplevel
2033    
2034  Returns:   0 if there was at least one match  Returns:  -1 the file/directory was skipped
2035               0 if there was at least one match
2036             1 if there were no matches             1 if there were no matches
2037             2 there was some kind of error             2 there was some kind of error
2038    
# Line 1605  static int Line 2043  static int
2043  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2044  {  {
2045  int rc = 1;  int rc = 1;
 int sep;  
2046  int frtype;  int frtype;
2047  void *handle;  void *handle;
2048    char *lastcomp;
2049  FILE *in = NULL;           /* Ensure initialized */  FILE *in = NULL;           /* Ensure initialized */
2050    
2051  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
# Line 1631  if (strcmp(pathname, "-") == 0) Line 2069  if (strcmp(pathname, "-") == 0)
2069        stdin_name : NULL);        stdin_name : NULL);
2070    }    }
2071    
2072  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2073  each file and directory within it, subject to any include or exclude patterns  directories, whereas --include and --exclude apply to everything else. The test
2074  that were set. The scanning code is localized so it can be made  is against the final component of the path. */
2075  system-specific. */  
2076    lastcomp = strrchr(pathname, FILESEP);
2077    lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2078    
2079    /* If the file is a directory, skip if not recursing or if explicitly excluded.
2080    Otherwise, scan the directory and recurse for each path within it. The scanning
2081    code is localized so it can be made system-specific. */
2082    
2083    if (isdirectory(pathname))
2084      {
2085      if (dee_action == dee_SKIP ||
2086          !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2087        return -1;
2088    
 if ((sep = isdirectory(pathname)) != 0)  
   {  
   if (dee_action == dee_SKIP) return 1;  
2089    if (dee_action == dee_RECURSE)    if (dee_action == dee_RECURSE)
2090      {      {
2091      char buffer[1024];      char buffer[1024];
# Line 1655  if ((sep = isdirectory(pathname)) != 0) Line 2102  if ((sep = isdirectory(pathname)) != 0)
2102    
2103      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
2104        {        {
2105        int frc, nflen;        int frc;
2106        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
       nflen = (int)(strlen(nextfile));  
   
       if (isdirectory(buffer))  
         {  
         if (exclude_dir_compiled != NULL &&  
             pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)  
           continue;  
   
         if (include_dir_compiled != NULL &&  
             pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)  
           continue;  
         }  
       else  
         {  
         if (exclude_compiled != NULL &&  
             pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)  
           continue;  
   
         if (include_compiled != NULL &&  
             pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)  
           continue;  
         }  
   
2107        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2108        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
2109         else if (frc == 0 && rc == 1) rc = 0;         else if (frc == 0 && rc == 1) rc = 0;
# Line 1691  if ((sep = isdirectory(pathname)) != 0) Line 2115  if ((sep = isdirectory(pathname)) != 0)
2115    }    }
2116    
2117  /* If the file is not a directory and not a regular file, skip it if that's  /* If the file is not a directory and not a regular file, skip it if that's
2118  been requested. */  been requested. Otherwise, check for explicit include/exclude. */
2119    
2120  else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;  else if ((!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2121              !test_incexc(lastcomp, include_patterns, exclude_patterns))
2122            return -1;
2123    
2124  /* Control reaches here if we have a regular file, or if we have a directory  /* Control reaches here if we have a regular file, or if we have a directory
2125  and recursion or skipping was not requested, or if we have anything else and  and recursion or skipping was not requested, or if we have anything else and
# Line 1808  return rc; Line 2234  return rc;
2234    
2235    
2236    
   
 /*************************************************  
 *                Usage function                  *  
 *************************************************/  
   
 static int  
 usage(int rc)  
 {  
 option_item *op;  
 fprintf(stderr, "Usage: pcregrep [-");  
 for (op = optionlist; op->one_char != 0; op++)  
   {  
   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);  
   }  
 fprintf(stderr, "] [long options] [pattern] [files]\n");  
 fprintf(stderr, "Type `pcregrep --help' for more information and the long "  
   "options.\n");  
 return rc;  
 }  
   
   
   
   
 /*************************************************  
 *                Help function                   *  
 *************************************************/  
   
 static void  
 help(void)  
 {  
 option_item *op;  
   
 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  
 printf("Search for PATTERN in each FILE or standard input.\n");  
 printf("PATTERN must be present if neither -e nor -f is used.\n");  
 printf("\"-\" can be used as a file name to mean STDIN.\n");  
   
 #ifdef SUPPORT_LIBZ  
 printf("Files whose names end in .gz are read using zlib.\n");  
 #endif  
   
 #ifdef SUPPORT_LIBBZ2  
 printf("Files whose names end in .bz2 are read using bzlib2.\n");  
 #endif  
   
 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2  
 printf("Other files and the standard input are read as plain files.\n\n");  
 #else  
 printf("All files are read as plain files, without any interpretation.\n\n");  
 #endif  
   
 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
 printf("Options:\n");  
   
 for (op = optionlist; op->one_char != 0; op++)  
   {  
   int n;  
   char s[4];  
   
   /* Two options were accidentally implemented and documented with underscores  
   instead of hyphens in their names, something that was not noticed for quite a  
   few releases. When fixing this, I left the underscored versions in the list  
   in case people were using them. However, we don't want to display them in the  
   help data. There are no other options that contain underscores, and we do not  
   expect ever to implement such options. Therefore, just omit any option that  
   contains an underscore. */  
   
   if (strchr(op->long_name, '_') != NULL) continue;  
   
   if (op->one_char > 0 && (op->long_name)[0] == 0)  
     n = 31 - printf("  -%c", op->one_char);  
   else  
     {  
     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);  
       else strcpy(s, "   ");  
     n = 31 - printf("  %s --%s", s, op->long_name);  
     }  
   
   if (n < 1) n = 1;  
   printf("%.*s%s\n", n, "                           ", op->help_text);  
   }  
   
 printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");  
 printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);  
 printf("When reading patterns or file names from a file, trailing white\n");  
 printf("space is removed and blank lines are ignored.\n");  
 printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",  
   MAX_PATTERN_COUNT, PATBUFSIZE);  
   
 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");  
 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  
 }  
   
   
   
   
2237  /*************************************************  /*************************************************
2238  *    Handle a single-letter, no data option      *  *    Handle a single-letter, no data option      *
2239  *************************************************/  *************************************************/
# Line 1929  switch(letter) Line 2259  switch(letter)
2259    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
2260    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2261    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
2262    case 'o': only_matching = 0; break;  
2263      case 'o':
2264      only_matching_last = add_number(0, only_matching_last);
2265      if (only_matching == NULL) only_matching = only_matching_last;
2266      break;
2267    
2268    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
2269    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
2270    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
# Line 1939  switch(letter) Line 2274  switch(letter)
2274    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
2275    
2276    case 'V':    case 'V':
2277    fprintf(stderr, "pcregrep version %s\n", pcre_version());    fprintf(stdout, "pcregrep version %s\n", pcre_version());
2278    pcregrep_exit(0);    pcregrep_exit(0);
2279    break;    break;
2280    
# Line 1983  return buffer; Line 2318  return buffer;
2318  *          Compile a single pattern              *  *          Compile a single pattern              *
2319  *************************************************/  *************************************************/
2320    
2321  /* When the -F option has been used, this is called for each substring.  /* Do nothing if the pattern has already been compiled. This is the case for
2322  Otherwise it's called for each supplied pattern.  include/exclude patterns read from a file.
2323    
2324    When the -F option has been used, each "pattern" may be a list of strings,
2325    separated by line breaks. They will be matched literally. We split such a
2326    string and compile the first substring, inserting an additional block into the
2327    pattern chain.
2328    
2329  Arguments:  Arguments:
2330    pattern        the pattern string    p              points to the pattern block
2331    options        the PCRE options    options        the PCRE options
2332    filename       the file name, or NULL for a command-line pattern    popts          the processing options
2333      fromfile       TRUE if the pattern was read from a file
2334      fromtext       file name or identifying text (e.g. "include")
2335    count          0 if this is the only command line pattern, or    count          0 if this is the only command line pattern, or
2336                   number of the command line pattern, or                   number of the command line pattern, or
2337                   linenumber for a pattern from a file                   linenumber for a pattern from a file
# Line 1998  Returns:         TRUE on success, FALSE Line 2340  Returns:         TRUE on success, FALSE
2340  */  */
2341    
2342  static BOOL  static BOOL
2343  compile_single_pattern(char *pattern, int options, char *filename, int count)  compile_pattern(patstr *p, int options, int popts, int fromfile,
2344      const char *fromtext, int count)
2345  {  {
2346  char buffer[PATBUFSIZE];  char buffer[PATBUFSIZE];
2347  const char *error;  const char *error;
2348    char *ps = p->string;
2349    int patlen = strlen(ps);
2350  int errptr;  int errptr;
2351    
2352  if (pattern_count >= MAX_PATTERN_COUNT)  if (p->compiled != NULL) return TRUE;
   {  
   fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",  
     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);  
   return FALSE;  
   }  
2353    
2354  sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,  if ((popts & PO_FIXED_STRINGS) != 0)
   suffix[process_options]);  
 pattern_list[pattern_count] =  
   pcre_compile(buffer, options, &error, &errptr, pcretables);  
 if (pattern_list[pattern_count] != NULL)  
2355    {    {
2356    pattern_count++;    int ellength;
2357    return TRUE;    char *eop = ps + patlen;
2358      char *pe = end_of_line(ps, eop, &ellength);
2359    
2360      if (ellength != 0)
2361        {
2362        if (add_pattern(pe, p) == NULL) return FALSE;
2363        patlen = (int)(pe - ps - ellength);
2364        }
2365    }    }
2366    
2367    sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2368    p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2369    if (p->compiled != NULL) return TRUE;
2370    
2371  /* Handle compile errors */  /* Handle compile errors */
2372    
2373  errptr -= (int)strlen(prefix[process_options]);  errptr -= (int)strlen(prefix[popts]);
2374  if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);  if (errptr > patlen) errptr = patlen;
2375    
2376  if (filename == NULL)  if (fromfile)
2377    {    {
2378    if (count == 0)    fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2379      fprintf(stderr, "pcregrep: Error in command-line regex "      "at offset %d: %s\n", count, fromtext, errptr, error);
       "at offset %d: %s\n", errptr, error);  
   else  
     fprintf(stderr, "pcregrep: Error in %s command-line regex "  
       "at offset %d: %s\n", ordin(count), errptr, error);  
2380    }    }
2381  else  else
2382    {    {
2383    fprintf(stderr, "pcregrep: Error in regex in line %d of %s "    if (count == 0)
2384      "at offset %d: %s\n", count, filename, errptr, error);      fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2385          fromtext, errptr, error);
2386      else
2387        fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2388          ordin(count), fromtext, errptr, error);
2389    }    }
2390    
2391  return FALSE;  return FALSE;
# Line 2047  return FALSE; Line 2394  return FALSE;
2394    
2395    
2396  /*************************************************  /*************************************************
2397  *           Compile one supplied pattern         *  *     Read and compile a file of patterns        *
2398  *************************************************/  *************************************************/
2399    
2400  /* When the -F option has been used, each string may be a list of strings,  /* This is used for --filelist, --include-from, and --exclude-from.
 separated by line breaks. They will be matched literally.  
2401    
2402  Arguments:  Arguments:
2403    pattern        the pattern string    name         the name of the file; "-" is stdin
2404    options        the PCRE options    patptr       pointer to the pattern chain anchor
2405    filename       the file name, or NULL for a command-line pattern    patlastptr   pointer to the last pattern pointer
2406    count          0 if this is the only command line pattern, or    popts        the process options to pass to compile_pattern()
                  number of the command line pattern, or  
                  linenumber for a pattern from a file  
2407    
2408  Returns:         TRUE on success, FALSE after an error  Returns:       TRUE if all went well
2409  */  */
2410    
2411  static BOOL  static BOOL
2412  compile_pattern(char *pattern, int options, char *filename, int count)  read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2413  {  {
2414  if ((process_options & PO_FIXED_STRINGS) != 0)  int linenumber = 0;
2415    FILE *f;
2416    char *filename;
2417    char buffer[PATBUFSIZE];
2418    
2419    if (strcmp(name, "-") == 0)
2420    {    {
2421    char *eop = pattern + strlen(pattern);    f = stdin;
2422    char buffer[PATBUFSIZE];    filename = stdin_name;
2423      }
2424    else
2425      {
2426      f = fopen(name, "r");
2427      if (f == NULL)
2428        {
2429        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2430        return FALSE;
2431        }
2432      filename = name;
2433      }
2434    
2435    while (fgets(buffer, PATBUFSIZE, f) != NULL)
2436      {
2437      char *s = buffer + (int)strlen(buffer);
2438      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2439      *s = 0;
2440      linenumber++;
2441      if (buffer[0] == 0) continue;   /* Skip blank lines */
2442    
2443      /* Note: this call to add_pattern() puts a pointer to the local variable
2444      "buffer" into the pattern chain. However, that pointer is used only when
2445      compiling the pattern, which happens immediately below, so we flatten it
2446      afterwards, as a precaution against any later code trying to use it. */
2447    
2448      *patlastptr = add_pattern(buffer, *patlastptr);
2449      if (*patlastptr == NULL) return FALSE;
2450      if (*patptr == NULL) *patptr = *patlastptr;
2451    
2452      /* This loop is needed because compiling a "pattern" when -F is set may add
2453      on additional literal patterns if the original contains a newline. In the
2454      common case, it never will, because fgets() stops at a newline. However,
2455      the -N option can be used to give pcregrep a different newline setting. */
2456    
2457    for(;;)    for(;;)
2458      {      {
2459      int ellength;      if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2460      char *p = end_of_line(pattern, eop, &ellength);          linenumber))
     if (ellength == 0)  
       return compile_single_pattern(pattern, options, filename, count);  
     sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);  
     pattern = p;  
     if (!compile_single_pattern(buffer, options, filename, count))  
2461        return FALSE;        return FALSE;
2462        (*patlastptr)->string = NULL;            /* Insurance */
2463        if ((*patlastptr)->next == NULL) break;
2464        *patlastptr = (*patlastptr)->next;
2465      }      }
2466    }    }
2467  else return compile_single_pattern(pattern, options, filename, count);  
2468    if (f != stdin) fclose(f);
2469    return TRUE;
2470  }  }
2471    
2472    
# Line 2099  main(int argc, char **argv) Line 2482  main(int argc, char **argv)
2482  {  {
2483  int i, j;  int i, j;
2484  int rc = 1;  int rc = 1;
 int pcre_options = 0;  
 int cmd_pattern_count = 0;  
 int hint_count = 0;  
 int errptr;  
2485  BOOL only_one_at_top;  BOOL only_one_at_top;
2486  char *patterns[MAX_PATTERN_COUNT];  patstr *cp;
2487    fnstr *fn;
2488  const char *locale_from = "--locale";  const char *locale_from = "--locale";
2489  const char *error;  const char *error;
2490    
# Line 2144  for (i = 1; i < argc; i++) Line 2524  for (i = 1; i < argc; i++)
2524    
2525    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2526      {      {
2527      if (pattern_filename != NULL || pattern_count > 0) break;      if (pattern_files != NULL || patterns != NULL) break;
2528        else pcregrep_exit(usage(2));        else pcregrep_exit(usage(2));
2529      }      }
2530    
# Line 2270  for (i = 1; i < argc; i++) Line 2650  for (i = 1; i < argc; i++)
2650      {      {
2651      char *s = argv[i] + 1;      char *s = argv[i] + 1;
2652      longop = FALSE;      longop = FALSE;
2653    
2654      while (*s != 0)      while (*s != 0)
2655        {        {
2656        for (op = optionlist; op->one_char != 0; op++)        for (op = optionlist; op->one_char != 0; op++)
# Line 2282  for (i = 1; i < argc; i++) Line 2663  for (i = 1; i < argc; i++)
2663            *s, argv[i]);            *s, argv[i]);
2664          pcregrep_exit(usage(2));          pcregrep_exit(usage(2));
2665          }          }
2666    
2667        /* Check for a single-character option that has data: OP_OP_NUMBER        option_data = s+1;
2668        is used for one that either has a numerical number or defaults, i.e. the  
2669        data is optional. If a digit follows, there is data; if not, carry on        /* Break out if this is the last character in the string; it's handled
2670          below like a single multi-char option. */
2671    
2672          if (*option_data == 0) break;
2673    
2674          /* Check for a single-character option that has data: OP_OP_NUMBER(S)
2675          are used for ones that either have a numerical number or defaults, i.e.
2676          the data is optional. If a digit follows, there is data; if not, carry on
2677        with other single-character options in the same string. */        with other single-character options in the same string. */
2678    
2679        option_data = s+1;        if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
       if (op->type == OP_OP_NUMBER)  
2680          {          {
2681          if (isdigit((unsigned char)s[1])) break;          if (isdigit((unsigned char)s[1])) break;
2682          }          }
2683        else   /* Check for end or a dataless option */        else   /* Check for an option with data */
2684          {          {
2685          if (op->type != OP_NODATA || s[1] == 0) break;          if (op->type != OP_NODATA) break;
2686          }          }
2687    
2688        /* Handle a single-character option with no data, then loop for the        /* Handle a single-character option with no data, then loop for the
2689        next character in the string. */        next character in the string. */
2690    
2691        pcre_options = handle_option(*s++, pcre_options);        pcre_options = handle_option(*s++, pcre_options);
2692        }        }
2693      }      }
2694    
2695    /* At this point we should have op pointing to a matched option. If the type    /* At this point we should have op pointing to a matched option. If the type
2696    is NO_DATA, it means that there is no data, and the option might set    is NO_DATA, it means that there is no data, and the option might set
2697    something in the PCRE options. */    something in the PCRE options. */
# Line 2315  for (i = 1; i < argc; i++) Line 2702  for (i = 1; i < argc; i++)
2702      continue;      continue;
2703      }      }
2704    
2705    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
2706    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
2707    separate item. At the moment, the only such options are "colo(u)r",    separate item. At the moment, the only such options are "colo(u)r",
2708    "only-matching", and Jeffrey Friedl's special -S debugging option. */    "only-matching", and Jeffrey Friedl's special -S debugging option. */
2709    
2710    if (*option_data == 0 &&    if (*option_data == 0 &&
2711        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
2712           op->type == OP_OP_NUMBERS))
2713      {      {
2714      switch (op->one_char)      switch (op->one_char)
2715        {        {
# Line 2330  for (i = 1; i < argc; i++) Line 2718  for (i = 1; i < argc; i++)
2718        break;        break;
2719    
2720        case 'o':        case 'o':
2721        only_matching = 0;        only_matching_last = add_number(0, only_matching_last);
2722          if (only_matching == NULL) only_matching = only_matching_last;
2723        break;        break;
2724    
2725  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
# Line 2354  for (i = 1; i < argc; i++) Line 2743  for (i = 1; i < argc; i++)
2743      option_data = argv[++i];      option_data = argv[++i];
2744      }      }
2745    
2746    /* If the option type is OP_PATLIST, it's the -e option, which can be called    /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
2747    multiple times to create a list of patterns. */    added to a chain of numbers. */
2748    
2749    if (op->type == OP_PATLIST)    if (op->type == OP_OP_NUMBERS)
2750      {      {
2751      if (cmd_pattern_count >= MAX_PATTERN_COUNT)      unsigned long int n = decode_number(option_data, op, longop);
2752        omdatastr *omd = (omdatastr *)op->dataptr;
2753        *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
2754        if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
2755        }
2756    
2757      /* If the option type is OP_PATLIST, it's the -e option, or one of the
2758      include/exclude options, which can be called multiple times to create lists
2759      of patterns. */
2760    
2761      else if (op->type == OP_PATLIST)
2762        {
2763        patdatastr *pd = (patdatastr *)op->dataptr;
2764        *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2765        if (*(pd->lastptr) == NULL) goto EXIT2;
2766        if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2767        }
2768    
2769      /* If the option type is OP_FILELIST, it's one of the options that names a
2770      file. */
2771    
2772      else if (op->type == OP_FILELIST)
2773        {
2774        fndatastr *fd = (fndatastr *)op->dataptr;
2775        fn = (fnstr *)malloc(sizeof(fnstr));
2776        if (fn == NULL)
2777        {        {
2778        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",        fprintf(stderr, "pcregrep: malloc failed\n");
2779          MAX_PATTERN_COUNT);        goto EXIT2;
       return 2;  
2780        }        }
2781      patterns[cmd_pattern_count++] = option_data;      fn->next = NULL;
2782        fn->name = option_data;
2783        if (*(fd->anchor) == NULL)
2784          *(fd->anchor) = fn;
2785        else
2786          (*(fd->lastptr))->next = fn;
2787        *(fd->lastptr) = fn;
2788      }      }
2789    
2790    /* Handle OP_BINARY_FILES */    /* Handle OP_BINARY_FILES */
2791    
2792    else if (op->type == OP_BINFILES)    else if (op->type == OP_BINFILES)
2793      {      {
2794      if (strcmp(option_data, "binary") == 0)      if (strcmp(option_data, "binary") == 0)
# Line 2380  for (i = 1; i < argc; i++) Line 2799  for (i = 1; i < argc; i++)
2799        binary_files = BIN_TEXT;        binary_files = BIN_TEXT;
2800      else      else
2801        {        {
2802        fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",        fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2803          option_data);          option_data);
2804        pcregrep_exit(usage(2));        pcregrep_exit(usage(2));
2805        }        }
2806      }      }
2807    
2808    /* Otherwise, deal with single string or numeric data values. */    /* Otherwise, deal with a single string or numeric data value. */
2809    
2810    else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&    else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2811             op->type != OP_OP_NUMBER)             op->type != OP_OP_NUMBER)
2812      {      {
2813      *((char **)op->dataptr) = option_data;      *((char **)op->dataptr) = option_data;
2814      }      }
   
   /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used  
   only for unpicking arguments, so just keep it simple. */  
   
2815    else    else
2816      {      {
2817      unsigned long int n = 0;      unsigned long int n = decode_number(option_data, op, longop);
2818      char *endptr = option_data;      if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
2819      while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;        else *((int *)op->dataptr) = n;
     while (isdigit((unsigned char)(*endptr)))  
       n = n * 10 + (int)(*endptr++ - '0');  
     if (toupper(*endptr) == 'K')  
       {  
       n *= 1024;  
       endptr++;  
       }  
     else if (toupper(*endptr) == 'M')  
       {  
       n *= 1024*1024;  
       endptr++;  
       }  
     if (*endptr != 0)  
       {  
       if (longop)  
         {  
         char *equals = strchr(op->long_name, '=');  
         int nlen = (equals == NULL)? (int)strlen(op->long_name) :  
           (int)(equals - op->long_name);  
         fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",  
           option_data, nlen, op->long_name);  
         }  
       else  
         fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",  
           option_data, op->one_char);  
       pcregrep_exit(usage(2));  
       }  
     if (op->type == OP_LONGNUMBER)  
         *((unsigned long int *)op->dataptr) = n;  
     else  
         *((int *)op->dataptr) = n;  
2820      }      }
2821    }    }
2822    
# Line 2446  if (both_context > 0) Line 2830  if (both_context > 0)
2830    }    }
2831    
2832  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2833  However, the latter two set only_matching. */  However, all three set show_only_matching because they display, each in their
2834    own way, only the data that has matched. */
2835    
2836  if ((only_matching >= 0 && (file_offsets || line_offsets)) ||  if ((only_matching != NULL && (file_offsets || line_offsets)) ||
2837      (file_offsets && line_offsets))      (file_offsets && line_offsets))
2838    {    {
2839    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
# Line 2456  if ((only_matching >= 0 && (file_offsets Line 2841  if ((only_matching >= 0 && (file_offsets
2841    pcregrep_exit(usage(2));    pcregrep_exit(usage(2));
2842    }    }
2843    
2844  if (file_offsets || line_offsets) only_matching = 0;  if (only_matching != NULL || file_offsets || line_offsets)
2845      show_only_matching = TRUE;
2846    
2847  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2848  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
# Line 2580  if (jfriedl_XT != 0 || jfriedl_XR != 0) Line 2966  if (jfriedl_XT != 0 || jfriedl_XR != 0)
2966    }    }
2967  #endif  #endif
2968    
2969  /* Get memory for the main buffer, and to store the pattern and hints lists. */  /* Get memory for the main buffer. */
2970    
2971  bufsize = 3*bufthird;  bufsize = 3*bufthird;
2972  main_buffer = (char *)malloc(bufsize);  main_buffer = (char *)malloc(bufsize);
 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  
 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  
2973    
2974  if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)  if (main_buffer == NULL)
2975    {    {
2976    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
2977    goto EXIT2;    goto EXIT2;
2978    }    }
2979    
2980  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there are no files provided by -f,
2981  the first argument is the one and only pattern, and it must exist. */  the first argument is the one and only pattern, and it must exist. */
2982    
2983  if (cmd_pattern_count == 0 && pattern_filename == NULL)  if (patterns == NULL && pattern_files == NULL)
2984    {    {
2985    if (i >= argc) return usage(2);    if (i >= argc) return usage(2);
2986    patterns[cmd_pattern_count++] = argv[i++];    patterns = patterns_last = add_pattern(argv[i++], NULL);
2987      if (patterns == NULL) goto EXIT2;
2988    }    }
2989    
2990  /* Compile the patterns that were provided on the command line, either by  /* Compile the patterns that were provided on the command line, either by
2991  multiple uses of -e or as a single unkeyed pattern. */  multiple uses of -e or as a single unkeyed pattern. We cannot do this until
2992    after all the command-line options are read so that we know which PCRE options
2993    to use. When -F is used, compile_pattern() may add another block into the
2994    chain, so we must not access the next pointer till after the compile. */
2995    
2996  for (j = 0; j < cmd_pattern_count; j++)  for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
2997    {    {
2998    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
2999         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 1 && patterns->next == NULL)? 0 : j))
3000      goto EXIT2;      goto EXIT2;
3001    }    }
3002    
3003  /* Compile the regular expressions that are provided in a file. */  /* Read and compile the regular expressions that are provided in files. */
3004    
3005  if (pattern_filename != NULL)  for (fn = pattern_files; fn != NULL; fn = fn->next)
3006    {    {
3007    int linenumber = 0;    if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3008    FILE *f;      goto EXIT2;
3009    char *filename;    }
   char buffer[PATBUFSIZE];  
   
   if (strcmp(pattern_filename, "-") == 0)  
     {  
     f = stdin;  
     filename = stdin_name;  
     }  
   else  
     {  
     f = fopen(pattern_filename, "r");  
     if (f == NULL)  
       {  
       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,  
         strerror(errno));  
       goto EXIT2;  
       }  
     filename = pattern_filename;  
     }  
3010    
3011    while (fgets(buffer, PATBUFSIZE, f) != NULL)  /* Study the regular expressions, as we will be running them many times. If an
3012      {  extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
3013      char *s = buffer + (int)strlen(buffer);  returned, even if studying produces no data. */
     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;  
     *s = 0;  
     linenumber++;  
     if (buffer[0] == 0) continue;   /* Skip blank lines */  
     if (!compile_pattern(buffer, pcre_options, filename, linenumber))  
       goto EXIT2;  
     }  
3014    
3015    if (f != stdin) fclose(f);  if (match_limit > 0 || match_limit_recursion > 0)
3016    }    study_options |= PCRE_STUDY_EXTRA_NEEDED;
3017    
3018  /* Study the regular expressions, as we will be running them many times. Unless  /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
 JIT has been explicitly disabled, arrange a stack for it to use. */  
3019    
3020  #ifdef SUPPORT_PCREGREP_JIT  #ifdef SUPPORT_PCREGREP_JIT
3021  if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)  if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3022    jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);    jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
3023  #endif  #endif
3024    
3025  for (j = 0; j < pattern_count; j++)  for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3026    {    {
3027    hints_list[j] = pcre_study(pattern_list[j], study_options, &error);    cp->hint = pcre_study(cp->compiled, study_options, &error);
3028    if (error != NULL)    if (error != NULL)
3029      {      {
3030      char s[16];      char s[16];
3031      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
3032      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
3033      goto EXIT2;      goto EXIT2;
3034      }      }
   hint_count++;  
3035  #ifdef SUPPORT_PCREGREP_JIT  #ifdef SUPPORT_PCREGREP_JIT
3036    if (jit_stack != NULL && hints_list[j] != NULL)    if (jit_stack != NULL && cp->hint != NULL)
3037      pcre_assign_jit_stack(hints_list[j], NULL, jit_stack);      pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
3038  #endif  #endif
3039    }    }
3040    
3041  /* If --match-limit or --recursion-limit was set, put the value(s) into the  /* If --match-limit or --recursion-limit was set, put the value(s) into the
3042  pcre_extra block for each pattern. */  pcre_extra block for each pattern. There will always be an extra block because
3043    of the use of PCRE_STUDY_EXTRA_NEEDED above. */
3044    
3045  if (match_limit > 0 || match_limit_recursion > 0)  for (cp = patterns; cp != NULL; cp = cp->next)
3046    {    {
3047    for (j = 0; j < pattern_count; j++)    if (match_limit > 0)
3048      {      {
3049      if (hints_list[j] == NULL)      cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
3050        {      cp->hint->match_limit = match_limit;
3051        hints_list[j] = malloc(sizeof(pcre_extra));      }
3052        if (hints_list[j] == NULL)  
3053          {    if (match_limit_recursion > 0)
3054          fprintf(stderr, "pcregrep: malloc failed\n");      {
3055          pcregrep_exit(2);      cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3056          }      cp->hint->match_limit_recursion = match_limit_recursion;
       }  
     if (match_limit > 0)  
       {  
       hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;  
       hints_list[j]->match_limit = match_limit;  
       }  
     if (match_limit_recursion > 0)  
       {  
       hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;  
       hints_list[j]->match_limit_recursion = match_limit_recursion;  
       }  
3057      }      }
3058    }    }
3059    
3060  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns read from the command line, compile
3061    them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3062    0. */
3063    
3064  if (exclude_pattern != NULL)  for (j = 0; j < 4; j++)
3065    {    {
3066    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,    int k;
3067      pcretables);    for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
   if (exclude_compiled == NULL)  
3068      {      {
3069      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
3070        errptr, error);           (k == 1 && cp->next == NULL)? 0 : k))
3071      goto EXIT2;        goto EXIT2;
3072      }      }
3073    }    }
3074    
3075  if (include_pattern != NULL)  /* Read and compile include/exclude patterns from files. */
3076    
3077    for (fn = include_from; fn != NULL; fn = fn->next)
3078    {    {
3079    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,    if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
     pcretables);  
   if (include_compiled == NULL)  
     {  
     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",  
       errptr, error);  
3080      goto EXIT2;      goto EXIT2;
     }  
3081    }    }
3082    
3083  if (exclude_dir_pattern != NULL)  for (fn = exclude_from; fn != NULL; fn = fn->next)
3084    {    {
3085    exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,    if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
     pcretables);  
   if (exclude_dir_compiled == NULL)  
     {  
     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",  
       errptr, error);  
3086      goto EXIT2;      goto EXIT2;
     }  
3087    }    }
3088    
3089  if (include_dir_pattern != NULL)  /* If there are no files that contain lists of files to search, and there are
3090    no file arguments, search stdin, and then exit. */
3091    
3092    if (file_lists == NULL && i >= argc)
3093    {    {
3094    include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,    rc = pcregrep(stdin, FR_PLAIN, stdin_name,
3095      pcretables);      (filenames > FN_DEFAULT)? stdin_name : NULL);
3096    if (include_dir_compiled == NULL)    goto EXIT;
     {  
     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",  
       errptr, error);  
     goto EXIT2;  
     }  
3097    }    }
   
 /* If a file that contains a list of files to search has been specified, read  
 it line by line and search the given files. Otherwise, if there are no further  
 arguments, do the business on stdin and exit. */  
3098    
3099  if (file_list != NULL)  /* If any files that contains a list of files to search have been specified,
3100    read them line by line and search the given files. */
3101    
3102    for (fn = file_lists; fn != NULL; fn = fn->next)
3103    {    {
3104    char buffer[PATBUFSIZE];    char buffer[PATBUFSIZE];
3105    FILE *fl;    FILE *fl;
3106    if (strcmp(file_list, "-") == 0) fl = stdin; else    if (strcmp(fn->name, "-") == 0) fl = stdin; else
3107      {      {
3108      fl = fopen(file_list, "rb");      fl = fopen(fn->name, "rb");
3109      if (fl == NULL)      if (fl == NULL)
3110        {        {
3111        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", file_list,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
3112          strerror(errno));          strerror(errno));
3113        goto EXIT2;        goto EXIT2;
3114        }        }
3115      }      }
3116    while (fgets(buffer, PATBUFSIZE, fl) != NULL)    while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3117      {      {
3118      int frc;      int frc;
3119      char *end = buffer + (int)strlen(buffer);      char *end = buffer + (int)strlen(buffer);
3120      while (end > buffer && isspace(end[-1])) end--;      while (end > buffer && isspace(end[-1])) end--;
3121      *end = 0;      *end = 0;
3122      if (*buffer != 0)      if (*buffer != 0)
3123        {        {
3124        frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);        frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3125        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
3126          else if (frc == 0 && rc == 1) rc = 0;          else if (frc == 0 && rc == 1) rc = 0;
3127        }        }
3128      }      }
3129    if (fl != stdin) fclose (fl);    if (fl != stdin) fclose(fl);
   }  
   
 /* Do this only if there was no file list (and no file arguments). */  
   
 else if (i >= argc)  
   {  
   rc = pcregrep(stdin, FR_PLAIN, stdin_name,  
     (filenames > FN_DEFAULT)? stdin_name : NULL);  
   goto EXIT;  
3130    }    }
3131    
3132  /* After handling file-list or if there are remaining arguments, work through  /* After handling file-list, work through remaining arguments. Pass in the fact
3133  them as files or directories. Pass in the fact that there is only one argument  that there is only one argument at top level - this suppresses the file name if
3134  at top level - this suppresses the file name if the argument is not a directory  the argument is not a directory and filenames are not otherwise forced. */
 and filenames are not otherwise forced. */  
3135    
3136  only_one_at_top = i == argc - 1 && file_list == NULL;  only_one_at_top = i == argc - 1 && file_lists == NULL;
3137    
3138  for (; i < argc; i++)  for (; i < argc; i++)
3139    {    {
# Line 2818  EXIT: Line 3147  EXIT:
3147  #ifdef SUPPORT_PCREGREP_JIT  #ifdef SUPPORT_PCREGREP_JIT
3148  if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);  if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3149  #endif  #endif
3150    
3151  if (main_buffer != NULL) free(main_buffer);  if (main_buffer != NULL) free(main_buffer);
3152  if (pattern_list != NULL)  
3153    {  free_pattern_chain(patterns);
3154    for (i = 0; i < pattern_count; i++) free(pattern_list[i]);  free_pattern_chain(include_patterns);
3155    free(pattern_list);  free_pattern_chain(include_dir_patterns);
3156    }  free_pattern_chain(exclude_patterns);
3157  if (hints_list != NULL)  free_pattern_chain(exclude_dir_patterns);
3158    {  
3159    for (i = 0; i < hint_count; i++)  free_file_chain(exclude_from);
3160      {  free_file_chain(include_from);
3161      if (hints_list[i] != NULL) pcre_free_study(hints_list[i]);  free_file_chain(pattern_files);
3162      }  free_file_chain(file_lists);
3163    free(hints_list);  
3164    while (only_matching != NULL)
3165      {
3166      omstr *this = only_matching;
3167      only_matching = this->next;
3168      free(this);
3169    }    }
3170    
3171  pcregrep_exit(rc);  pcregrep_exit(rc);
3172    
3173  EXIT2:  EXIT2:

Legend:
Removed from v.971  
changed lines
  Added in v.1096

  ViewVC Help
Powered by ViewVC 1.1.5