/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 589 by ph10, Sat Jan 15 11:31:39 2011 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2011 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
# Line 45  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
 #include "config.h"  
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 55  POSSIBILITY OF SUCH DAMAGE. Line 70  POSSIBILITY OF SUCH DAMAGE.
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "4.0 07-Jun-2005"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define MBUFTHIRD BUFSIZ
# Line 64  typedef int BOOL; Line 79  typedef int BOOL;
79  #define MBUFTHIRD 8192  #define MBUFTHIRD 8192
80  #endif  #endif
81    
82    /* Values for the "filenames" variable, which specifies options for file name
83    output. The order is important; it is assumed that a file name is wanted for
84    all values greater than FN_DEFAULT. */
85    
86    enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92    /* Actions for the -d and -D options */
93    
94    enum { dee_READ, dee_SKIP, dee_RECURSE };
95    enum { DEE_READ, DEE_SKIP };
96    
97    /* Actions for special processing options (flag bits) */
98    
99    #define PO_WORD_MATCH     0x0001
100    #define PO_LINE_MATCH     0x0002
101    #define PO_FIXED_STRINGS  0x0004
102    
103    /* Line ending types */
104    
105    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
/* The expansion is wrapped in do ... while (0) so that FWRITE(...); is exactly
one statement and can safely be used as the body of an unbraced if/else. The
inner if still consumes the result of fwrite(), which is what silences the
unused-result warning. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114    
115    
116    
117  /*************************************************  /*************************************************
118  *               Global variables                 *  *               Global variables                 *
119  *************************************************/  *************************************************/
120    
121    /* Jeffrey Friedl has some debugging requirements that are not part of the
122    regular code. */
123    
124    #ifdef JFRIEDL_DEBUG
125    static int S_arg = -1;
126    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128    static const char *jfriedl_prefix = "";
129    static const char *jfriedl_postfix = "";
130    #endif
131    
132    static int  endlinetype;
133    
134    static char *colour_string = (char *)"1;31";
135    static char *colour_option = NULL;
136    static char *dee_option = NULL;
137    static char *DEE_option = NULL;
138    static char *newline = NULL;
139  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
140  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
141    static char *locale = NULL;
142    
143    static const unsigned char *pcretables = NULL;
144    
145  static int  pattern_count = 0;  static int  pattern_count = 0;
146  static pcre **pattern_list;  static pcre **pattern_list = NULL;
147  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
148    
149  static char *include_pattern = NULL;  static char *include_pattern = NULL;
150  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
151    static char *include_dir_pattern = NULL;
152    static char *exclude_dir_pattern = NULL;
153    
154  static pcre *include_compiled = NULL;  static pcre *include_compiled = NULL;
155  static pcre *exclude_compiled = NULL;  static pcre *exclude_compiled = NULL;
156    static pcre *include_dir_compiled = NULL;
157    static pcre *exclude_dir_compiled = NULL;
158    
159  static int after_context = 0;  static int after_context = 0;
160  static int before_context = 0;  static int before_context = 0;
161  static int both_context = 0;  static int both_context = 0;
162    static int dee_action = dee_READ;
163    static int DEE_action = DEE_READ;
164    static int error_count = 0;
165    static int filenames = FN_DEFAULT;
166    static int only_matching = -1;
167    static int process_options = 0;
168    
169    static unsigned long int match_limit = 0;
170    static unsigned long int match_limit_recursion = 0;
171    
172  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
173  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
174  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
 static BOOL filenames_nomatch_only = FALSE;  
175  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
176  static BOOL invert = FALSE;  static BOOL invert = FALSE;
177    static BOOL line_buffered = FALSE;
178    static BOOL line_offsets = FALSE;
179  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
180  static BOOL number = FALSE;  static BOOL number = FALSE;
181    static BOOL omit_zero_count = FALSE;
182    static BOOL resource_error = FALSE;
183  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
 static BOOL recurse = FALSE;  
184  static BOOL silent = FALSE;  static BOOL silent = FALSE;
185  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
 static BOOL word_match = FALSE;  
186    
187  /* Structure for options and list of them */  /* Structure for options and list of them */
188    
189  enum { OP_NODATA, OP_STRING, OP_NUMBER };  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
190           OP_OP_NUMBER, OP_PATLIST };
191    
192  typedef struct option_item {  typedef struct option_item {
193    int type;    int type;
# Line 112  typedef struct option_item { Line 197  typedef struct option_item {
197    const char *help_text;    const char *help_text;
198  } option_item;  } option_item;
199    
200    /* Options without a single-letter equivalent get a negative value. This can be
201    used to identify them. */
202    
203    #define N_COLOUR       (-1)
204    #define N_EXCLUDE      (-2)
205    #define N_EXCLUDE_DIR  (-3)
206    #define N_HELP         (-4)
207    #define N_INCLUDE      (-5)
208    #define N_INCLUDE_DIR  (-6)
209    #define N_LABEL        (-7)
210    #define N_LOCALE       (-8)
211    #define N_NULL         (-9)
212    #define N_LOFFSETS     (-10)
213    #define N_FOFFSETS     (-11)
214    #define N_LBUFFER      (-12)
215    #define N_M_LIMIT      (-13)
216    #define N_M_LIMIT_REC  (-14)
217    
218  static option_item optionlist[] = {  static option_item optionlist[] = {
219    { OP_NODATA, -1,  NULL,              "",              "  terminate options" },    { OP_NODATA,     N_NULL,   NULL,              "",              "  terminate options" },
220    { OP_NODATA, -1,  NULL,              "help",          "display this help and exit" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
221    { OP_NUMBER, 'A', &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
222    { OP_NUMBER, 'B', &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
223    { OP_NUMBER, 'C', &both_context,     "context=number", "set number of context lines, before & after" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
224    { OP_NODATA, 'c', NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
225    { OP_STRING, 'f', &pattern_filename, "file=path",     "read patterns from file" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
226    { OP_NODATA, 'h', NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
227    { OP_NODATA, 'i', NULL,              "ignore-case",   "ignore case distinctions" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
228    { OP_NODATA, 'l', NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
229    { OP_NODATA, 'L', NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_PATLIST,    'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
230    { OP_STRING, -1,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
231    { OP_NODATA, 'M', NULL,              "multiline",     "run in multiline mode" },    { OP_STRING,     'f',      &pattern_filename, "file=path",     "read patterns from file" },
232    { OP_NODATA, 'n', NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
233    { OP_NODATA, 'q', NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
234    { OP_NODATA, 'r', NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
235    { OP_STRING, -1,  &exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
236    { OP_STRING, -1,  &include_pattern,  "include=pattern","include matching files when recursing" },    { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
237    { OP_NODATA, 's', NULL,              "no-messages",   "suppress error messages" },    { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
238    { OP_NODATA, 'u', NULL,              "utf-8",         "use UTF-8 mode" },    { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
239    { OP_NODATA, 'V', NULL,              "version",       "print version information and exit" },    { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
240    { OP_NODATA, 'v', NULL,              "invert-match",  "select non-matching lines" },    { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
241    { OP_NODATA, 'w', NULL,              "word-regex(p)", "force PATTERN to match only as a word"  },    { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
242    { OP_NODATA, 'x', NULL,              "line-regex(p)", "force PATTERN to match only whole lines" },    { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
243    { OP_NODATA, 0,   NULL,               NULL,            NULL }    { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
245      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
247      { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
248      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
249      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
250      { OP_STRING,     N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
251      { OP_STRING,     N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
252      { OP_STRING,     N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
253      { OP_STRING,     N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
254    
255      /* These two were accidentally implemented with underscores instead of
256      hyphens in the option names. As this was not discovered for several releases,
257      the incorrect versions are left in the table for compatibility. However, the
258      --help function misses out any option that has an underscore in its name. */
259    
260      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
261      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
262    
263    #ifdef JFRIEDL_DEBUG
264      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
265    #endif
266      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
267      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
268      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
269      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
270      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
271      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
272      { OP_NODATA,    0,        NULL,               NULL,            NULL }
273  };  };
274    
275    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
276    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
277    that the combination of -w and -x has the same effect as -x on its own, so we
278    can treat them as the same. */
279    
280    static const char *prefix[] = {
281      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
282    
283    static const char *suffix[] = {
284      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
285    
286    /* UTF-8 tables - used only when the newline setting is "any". */
287    
288    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
289    
290    const char utf8_table4[] = {
291      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
292      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
293      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
294      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
295    
296    
297    
298    /*************************************************
299    *         Exit from the program                  *
300    *************************************************/
301    
302    /* If there has been a resource error, give a suitable message.
303    
304    Argument:  the return code
305    Returns:   does not return
306    */
307    
308    static void
309    pcregrep_exit(int rc)
310    {
311    if (resource_error)
312      {
313      fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
314        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
315      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
316      }
317    
318    exit(rc);
319    }
320    
321    
322  /*************************************************  /*************************************************
323  *       Functions for directory scanning         *  *            OS-specific functions               *
324  *************************************************/  *************************************************/
325    
326  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
327  although at present the only ones are for Unix, Win32, and for "no directory  although at present the only ones are for Unix, Win32, and for "no support". */
 recursion support". */  
328    
329    
330  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
331    
332  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
333  #include <sys/types.h>  #include <sys/types.h>
334  #include <sys/stat.h>  #include <sys/stat.h>
335  #include <dirent.h>  #include <dirent.h>
# Line 184  for (;;) Line 361  for (;;)
361    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
362      return dent->d_name;      return dent->d_name;
363    }    }
364  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
365  }  }
366    
367  static void  static void
# Line 194  closedir(dir); Line 371  closedir(dir);
371  }  }
372    
373    
374    /************* Test for regular file in Unix **********/
375    
/* Report whether a path names a regular file.

Argument:  filename   the path to examine with stat()
Returns:   1 if stat() reports a regular file, or if stat() itself fails;
           0 for any other type of file
*/

static int
isregfile(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 1;        /* In the expectation that opening as a file will fail */

/* S_ISREG() is the base POSIX test; it gives the same answer as masking
st_mode with S_IFMT and comparing with S_IFREG, without relying on those
XSI constants being visible. */

return S_ISREG(statbuf.st_mode) != 0;
}
384    
385    
386    /************* Test for a terminal in Unix **********/
387    
388    static BOOL
389    is_stdout_tty(void)
390    {
391    return isatty(fileno(stdout));
392    }
393    
394    static BOOL
395    is_file_tty(FILE *f)
396    {
397    return isatty(fileno(f));
398    }
399    
400    
401  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
402    
403  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
404  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
405  when it did not exist. */  when it did not exist. David Byron added a patch that moved the #include of
406    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
407    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
408    undefined when it is indeed undefined. */
409    
410  #elif HAVE_WIN32API  #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
411    
412  #ifndef STRICT  #ifndef STRICT
413  # define STRICT  # define STRICT
# Line 209  when it did not exist. */ Line 415  when it did not exist. */
415  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
416  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
417  #endif  #endif
418    
419    #include <windows.h>
420    
421  #ifndef INVALID_FILE_ATTRIBUTES  #ifndef INVALID_FILE_ATTRIBUTES
422  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
423  #endif  #endif
424    
 #include <windows.h>  
   
425  typedef struct directory_type  typedef struct directory_type
426  {  {
427  HANDLE handle;  HANDLE handle;
# Line 244  dir = (directory_type *) malloc(sizeof(* Line 451  dir = (directory_type *) malloc(sizeof(*
451  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
452    {    {
453    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
454    exit(2);    pcregrep_exit(2);
455    }    }
456  memcpy(pattern, filename, len);  memcpy(pattern, filename, len);
457  memcpy(&(pattern[len]), "\\*", 3);  memcpy(&(pattern[len]), "\\*", 3);
# Line 292  free(dir); Line 499  free(dir);
499  }  }
500    
501    
502    /************* Test for regular file in Win32 **********/
503    
504    /* I don't know how to do this, or if it can be done; assume all paths are
505    regular if they are not directories. */
506    
/* Win32 version: anything that isdirectory() does not claim is reported as a
regular file. Returns 1 for "regular", 0 for a directory. */

int isregfile(char *filename)
{
if (isdirectory(filename)) return 0;
return 1;
}
511    
512    
513    /************* Test for a terminal in Win32 **********/
514    
515    /* I don't know how to do this; assume never */
516    
517    static BOOL
518    is_stdout_tty(void)
519    {
520    return FALSE;
521    }
522    
523    static BOOL
524    is_file_tty(FILE *f)
525    {
526    return FALSE;
527    }
528    
529    
530  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
531    
532  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
# Line 300  free(dir); Line 535  free(dir);
535    
536  typedef void directory_type;  typedef void directory_type;
537    
538  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
539  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
540  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
541  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
542    
543    
544    /************* Test for regular when we can't do it **********/
545    
546    /* Assume all files are regular. */
547    
/* Fallback version: the argument is not examined and every path is reported
as a regular file. */

int isregfile(char *filename)
{
(void)filename;
return 1;
}
549    
550    
551    /************* Test for a terminal when we can't do it **********/
552    
553    static BOOL
554    is_stdout_tty(void)
555    {
556    return FALSE;
557    }
558    
559    static BOOL
560    is_file_tty(FILE *f)
561    {
562    return FALSE;
563    }
564    
565  #endif  #endif
566    
567    
568    
569  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
570  /*************************************************  /*************************************************
571  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
572  *************************************************/  *************************************************/
# Line 332  return sys_errlist[n]; Line 589  return sys_errlist[n];
589    
590    
591  /*************************************************  /*************************************************
592    *            Read one line of input              *
593    *************************************************/
594    
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. fgets() cannot be used for this: it gives no byte count, and because the
data may contain embedded binary zeros the length of what it read cannot be
recovered from the buffer afterwards.

Bytes are copied one at a time with fgetc() until a '\n' has been stored, the
buffer is full, or end of file is reached. The newline, if any, is included in
the count. Nothing is read or stored when length is zero or negative.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;

/* The space check comes before the read so that a byte is never taken from
the stream unless there is room to store it. */

while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return yield;
}
622    
623    
624    
625    /*************************************************
626    *             Find end of line                   *
627    *************************************************/
628    
629    /* The length of the endline sequence that is found is set via lenptr. This may
630    be zero at the very end of the file if there is no line-ending sequence there.
631    
632    Arguments:
633      p         current position in line
634      endptr    end of available data
635      lenptr    where to put the length of the eol sequence
636    
637    Returns:    pointer to the last byte of the line, including the newline byte(s)
638    */
639    
640    static char *
641    end_of_line(char *p, char *endptr, int *lenptr)
642    {
643    switch(endlinetype)
644      {
645      default:      /* Just in case */
646      case EL_LF:
647      while (p < endptr && *p != '\n') p++;
648      if (p < endptr)
649        {
650        *lenptr = 1;
651        return p + 1;
652        }
653      *lenptr = 0;
654      return endptr;
655    
656      case EL_CR:
657      while (p < endptr && *p != '\r') p++;
658      if (p < endptr)
659        {
660        *lenptr = 1;
661        return p + 1;
662        }
663      *lenptr = 0;
664      return endptr;
665    
666      case EL_CRLF:
667      for (;;)
668        {
669        while (p < endptr && *p != '\r') p++;
670        if (++p >= endptr)
671          {
672          *lenptr = 0;
673          return endptr;
674          }
675        if (*p == '\n')
676          {
677          *lenptr = 2;
678          return p + 1;
679          }
680        }
681      break;
682    
683      case EL_ANYCRLF:
684      while (p < endptr)
685        {
686        int extra = 0;
687        register int c = *((unsigned char *)p);
688    
689        if (utf8 && c >= 0xc0)
690          {
691          int gcii, gcss;
692          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
693          gcss = 6*extra;
694          c = (c & utf8_table3[extra]) << gcss;
695          for (gcii = 1; gcii <= extra; gcii++)
696            {
697            gcss -= 6;
698            c |= (p[gcii] & 0x3f) << gcss;
699            }
700          }
701    
702        p += 1 + extra;
703    
704        switch (c)
705          {
706          case 0x0a:    /* LF */
707          *lenptr = 1;
708          return p;
709    
710          case 0x0d:    /* CR */
711          if (p < endptr && *p == 0x0a)
712            {
713            *lenptr = 2;
714            p++;
715            }
716          else *lenptr = 1;
717          return p;
718    
719          default:
720          break;
721          }
722        }   /* End of loop for ANYCRLF case */
723    
724      *lenptr = 0;  /* Must have hit the end */
725      return endptr;
726    
727      case EL_ANY:
728      while (p < endptr)
729        {
730        int extra = 0;
731        register int c = *((unsigned char *)p);
732    
733        if (utf8 && c >= 0xc0)
734          {
735          int gcii, gcss;
736          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
737          gcss = 6*extra;
738          c = (c & utf8_table3[extra]) << gcss;
739          for (gcii = 1; gcii <= extra; gcii++)
740            {
741            gcss -= 6;
742            c |= (p[gcii] & 0x3f) << gcss;
743            }
744          }
745    
746        p += 1 + extra;
747    
748        switch (c)
749          {
750          case 0x0a:    /* LF */
751          case 0x0b:    /* VT */
752          case 0x0c:    /* FF */
753          *lenptr = 1;
754          return p;
755    
756          case 0x0d:    /* CR */
757          if (p < endptr && *p == 0x0a)
758            {
759            *lenptr = 2;
760            p++;
761            }
762          else *lenptr = 1;
763          return p;
764    
765          case 0x85:    /* NEL */
766          *lenptr = utf8? 2 : 1;
767          return p;
768    
769          case 0x2028:  /* LS */
770          case 0x2029:  /* PS */
771          *lenptr = 3;
772          return p;
773    
774          default:
775          break;
776          }
777        }   /* End of loop for ANY case */
778    
779      *lenptr = 0;  /* Must have hit the end */
780      return endptr;
781      }     /* End of overall switch */
782    }
783    
784    
785    
786    /*************************************************
787    *         Find start of previous line            *
788    *************************************************/
789    
790    /* This is called when looking back for before lines to print.
791    
792    Arguments:
793      p         start of the subsequent line
794      startptr  start of available data
795    
796    Returns:    pointer to the start of the previous line
797    */
798    
799    static char *
800    previous_line(char *p, char *startptr)
801    {
802    switch(endlinetype)
803      {
804      default:      /* Just in case */
805      case EL_LF:
806      p--;
807      while (p > startptr && p[-1] != '\n') p--;
808      return p;
809    
810      case EL_CR:
811      p--;
812      while (p > startptr && p[-1] != '\n') p--;
813      return p;
814    
815      case EL_CRLF:
816      for (;;)
817        {
818        p -= 2;
819        while (p > startptr && p[-1] != '\n') p--;
820        if (p <= startptr + 1 || p[-2] == '\r') return p;
821        }
822      return p;   /* But control should never get here */
823    
824      case EL_ANY:
825      case EL_ANYCRLF:
826      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
827      if (utf8) while ((*p & 0xc0) == 0x80) p--;
828    
829      while (p > startptr)
830        {
831        register int c;
832        char *pp = p - 1;
833    
834        if (utf8)
835          {
836          int extra = 0;
837          while ((*pp & 0xc0) == 0x80) pp--;
838          c = *((unsigned char *)pp);
839          if (c >= 0xc0)
840            {
841            int gcii, gcss;
842            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
843            gcss = 6*extra;
844            c = (c & utf8_table3[extra]) << gcss;
845            for (gcii = 1; gcii <= extra; gcii++)
846              {
847              gcss -= 6;
848              c |= (pp[gcii] & 0x3f) << gcss;
849              }
850            }
851          }
852        else c = *((unsigned char *)pp);
853    
854        if (endlinetype == EL_ANYCRLF) switch (c)
855          {
856          case 0x0a:    /* LF */
857          case 0x0d:    /* CR */
858          return p;
859    
860          default:
861          break;
862          }
863    
864        else switch (c)
865          {
866          case 0x0a:    /* LF */
867          case 0x0b:    /* VT */
868          case 0x0c:    /* FF */
869          case 0x0d:    /* CR */
870          case 0x85:    /* NEL */
871          case 0x2028:  /* LS */
872          case 0x2029:  /* PS */
873          return p;
874    
875          default:
876          break;
877          }
878    
879        p = pp;  /* Back one character */
880        }        /* End of loop for ANY case */
881    
882      return startptr;  /* Hit start of data */
883      }     /* End of overall switch */
884    }
885    
886    
887    
888    
889    
890    /*************************************************
891  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
892  *************************************************/  *************************************************/
893    
894  /* This is called if we are about to lose said lines because of buffer filling,  /* This is called if we are about to lose said lines because of buffer filling,
895  and at the end of the file.  and at the end of the file. The data in the line is written using fwrite() so
896    that a binary zero does not terminate it.
897    
898  Arguments:  Arguments:
899    lastmatchnumber   the number of the last matching line, plus one    lastmatchnumber   the number of the last matching line, plus one
# Line 355  if (after_context > 0 && lastmatchnumber Line 912  if (after_context > 0 && lastmatchnumber
912    int count = 0;    int count = 0;
913    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
914      {      {
915        int ellength;
916      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
917      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
918      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
919      while (*pp != '\n') pp++;      pp = end_of_line(pp, endptr, &ellength);
920      fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
921      lastmatchrestart = pp + 1;      lastmatchrestart = pp;
922      }      }
923    hyphenpending = TRUE;    hyphenpending = TRUE;
924    }    }
# Line 369  if (after_context > 0 && lastmatchnumber Line 927  if (after_context > 0 && lastmatchnumber
927    
928    
929  /*************************************************  /*************************************************
930    *   Apply patterns to subject till one matches   *
931    *************************************************/
932    
933    /* This function is called to run through all patterns, looking for a match. It
934    is used multiple times for the same subject when colouring is enabled, in order
935    to find all possible matches.
936    
937    Arguments:
938      matchptr    the start of the subject
939      length      the length of the subject to match
940      offsets     the offets vector to fill in
941      mrc         address of where to put the result of pcre_exec()
942    
943    Returns:      TRUE if there was a match
944                  FALSE if there was no match
945                  invert if there was a non-fatal error
946    */
947    
948    static BOOL
949    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
950    {
951    int i;
952    size_t slen = length;
953    const char *msg = "this text:\n\n";
954    if (slen > 200)
955      {
956      slen = 200;
957      msg = "text that starts:\n\n";
958      }
959    for (i = 0; i < pattern_count; i++)
960      {
961      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
962        PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
963      if (*mrc >= 0) return TRUE;
964      if (*mrc == PCRE_ERROR_NOMATCH) continue;
965      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
966      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
967      fprintf(stderr, "%s", msg);
968      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
969      fprintf(stderr, "\n\n");
970      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
971        resource_error = TRUE;
972      if (error_count++ > 20)
973        {
974        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
975        pcregrep_exit(2);
976        }
977      return invert;    /* No more matching; don't show the line again */
978      }
979    
980    return FALSE;  /* No match, no errors */
981    }
982    
983    
984    
985    /*************************************************
986  *            Grep an individual file             *  *            Grep an individual file             *
987  *************************************************/  *************************************************/
988    
# Line 380  be in the middle third most of the time, Line 994  be in the middle third most of the time,
994  "before" context printing.  "before" context printing.
995    
996  Arguments:  Arguments:
997    in           the fopened FILE stream    handle       the fopened FILE stream for a normal file
998                   the gzFile pointer when reading is via libz
999                   the BZFILE pointer when reading is via libbz2
1000      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1001    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
1002                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
1003                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
1004    
1005  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
1006                 1 otherwise (no matches)                 1 otherwise (no matches)
1007                   2 if there is a read error on a .bz2 file
1008  */  */
1009    
1010  static int  static int
1011  pcregrep(FILE *in, char *printname)  pcregrep(void *handle, int frtype, char *printname)
1012  {  {
1013  int rc = 1;  int rc = 1;
1014  int linenumber = 1;  int linenumber = 1;
1015  int lastmatchnumber = 0;  int lastmatchnumber = 0;
1016  int count = 0;  int count = 0;
1017  int offsets[99];  int filepos = 0;
1018    int offsets[OFFSET_SIZE];
1019  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
1020  char buffer[3*MBUFTHIRD];  char buffer[3*MBUFTHIRD];
1021  char *ptr = buffer;  char *ptr = buffer;
1022  char *endptr;  char *endptr;
1023  size_t bufflength;  size_t bufflength;
1024  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
1025    BOOL input_line_buffered = line_buffered;
1026    FILE *in = NULL;                    /* Ensure initialized */
1027    
1028    #ifdef SUPPORT_LIBZ
1029    gzFile ingz = NULL;
1030    #endif
1031    
1032    #ifdef SUPPORT_LIBBZ2
1033    BZFILE *inbz2 = NULL;
1034    #endif
1035    
1036    
1037    /* Do the first read into the start of the buffer and set up the pointer to end
1038    of what we have. In the case of libz, a non-zipped .gz file will be read as a
1039    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1040    fail. */
1041    
1042  /* Do the first read into the start of the buffer and set up the pointer to  #ifdef SUPPORT_LIBZ
1043  end of what we have. */  if (frtype == FR_LIBZ)
1044      {
1045      ingz = (gzFile)handle;
1046      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1047      }
1048    else
1049    #endif
1050    
1051    #ifdef SUPPORT_LIBBZ2
1052    if (frtype == FR_LIBBZ2)
1053      {
1054      inbz2 = (BZFILE *)handle;
1055      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1056      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1057      }                                    /* without the cast it is unsigned. */
1058    else
1059    #endif
1060    
1061      {
1062      in = (FILE *)handle;
1063      if (is_file_tty(in)) input_line_buffered = TRUE;
1064      bufflength = input_line_buffered?
1065        read_one_line(buffer, 3*MBUFTHIRD, in) :
1066        fread(buffer, 1, 3*MBUFTHIRD, in);
1067      }
1068    
 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  
1069  endptr = buffer + bufflength;  endptr = buffer + bufflength;
1070    
1071  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
# Line 417  way, the buffer is shifted left and re-f Line 1075  way, the buffer is shifted left and re-f
1075    
1076  while (ptr < endptr)  while (ptr < endptr)
1077    {    {
1078    int i;    int endlinelength;
1079    BOOL match = FALSE;    int mrc = 0;
1080      BOOL match;
1081      char *matchptr = ptr;
1082    char *t = ptr;    char *t = ptr;
1083    size_t length, linelength;    size_t length, linelength;
1084    
1085    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
1086    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
1087    length remainder of the data in the buffer. Otherwise, it is the length of    length remainder of the data in the buffer. Otherwise, it is the length of
1088    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
1089    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1090    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
1091      first line. */
1092    
1093      t = end_of_line(t, endptr, &endlinelength);
1094      linelength = t - ptr - endlinelength;
1095      length = multiline? (size_t)(endptr - ptr) : linelength;
1096    
1097      /* Extra processing for Jeffrey Friedl's debugging. */
1098    
1099    #ifdef JFRIEDL_DEBUG
1100      if (jfriedl_XT || jfriedl_XR)
1101      {
1102          #include <sys/time.h>
1103          #include <time.h>
1104          struct timeval start_time, end_time;
1105          struct timezone dummy;
1106          int i;
1107    
1108    linelength = 0;        if (jfriedl_XT)
1109    while (t < endptr && *t++ != '\n') linelength++;        {
1110    length = multiline? endptr - ptr : linelength;            unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1111              const char *orig = ptr;
1112              ptr = malloc(newlen + 1);
1113              if (!ptr) {
1114                      printf("out of memory");
1115                      pcregrep_exit(2);
1116              }
1117              endptr = ptr;
1118              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1119              for (i = 0; i < jfriedl_XT; i++) {
1120                      strncpy(endptr, orig,  length);
1121                      endptr += length;
1122              }
1123              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1124              length = newlen;
1125          }
1126    
1127    /* Run through all the patterns until one matches. Note that we don't include        if (gettimeofday(&start_time, &dummy) != 0)
1128    the final newline in the subject string. */                perror("bad gettimeofday");
1129    
   for (i = 0; !match && i < pattern_count; i++)  
     {  
     match = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,  
       offsets, 99) >= 0;  
     }  
1130    
1131    /* If it's a match or a not-match (as required), print what's wanted. */        for (i = 0; i < jfriedl_XR; i++)
1132              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1133                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1134    
1135          if (gettimeofday(&end_time, &dummy) != 0)
1136                  perror("bad gettimeofday");
1137    
1138          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1139                          -
1140                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1141    
1142          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1143          return 0;
1144      }
1145    #endif
1146    
1147      /* We come back here after a match when the -o option (only_matching) is set,
1148      in order to find any further matches in the same line. */
1149    
1150      ONLY_MATCHING_RESTART:
1151    
1152      /* Run through all the patterns until one matches or there is an error other
1153      than NOMATCH. This code is in a subroutine so that it can be re-used for
1154      finding subsequent matches when colouring matched lines. */
1155    
1156      match = match_patterns(matchptr, length, offsets, &mrc);
1157    
1158      /* If it's a match or a not-match (as required), do what's wanted. */
1159    
1160    if (match != invert)    if (match != invert)
1161      {      {
1162      BOOL hyphenprinted = FALSE;      BOOL hyphenprinted = FALSE;
1163    
1164      if (filenames_nomatch_only) return 1;      /* We've failed if we want a file that doesn't have any matches. */
1165    
1166        if (filenames == FN_NOMATCH_ONLY) return 1;
1167    
1168        /* Just count if just counting is wanted. */
1169    
1170      if (count_only) count++;      if (count_only) count++;
1171    
1172      else if (filenames_only)      /* If all we want is a file name, there is no need to scan any more lines
1173        in the file. */
1174    
1175        else if (filenames == FN_MATCH_ONLY)
1176        {        {
1177        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1178        return 0;        return 0;
1179        }        }
1180    
1181        /* Likewise, if all we want is a yes/no answer. */
1182    
1183      else if (quiet) return 0;      else if (quiet) return 0;
1184    
1185        /* The --only-matching option prints just the substring that matched, or a
1186        captured portion of it, as long as this string is not empty, and the
1187        --file-offsets and --line-offsets options output offsets for the matching
1188        substring (they both force --only-matching = 0). None of these options
1189        prints any context. Afterwards, adjust the start and length, and then jump
1190        back to look for further matches in the same line. If we are in invert
1191        mode, however, nothing is printed and we do not restart - this could still
1192        be useful because the return code is set. */
1193    
1194        else if (only_matching >= 0)
1195          {
1196          if (!invert)
1197            {
1198            if (printname != NULL) fprintf(stdout, "%s:", printname);
1199            if (number) fprintf(stdout, "%d:", linenumber);
1200            if (line_offsets)
1201              fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1202                offsets[1] - offsets[0]);
1203            else if (file_offsets)
1204              fprintf(stdout, "%d,%d\n",
1205                (int)(filepos + matchptr + offsets[0] - ptr),
1206                offsets[1] - offsets[0]);
1207            else if (only_matching < mrc)
1208              {
1209              int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1210              if (plen > 0)
1211                {
1212                if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1213                FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1214                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1215                fprintf(stdout, "\n");
1216                }
1217              }
1218            else if (printname != NULL || number) fprintf(stdout, "\n");
1219            matchptr += offsets[1];
1220            length -= offsets[1];
1221            match = FALSE;
1222            if (line_buffered) fflush(stdout);
1223            rc = 0;    /* Had some success */
1224            goto ONLY_MATCHING_RESTART;
1225            }
1226          }
1227    
1228        /* This is the default case when none of the above options is set. We print
1229        the matching line(s), possibly preceded and/or followed by other lines of
1230        context. */
1231    
1232      else      else
1233        {        {
1234        /* See if there is a requirement to print some "after" lines from a        /* See if there is a requirement to print some "after" lines from a
# Line 467  while (ptr < endptr) Line 1236  while (ptr < endptr)
1236    
1237        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
1238          {          {
1239            int ellength;
1240          int linecount = 0;          int linecount = 0;
1241          char *p = lastmatchrestart;          char *p = lastmatchrestart;
1242    
1243          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
1244            {            {
1245            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
1246            linecount++;            linecount++;
1247            }            }
1248    
1249          /* It is important to advance lastmatchrestart during this printing so          /* It is important to advance lastmatchrestart during this printing so
1250          that it interacts correctly with any "before" printing below. */          that it interacts correctly with any "before" printing below. Print
1251            each line's data using fwrite() in case there are binary zeroes. */
1252    
1253          while (lastmatchrestart < p)          while (lastmatchrestart < p)
1254            {            {
1255            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
1256            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1257            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1258            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1259            fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1260            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
1261            }            }
1262          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
1263          }          }
# Line 510  while (ptr < endptr) Line 1280  while (ptr < endptr)
1280          char *p = ptr;          char *p = ptr;
1281    
1282          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1283                 linecount++ < before_context)                 linecount < before_context)
1284            {            {
1285            p--;            linecount++;
1286            while (p > buffer && p[-1] != '\n') p--;            p = previous_line(p, buffer);
1287            }            }
1288    
1289          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 521  while (ptr < endptr) Line 1291  while (ptr < endptr)
1291    
1292          while (p < ptr)          while (p < ptr)
1293            {            {
1294              int ellength;
1295            char *pp = p;            char *pp = p;
1296            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1297            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1298            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1299            fprintf(stdout, "%.*s", pp - p + 1, p);            FWRITE(p, 1, pp - p, stdout);
1300            p = pp + 1;            p = pp;
1301            }            }
1302          }          }
1303    
1304        /* Now print the matching line(s); ensure we set hyphenpending at the end        /* Now print the matching line(s); ensure we set hyphenpending at the end
1305        of the file. */        of the file if any context lines are being output. */
1306    
1307          if (after_context > 0 || before_context > 0)
1308            endhyphenpending = TRUE;
1309    
       endhyphenpending = TRUE;  
1310        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (printname != NULL) fprintf(stdout, "%s:", printname);
1311        if (number) fprintf(stdout, "%d:", linenumber);        if (number) fprintf(stdout, "%d:", linenumber);
1312    
1313        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1314        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1315        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1316        start of the match will always be before the first \n character. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1317          the match will always be before the first newline sequence. */
1318    
1319        if (multiline)        if (multiline & !invert)
1320          {          {
1321          char *endmatch = ptr + offsets[1];          char *endmatch = ptr + offsets[1];
1322          t = ptr;          t = ptr;
1323          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          while (t < endmatch)
1324          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1325          linelength = endmatch - ptr;            t = end_of_line(t, endptr, &endlinelength);
1326              if (t < endmatch) linenumber++; else break;
1327              }
1328            linelength = t - ptr - endlinelength;
1329            }
1330    
1331          /*** NOTE: Use only fwrite() to output the data line, so that binary
1332          zeroes are treated as just another data character. */
1333    
1334          /* This extra option, for Jeffrey Friedl's debugging requirements,
1335          replaces the matched string, or a specific captured string if it exists,
1336          with X. When this happens, colouring is ignored. */
1337    
1338    #ifdef JFRIEDL_DEBUG
1339          if (S_arg >= 0 && S_arg < mrc)
1340            {
1341            int first = S_arg * 2;
1342            int last  = first + 1;
1343            FWRITE(ptr, 1, offsets[first], stdout);
1344            fprintf(stdout, "X");
1345            FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1346          }          }
1347          else
1348    #endif
1349    
1350          /* We have to split the line(s) up if colouring, and search for further
1351          matches, but not of course if the line is a non-match. */
1352    
1353          if (do_colour && !invert)
1354            {
1355            int plength;
1356            int last_offset = 0;
1357            FWRITE(ptr, 1, offsets[0], stdout);
1358            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1359            FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1360            fprintf(stdout, "%c[00m", 0x1b);
1361            for (;;)
1362              {
1363              last_offset += offsets[1];
1364              matchptr += offsets[1];
1365              length -= offsets[1];
1366              if (last_offset >= linelength + endlinelength ||
1367                  !match_patterns(matchptr, length, offsets, &mrc)) break;
1368              FWRITE(matchptr, 1, offsets[0], stdout);
1369              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1370              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1371              fprintf(stdout, "%c[00m", 0x1b);
1372              }
1373    
1374        fprintf(stdout, "%.*s\n", linelength, ptr);          /* In multiline mode, we may have already printed the complete line
1375            and its line-ending characters (if they matched the pattern), so there
1376            may be no more to print. */
1377    
1378            plength = (linelength + endlinelength) - last_offset;
1379            if (plength > 0)
1380              FWRITE(ptr + last_offset, 1, plength, stdout);
1381            }
1382    
1383          /* Not colouring; no need to search for further matches */
1384    
1385          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1386        }        }
1387    
1388        /* End of doing what has to be done for a match. If --line-buffered was
1389        given, flush the output. */
1390    
1391        if (line_buffered) fflush(stdout);
1392      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1393    
1394      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1395      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1396    
1397      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1398      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1399      }      }
1400    
1401    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1402      anything to be printed), we have to move on to the end of the match before
1403      proceeding. */
1404    
1405      if (multiline && invert && match)
1406        {
1407        int ellength;
1408        char *endmatch = ptr + offsets[1];
1409        t = ptr;
1410        while (t < endmatch)
1411          {
1412          t = end_of_line(t, endptr, &ellength);
1413          if (t <= endmatch) linenumber++; else break;
1414          }
1415        endmatch = end_of_line(endmatch, endptr, &ellength);
1416        linelength = endmatch - ptr - ellength;
1417        }
1418    
1419      /* Advance to after the newline and increment the line number. The file
1420      offset to the current line is maintained in filepos. */
1421    
1422    ptr += linelength + 1;    ptr += linelength + endlinelength;
1423      filepos += (int)(linelength + endlinelength);
1424    linenumber++;    linenumber++;
1425    
1426      /* If input is line buffered, and the buffer is not yet full, read another
1427      line and add it into the buffer. */
1428    
1429      if (input_line_buffered && bufflength < sizeof(buffer))
1430        {
1431        int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1432        bufflength += add;
1433        endptr += add;
1434        }
1435    
1436    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1437    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1438    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
# Line 587  while (ptr < endptr) Line 1452  while (ptr < endptr)
1452    
1453      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1454      ptr -= MBUFTHIRD;      ptr -= MBUFTHIRD;
1455      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);  
1456    #ifdef SUPPORT_LIBZ
1457        if (frtype == FR_LIBZ)
1458          bufflength = 2*MBUFTHIRD +
1459            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1460        else
1461    #endif
1462    
1463    #ifdef SUPPORT_LIBBZ2
1464        if (frtype == FR_LIBBZ2)
1465          bufflength = 2*MBUFTHIRD +
1466            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1467        else
1468    #endif
1469    
1470        bufflength = 2*MBUFTHIRD +
1471          (input_line_buffered?
1472           read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1473           fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1474      endptr = buffer + bufflength;      endptr = buffer + bufflength;
1475    
1476      /* Adjust any last match point */      /* Adjust any last match point */
# Line 599  while (ptr < endptr) Line 1482  while (ptr < endptr)
1482  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
1483  hyphenpending if it prints something. */  hyphenpending if it prints something. */
1484    
1485  do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);  if (only_matching < 0 && !count_only)
1486  hyphenpending |= endhyphenpending;    {
1487      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1488      hyphenpending |= endhyphenpending;
1489      }
1490    
1491  /* Print the file name if we are looking for those without matches and there  /* Print the file name if we are looking for those without matches and there
1492  were none. If we found a match, we won't have got this far. */  were none. If we found a match, we won't have got this far. */
1493    
1494  if (filenames_nomatch_only)  if (filenames == FN_NOMATCH_ONLY)
1495    {    {
1496    fprintf(stdout, "%s\n", printname);    fprintf(stdout, "%s\n", printname);
1497    return 0;    return 0;
# Line 615  if (filenames_nomatch_only) Line 1501  if (filenames_nomatch_only)
1501    
1502  if (count_only)  if (count_only)
1503    {    {
1504    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1505    fprintf(stdout, "%d\n", count);      {
1506        if (printname != NULL && filenames != FN_NONE)
1507          fprintf(stdout, "%s:", printname);
1508        fprintf(stdout, "%d\n", count);
1509        }
1510    }    }
1511    
1512  return rc;  return rc;
# Line 633  recursing; if it's a file, grep it. Line 1523  recursing; if it's a file, grep it.
1523    
1524  Arguments:  Arguments:
1525    pathname          the path to investigate    pathname          the path to investigate
1526    dir_recurse       TRUE if recursing is wanted (-r)    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
   show_filenames    TRUE if file names are wanted for multiple files, except  
                       for the only file at top level when not filenames_only  
1527    only_one_at_top   TRUE if the path is the only one at toplevel    only_one_at_top   TRUE if the path is the only one at toplevel
1528    
1529  Returns:   0 if there was at least one match  Returns:   0 if there was at least one match
# Line 646  However, file opening failures are suppr Line 1534  However, file opening failures are suppr
1534  */  */
1535    
1536  static int  static int
1537  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL show_filenames,  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
   BOOL only_one_at_top)  
1538  {  {
1539  int rc = 1;  int rc = 1;
1540  int sep;  int sep;
1541  FILE *in;  int frtype;
1542  char *printname;  int pathlen;
1543    void *handle;
1544    FILE *in = NULL;           /* Ensure initialized */
1545    
1546    #ifdef SUPPORT_LIBZ
1547    gzFile ingz = NULL;
1548    #endif
1549    
1550    #ifdef SUPPORT_LIBBZ2
1551    BZFILE *inbz2 = NULL;
1552    #endif
1553    
1554  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1555    
1556  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1557    {    {
1558    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN,
1559      (filenames_only || filenames_nomatch_only ||      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
     (show_filenames && !only_one_at_top))?  
1560        stdin_name : NULL);        stdin_name : NULL);
1561    }    }
1562    
1563  /* If the file is a directory and we are recursing, scan each file within it,  /* If the file is a directory, skip if skipping or if we are recursing, scan
1564  subject to any include or exclude patterns that were set. The scanning code is  each file and directory within it, subject to any include or exclude patterns
1565  localized so it can be made system-specific. */  that were set. The scanning code is localized so it can be made
1566    system-specific. */
1567    
1568  if ((sep = isdirectory(pathname)) != 0 && dir_recurse)  if ((sep = isdirectory(pathname)) != 0)
1569    {    {
1570    char buffer[1024];    if (dee_action == dee_SKIP) return 1;
1571    char *nextfile;    if (dee_action == dee_RECURSE)
1572    directory_type *dir = opendirectory(pathname);      {
1573        char buffer[1024];
1574        char *nextfile;
1575        directory_type *dir = opendirectory(pathname);
1576    
1577        if (dir == NULL)
1578          {
1579          if (!silent)
1580            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1581              strerror(errno));
1582          return 2;
1583          }
1584    
1585        while ((nextfile = readdirectory(dir)) != NULL)
1586          {
1587          int frc, nflen;
1588          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1589          nflen = (int)(strlen(nextfile));
1590    
1591          if (isdirectory(buffer))
1592            {
1593            if (exclude_dir_compiled != NULL &&
1594                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1595              continue;
1596    
1597            if (include_dir_compiled != NULL &&
1598                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1599              continue;
1600            }
1601          else
1602            {
1603            if (exclude_compiled != NULL &&
1604                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1605              continue;
1606    
1607            if (include_compiled != NULL &&
1608                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1609              continue;
1610            }
1611    
1612          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1613          if (frc > 1) rc = frc;
1614           else if (frc == 0 && rc == 1) rc = 0;
1615          }
1616    
1617        closedirectory(dir);
1618        return rc;
1619        }
1620      }
1621    
1622    /* If the file is not a directory and not a regular file, skip it if that's
1623    been requested. */
1624    
1625    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1626    
1627    /* Control reaches here if we have a regular file, or if we have a directory
1628    and recursion or skipping was not requested, or if we have anything else and
1629    skipping was not requested. The scan proceeds. If this is the first and only
1630    argument at top level, we don't show the file name, unless we are only showing
1631    the file name, or the filename was forced (-H). */
1632    
1633    pathlen = (int)(strlen(pathname));
1634    
1635    if (dir == NULL)  /* Open using zlib if it is supported and the file name ends with .gz. */
1636    
1637    #ifdef SUPPORT_LIBZ
1638    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1639      {
1640      ingz = gzopen(pathname, "rb");
1641      if (ingz == NULL)
1642      {      {
1643      if (!silent)      if (!silent)
1644        fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1645          strerror(errno));          strerror(errno));
1646      return 2;      return 2;
1647      }      }
1648      handle = (void *)ingz;
1649      frtype = FR_LIBZ;
1650      }
1651    else
1652    #endif
1653    
1654    while ((nextfile = readdirectory(dir)) != NULL)  /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
     {  
     int frc, blen;  
     sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);  
     blen = strlen(buffer);  
   
     if (exclude_compiled != NULL &&  
         pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)  
       continue;  
1655    
1656      if (include_compiled != NULL &&  #ifdef SUPPORT_LIBBZ2
1657          pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1658        continue;    {
1659      inbz2 = BZ2_bzopen(pathname, "rb");
1660      handle = (void *)inbz2;
1661      frtype = FR_LIBBZ2;
1662      }
1663    else
1664    #endif
1665    
1666      frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);  /* Otherwise use plain fopen(). The label is so that we can come back here if
1667      if (frc > 1) rc = frc;  an attempt to read a .bz2 file indicates that it really is a plain file. */
      else if (frc == 0 && rc == 1) rc = 0;  
     }  
1668    
1669    closedirectory(dir);  #ifdef SUPPORT_LIBBZ2
1670    return rc;  PLAIN_FILE:
1671    #endif
1672      {
1673      in = fopen(pathname, "rb");
1674      handle = (void *)in;
1675      frtype = FR_PLAIN;
1676    }    }
1677    
1678  /* If the file is not a directory, or we are not recursing, scan it. If this is  /* All the opening methods return errno when they fail. */
 the first and only argument at top level, we don't show the file name (unless  
 we are only showing the file name). Otherwise, control is via the  
 show_filenames variable. */  
1679    
1680  in = fopen(pathname, "r");  if (handle == NULL)
 if (in == NULL)  
1681    {    {
1682    if (!silent)    if (!silent)
1683      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 719  if (in == NULL) Line 1685  if (in == NULL)
1685    return 2;    return 2;
1686    }    }
1687    
1688  printname =  (filenames_only || filenames_nomatch_only ||  /* Now grep the file */
1689    (show_filenames && !only_one_at_top))? pathname : NULL;  
1690    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1691      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1692    
1693    /* Close in an appropriate manner. */
1694    
1695    #ifdef SUPPORT_LIBZ
1696    if (frtype == FR_LIBZ)
1697      gzclose(ingz);
1698    else
1699    #endif
1700    
1701    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1702    read failed. If the error indicates that the file isn't in fact bzipped, try
1703    again as a normal file. */
1704    
1705    #ifdef SUPPORT_LIBBZ2
1706    if (frtype == FR_LIBBZ2)
1707      {
1708      if (rc == 2)
1709        {
1710        int errnum;
1711        const char *err = BZ2_bzerror(inbz2, &errnum);
1712        if (errnum == BZ_DATA_ERROR_MAGIC)
1713          {
1714          BZ2_bzclose(inbz2);
1715          goto PLAIN_FILE;
1716          }
1717        else if (!silent)
1718          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1719            pathname, err);
1720        }
1721      BZ2_bzclose(inbz2);
1722      }
1723    else
1724    #endif
1725    
1726  rc = pcregrep(in, printname);  /* Normal file close */
1727    
1728  fclose(in);  fclose(in);
1729    
1730    /* Pass back the yield from pcregrep(). */
1731    
1732  return rc;  return rc;
1733  }  }
1734    
# Line 738  return rc; Line 1742  return rc;
1742  static int  static int
1743  usage(int rc)  usage(int rc)
1744  {  {
1745  fprintf(stderr, "Usage: pcregrep [-LMVcfhilnqrsvwx] [long-options] [pattern] [file1 file2 ...]\n");  option_item *op;
1746  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Usage: pcregrep [-");
1747    for (op = optionlist; op->one_char != 0; op++)
1748      {
1749      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1750      }
1751    fprintf(stderr, "] [long options] [pattern] [files]\n");
1752    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1753      "options.\n");
1754  return rc;  return rc;
1755  }  }
1756    
# Line 757  option_item *op; Line 1768  option_item *op;
1768    
1769  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1770  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1771  printf("PATTERN must be present if -f is not used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1772  printf("\"-\" can be used as a file name to mean STDIN.\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
1773    
1774    #ifdef SUPPORT_LIBZ
1775    printf("Files whose names end in .gz are read using zlib.\n");
1776    #endif
1777    
1778    #ifdef SUPPORT_LIBBZ2
1779    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1780    #endif
1781    
1782    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1783    printf("Other files and the standard input are read as plain files.\n\n");
1784    #else
1785    printf("All files are read as plain files, without any interpretation.\n\n");
1786    #endif
1787    
1788    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1789  printf("Options:\n");  printf("Options:\n");
1790    
1791  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
1792    {    {
1793    int n;    int n;
1794    char s[4];    char s[4];
1795    
1796      /* Two options were accidentally implemented and documented with underscores
1797      instead of hyphens in their names, something that was not noticed for quite a
1798      few releases. When fixing this, I left the underscored versions in the list
1799      in case people were using them. However, we don't want to display them in the
1800      help data. There are no other options that contain underscores, and we do not
1801      expect ever to implement such options. Therefore, just omit any option that
1802      contains an underscore. */
1803    
1804      if (strchr(op->long_name, '_') != NULL) continue;
1805    
1806    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1807    printf("  %s --%s%n", s, op->long_name, &n);    n = 31 - printf("  %s --%s", s, op->long_name);
   n = 30 - n;  
1808    if (n < 1) n = 1;    if (n < 1) n = 1;
1809    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                     ", op->help_text);
1810    }    }
1811    
1812  printf("\nWhen reading patterns from a file instead of using a command line option,\n");  printf("\nWhen reading patterns from a file instead of using a command line option,\n");
# Line 794  handle_option(int letter, int options) Line 1829  handle_option(int letter, int options)
1829  {  {
1830  switch(letter)  switch(letter)
1831    {    {
1832    case -1:  help(); exit(0);    case N_FOFFSETS: file_offsets = TRUE; break;
1833      case N_HELP: help(); pcregrep_exit(0);
1834      case N_LOFFSETS: line_offsets = number = TRUE; break;
1835      case N_LBUFFER: line_buffered = TRUE; break;
1836    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1837    case 'h': filenames = FALSE; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1838      case 'H': filenames = FN_FORCE; break;
1839      case 'h': filenames = FN_NONE; break;
1840    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1841    case 'l': filenames_only = TRUE; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1842    case 'L': filenames_nomatch_only = TRUE; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1843    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1844    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1845      case 'o': only_matching = 0; break;
1846    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1847    case 'r': recurse = TRUE; break;    case 'r': dee_action = dee_RECURSE; break;
1848    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1849    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1850    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1851    case 'w': word_match = TRUE; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1852    case 'x': whole_lines = TRUE; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1853    
1854    case 'V':    case 'V':
1855    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
1856    fprintf(stderr, "PCRE version %s\n", pcre_version());    pcregrep_exit(0);
   exit(0);  
1857    break;    break;
1858    
1859    default:    default:
1860    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1861    exit(usage(2));    pcregrep_exit(usage(2));
1862    }    }
1863    
1864  return options;  return options;
# Line 828  return options; Line 1868  return options;
1868    
1869    
1870  /*************************************************  /*************************************************
1871    *          Construct printed ordinal             *
1872    *************************************************/
1873    
/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12, or 13 take "th" ("11th", "112th"), not "st", "nd", or "rd". The
text is built in a static buffer that is overwritten by the next call, so the
caller must use (or copy) the result before calling ordin() again.

Argument:   n    the number to convert
Returns:    pointer to the static buffer holding the ordinal text
*/

static char *
ordin(int n)
{
/* Large enough for a sign, the digits of any int up to 64 bits wide, the
two-letter suffix, and the terminating zero. */

static char buffer[32];
char *p = buffer;
int last_two = n % 100;

sprintf(p, "%d", n);
while (*p != 0) p++;

/* The "teens" are irregular: check them before looking at the last digit. */

if (last_two >= 11 && last_two <= 13)
  {
  strcpy(p, "th");
  }
else switch (n%10)
  {
  case 1: strcpy(p, "st"); break;
  case 2: strcpy(p, "nd"); break;
  case 3: strcpy(p, "rd"); break;
  default: strcpy(p, "th"); break;
  }
return buffer;
}
1892    
1893    
1894    
1895    /*************************************************
1896    *          Compile a single pattern              *
1897    *************************************************/
1898    
1899    /* When the -F option has been used, this is called for each substring.
1900    Otherwise it's called for each supplied pattern.
1901    
1902    Arguments:
1903      pattern        the pattern string
1904      options        the PCRE options
1905      filename       the file name, or NULL for a command-line pattern
1906      count          0 if this is the only command line pattern, or
1907                     number of the command line pattern, or
1908                     linenumber for a pattern from a file
1909    
1910    Returns:         TRUE on success, FALSE after an error
1911    */
1912    
1913    static BOOL
1914    compile_single_pattern(char *pattern, int options, char *filename, int count)
1915    {
1916    char buffer[MBUFTHIRD + 16];
1917    const char *error;
1918    int errptr;
1919    
1920    if (pattern_count >= MAX_PATTERN_COUNT)
1921      {
1922      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1923        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1924      return FALSE;
1925      }
1926    
1927    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1928      suffix[process_options]);
1929    pattern_list[pattern_count] =
1930      pcre_compile(buffer, options, &error, &errptr, pcretables);
1931    if (pattern_list[pattern_count] != NULL)
1932      {
1933      pattern_count++;
1934      return TRUE;
1935      }
1936    
1937    /* Handle compile errors */
1938    
1939    errptr -= (int)strlen(prefix[process_options]);
1940    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1941    
1942    if (filename == NULL)
1943      {
1944      if (count == 0)
1945        fprintf(stderr, "pcregrep: Error in command-line regex "
1946          "at offset %d: %s\n", errptr, error);
1947      else
1948        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1949          "at offset %d: %s\n", ordin(count), errptr, error);
1950      }
1951    else
1952      {
1953      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1954        "at offset %d: %s\n", count, filename, errptr, error);
1955      }
1956    
1957    return FALSE;
1958    }
1959    
1960    
1961    
1962    /*************************************************
1963    *           Compile one supplied pattern         *
1964    *************************************************/
1965    
1966    /* When the -F option has been used, each string may be a list of strings,
1967    separated by line breaks. They will be matched literally.
1968    
1969    Arguments:
1970      pattern        the pattern string
1971      options        the PCRE options
1972      filename       the file name, or NULL for a command-line pattern
1973      count          0 if this is the only command line pattern, or
1974                     number of the command line pattern, or
1975                     linenumber for a pattern from a file
1976    
1977    Returns:         TRUE on success, FALSE after an error
1978    */
1979    
1980    static BOOL
1981    compile_pattern(char *pattern, int options, char *filename, int count)
1982    {
1983    if ((process_options & PO_FIXED_STRINGS) != 0)
1984      {
1985      char *eop = pattern + strlen(pattern);
1986      char buffer[MBUFTHIRD];
1987      for(;;)
1988        {
1989        int ellength;
1990        char *p = end_of_line(pattern, eop, &ellength);
1991        if (ellength == 0)
1992          return compile_single_pattern(pattern, options, filename, count);
1993        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1994        pattern = p;
1995        if (!compile_single_pattern(buffer, options, filename, count))
1996          return FALSE;
1997        }
1998      }
1999    else return compile_single_pattern(pattern, options, filename, count);
2000    }
2001    
2002    
2003    
2004    /*************************************************
2005  *                Main program                    *  *                Main program                    *
2006  *************************************************/  *************************************************/
2007    
# Line 838  main(int argc, char **argv) Line 2012  main(int argc, char **argv)
2012  {  {
2013  int i, j;  int i, j;
2014  int rc = 1;  int rc = 1;
2015  int options = 0;  int pcre_options = 0;
2016    int cmd_pattern_count = 0;
2017    int hint_count = 0;
2018  int errptr;  int errptr;
 const char *error;  
2019  BOOL only_one_at_top;  BOOL only_one_at_top;
2020    char *patterns[MAX_PATTERN_COUNT];
2021    const char *locale_from = "--locale";
2022    const char *error;
2023    
2024    /* Set the default line ending value from the default in the PCRE library;
2025    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2026    Note that the return values from pcre_config(), though derived from the ASCII
2027    codes, are the same in EBCDIC environments, so we must use the actual values
2028    rather than escapes such as '\r'. */
2029    
2030    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2031    switch(i)
2032      {
2033      default:               newline = (char *)"lf"; break;
2034      case 13:               newline = (char *)"cr"; break;
2035      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2036      case -1:               newline = (char *)"any"; break;
2037      case -2:               newline = (char *)"anycrlf"; break;
2038      }
2039    
2040  /* Process the options */  /* Process the options */
2041    
# Line 855  for (i = 1; i < argc; i++) Line 2049  for (i = 1; i < argc; i++)
2049    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
2050    
2051    /* If we hit an argument that is just "-", it may be a reference to STDIN,    /* If we hit an argument that is just "-", it may be a reference to STDIN,
2052    but only if we have previously had -f to define the patterns. */    but only if we have previously had -e or -f to define the patterns. */
2053    
2054    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2055      {      {
2056      if (pattern_filename != NULL) break;      if (pattern_filename != NULL || pattern_count > 0) break;
2057        else exit(usage(2));        else pcregrep_exit(usage(2));
2058      }      }
2059    
2060    /* Handle a long name option, or -- to terminate the options */    /* Handle a long name option, or -- to terminate the options */
# Line 881  for (i = 1; i < argc; i++) Line 2075  for (i = 1; i < argc; i++)
2075      /* Some long options have data that follows after =, for example file=name.      /* Some long options have data that follows after =, for example file=name.
2076      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2077      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2078      with Jeff Friedl in preferring "regex" without the "p". These options are      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2079      entered in the table as "regex(p)". No option is in both these categories,      These options are entered in the table as "regex(p)". Options can be in
2080      fortunately. */      both these categories. */
2081    
2082      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2083        {        {
2084        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2085        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2086        if (opbra == NULL)     /* Not a (p) case */  
2087          /* Handle options with only one spelling of the name */
2088    
2089          if (opbra == NULL)     /* Does not contain '(' */
2090          {          {
2091          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2092            {            {
# Line 897  for (i = 1; i < argc; i++) Line 2094  for (i = 1; i < argc; i++)
2094            }            }
2095          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2096            {            {
2097            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2098            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2099                (int)strlen(arg) : (int)(argequals - arg);
2100            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2101              {              {
2102              option_data = arg + arglen;              option_data = arg + arglen;
# Line 911  for (i = 1; i < argc; i++) Line 2109  for (i = 1; i < argc; i++)
2109              }              }
2110            }            }
2111          }          }
2112        else                   /* Special case xxxx(p) */  
2113          /* Handle options with an alternate spelling of the name */
2114    
2115          else
2116          {          {
2117          char buff1[24];          char buff1[24];
2118          char buff2[24];          char buff2[24];
2119          int baselen = opbra - op->long_name;  
2120            int baselen = (int)(opbra - op->long_name);
2121            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2122            int arglen = (argequals == NULL || equals == NULL)?
2123              (int)strlen(arg) : (int)(argequals - arg);
2124    
2125          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2126          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2127            opbra + 1);  
2128          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2129               strncmp(arg, buff2, arglen) == 0)
2130              {
2131              if (equals != NULL && argequals != NULL)
2132                {
2133                option_data = argequals;
2134                if (*option_data == '=')
2135                  {
2136                  option_data++;
2137                  longopwasequals = TRUE;
2138                  }
2139                }
2140            break;            break;
2141              }
2142          }          }
2143        }        }
2144    
2145      if (op->one_char == 0)      if (op->one_char == 0)
2146        {        {
2147        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2148        exit(usage(2));        pcregrep_exit(usage(2));
2149        }        }
2150      }      }
2151    
2152      /* Jeffrey Friedl's debugging harness uses these additional options which
2153      are not in the right form for putting in the option table because they use
2154      only one hyphen, yet are more than one character long. By putting them
2155      separately here, they will not get displayed as part of the help() output,
2156      but I don't think Jeffrey will care about that. */
2157    
2158    #ifdef JFRIEDL_DEBUG
2159      else if (strcmp(argv[i], "-pre") == 0) {
2160              jfriedl_prefix = argv[++i];
2161              continue;
2162      } else if (strcmp(argv[i], "-post") == 0) {
2163              jfriedl_postfix = argv[++i];
2164              continue;
2165      } else if (strcmp(argv[i], "-XT") == 0) {
2166              sscanf(argv[++i], "%d", &jfriedl_XT);
2167              continue;
2168      } else if (strcmp(argv[i], "-XR") == 0) {
2169              sscanf(argv[++i], "%d", &jfriedl_XR);
2170              continue;
2171      }
2172    #endif
2173    
2174    
2175    /* One-char options; many that have no data may be in a single argument; we    /* One-char options; many that have no data may be in a single argument; we
2176    continue till we hit the last one or one that needs data. */    continue till we hit the last one or one that needs data. */
2177    
# Line 941  for (i = 1; i < argc; i++) Line 2182  for (i = 1; i < argc; i++)
2182      while (*s != 0)      while (*s != 0)
2183        {        {
2184        for (op = optionlist; op->one_char != 0; op++)        for (op = optionlist; op->one_char != 0; op++)
2185          { if (*s == op->one_char) break; }          {
2186            if (*s == op->one_char) break;
2187            }
2188        if (op->one_char == 0)        if (op->one_char == 0)
2189          {          {
2190          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2191            *s, argv[i]);            *s, argv[i]);
2192          exit(usage(2));          pcregrep_exit(usage(2));
2193            }
2194    
2195          /* Check for a single-character option that has data: OP_OP_NUMBER
2196          is used for one that either has a numerical number or defaults, i.e. the
2197          data is optional. If a digit follows, there is data; if not, carry on
2198          with other single-character options in the same string. */
2199    
2200          option_data = s+1;
2201          if (op->type == OP_OP_NUMBER)
2202            {
2203            if (isdigit((unsigned char)s[1])) break;
2204          }          }
2205        if (op->type != OP_NODATA || s[1] == 0)        else   /* Check for end or a dataless option */
2206          {          {
2207          option_data = s+1;          if (op->type != OP_NODATA || s[1] == 0) break;
         break;  
2208          }          }
2209        options = handle_option(*s++, options);  
2210          /* Handle a single-character option with no data, then loop for the
2211          next character in the string. */
2212    
2213          pcre_options = handle_option(*s++, pcre_options);
2214        }        }
2215      }      }
2216    
2217    /* At this point we should have op pointing to a matched option */    /* At this point we should have op pointing to a matched option. If the type
2218      is OP_NODATA, it means that there is no data, and the option might set
2219      something in the PCRE options. */
2220    
2221    if (op->type == OP_NODATA)    if (op->type == OP_NODATA)
     options = handle_option(op->one_char, options);  
   else  
2222      {      {
2223      if (*option_data == 0)      pcre_options = handle_option(op->one_char, pcre_options);
2224        continue;
2225        }
2226    
2227      /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2228      either has a value or defaults to something. It cannot have data in a
2229      separate item. At the moment, the only such options are "colo(u)r",
2230      "only-matching", and Jeffrey Friedl's special -S debugging option. */
2231    
2232      if (*option_data == 0 &&
2233          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2234        {
2235        switch (op->one_char)
2236        {        {
2237        if (i >= argc - 1 || longopwasequals)        case N_COLOUR:
2238          {        colour_option = (char *)"auto";
2239          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        break;
2240          exit(usage(2));  
2241          }        case 'o':
2242        option_data = argv[++i];        only_matching = 0;
2243          break;
2244    
2245    #ifdef JFRIEDL_DEBUG
2246          case 'S':
2247          S_arg = 0;
2248          break;
2249    #endif
2250          }
2251        continue;
2252        }
2253    
2254      /* Otherwise, find the data string for the option. */
2255    
2256      if (*option_data == 0)
2257        {
2258        if (i >= argc - 1 || longopwasequals)
2259          {
2260          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2261          pcregrep_exit(usage(2));
2262          }
2263        option_data = argv[++i];
2264        }
2265    
2266      /* If the option type is OP_PATLIST, it's the -e option, which can be called
2267      multiple times to create a list of patterns. */
2268    
2269      if (op->type == OP_PATLIST)
2270        {
2271        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2272          {
2273          fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2274            MAX_PATTERN_COUNT);
2275          return 2;
2276        }        }
2277        patterns[cmd_pattern_count++] = option_data;
2278        }
2279    
2280      /* Otherwise, deal with single string or numeric data values. */
2281    
2282      else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2283               op->type != OP_OP_NUMBER)
2284        {
2285        *((char **)op->dataptr) = option_data;
2286        }
2287    
2288      if (op->type == OP_STRING) *((char **)op->dataptr) = option_data; else    /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2289      only for unpicking arguments, so just keep it simple. */
2290    
2291      else
2292        {
2293        unsigned long int n = 0;
2294        char *endptr = option_data;
2295        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2296        while (isdigit((unsigned char)(*endptr)))
2297          n = n * 10 + (int)(*endptr++ - '0');
2298        if (*endptr != 0)
2299        {        {
2300        char *endptr;        if (longop)
       int n = strtoul(option_data, &endptr, 10);  
       if (*endptr != 0)  
2301          {          {
2302          if (longop)          char *equals = strchr(op->long_name, '=');
2303            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%s\n",          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2304              option_data, op->long_name);            (int)(equals - op->long_name);
2305          else          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2306            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",            option_data, nlen, op->long_name);
             option_data, op->one_char);  
         exit(usage(2));  
2307          }          }
2308        *((int *)op->dataptr) = n;        else
2309            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2310              option_data, op->one_char);
2311          pcregrep_exit(usage(2));
2312        }        }
2313        if (op->type == OP_LONGNUMBER)
2314            *((unsigned long int *)op->dataptr) = n;
2315        else
2316            *((int *)op->dataptr) = n;
2317      }      }
2318    }    }
2319    
# Line 1001  if (both_context > 0) Line 2326  if (both_context > 0)
2326    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2327    }    }
2328    
2329  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2330  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  However, the latter two set only_matching. */
2331    
2332  if (pattern_list == NULL || hints_list == NULL)  if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2333        (file_offsets && line_offsets))
2334    {    {
2335    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2336    return 2;      "and/or --line-offsets\n");
2337      pcregrep_exit(usage(2));
2338    }    }
2339    
2340  /* Compile the regular expression(s). */  if (file_offsets || line_offsets) only_matching = 0;
2341    
2342  if (pattern_filename != NULL)  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2343    LC_ALL environment variable is set, and if so, use it. */
2344    
2345    if (locale == NULL)
2346      {
2347      locale = getenv("LC_ALL");
2348      locale_from = "LCC_ALL";
2349      }
2350    
2351    if (locale == NULL)
2352    {    {
2353    FILE *f = fopen(pattern_filename, "r");    locale = getenv("LC_CTYPE");
2354    char buffer[MBUFTHIRD + 16];    locale_from = "LC_CTYPE";
2355    char *rdstart;    }
   int adjust = 0;  
2356    
2357    if (f == NULL)  /* If a locale has been provided, set it, and generate the tables the PCRE
2358    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2359    
2360    if (locale != NULL)
2361      {
2362      if (setlocale(LC_CTYPE, locale) == NULL)
2363      {      {
2364      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2365        strerror(errno));        locale, locale_from);
2366      return 2;      return 2;
2367      }      }
2368      pcretables = pcre_maketables();
2369      }
2370    
2371    /* Sort out colouring */
2372    
2373    if (whole_lines)  if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2374      {
2375      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2376      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2377      else
2378        {
2379        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2380          colour_option);
2381        return 2;
2382        }
2383      if (do_colour)
2384      {      {
2385      strcpy(buffer, "^(?:");      char *cs = getenv("PCREGREP_COLOUR");
2386      adjust = 4;      if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2387        if (cs != NULL) colour_string = cs;
2388      }      }
2389    else if (word_match)    }
2390    
2391    /* Interpret the newline type; the default settings are Unix-like. */
2392    
2393    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2394      {
2395      pcre_options |= PCRE_NEWLINE_CR;
2396      endlinetype = EL_CR;
2397      }
2398    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2399      {
2400      pcre_options |= PCRE_NEWLINE_LF;
2401      endlinetype = EL_LF;
2402      }
2403    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2404      {
2405      pcre_options |= PCRE_NEWLINE_CRLF;
2406      endlinetype = EL_CRLF;
2407      }
2408    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2409      {
2410      pcre_options |= PCRE_NEWLINE_ANY;
2411      endlinetype = EL_ANY;
2412      }
2413    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2414      {
2415      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2416      endlinetype = EL_ANYCRLF;
2417      }
2418    else
2419      {
2420      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2421      return 2;
2422      }
2423    
2424    /* Interpret the text values for -d and -D */
2425    
2426    if (dee_option != NULL)
2427      {
2428      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2429      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2430      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2431      else
2432      {      {
2433      strcpy(buffer, "\\b");      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2434      adjust = 2;      return 2;
2435      }      }
2436      }
2437    
2438    rdstart = buffer + adjust;  if (DEE_option != NULL)
2439    while (fgets(rdstart, MBUFTHIRD, f) != NULL)    {
2440      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2441      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2442      else
2443      {      {
2444      char *s = rdstart + (int)strlen(rdstart);      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2445      if (pattern_count >= MAX_PATTERN_COUNT)      return 2;
       {  
       fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",  
         MAX_PATTERN_COUNT);  
       return 2;  
       }  
     while (s > rdstart && isspace((unsigned char)(s[-1]))) s--;  
     if (s == rdstart) continue;  
     if (whole_lines) strcpy(s, ")$");  
       else if (word_match)strcpy(s, "\\b");  
         else *s = 0;  
     pattern_list[pattern_count] = pcre_compile(buffer, options, &error,  
       &errptr, NULL);  
     if (pattern_list[pattern_count++] == NULL)  
       {  
       fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",  
         pattern_count, errptr - adjust, error);  
       return 2;  
       }  
2446      }      }
   fclose(f);  
2447    }    }
2448    
2449  /* If no file name, a single regex must be given inline. */  /* Check the values for Jeffrey Friedl's debugging options. */
2450    
2451  else  #ifdef JFRIEDL_DEBUG
2452    if (S_arg > 9)
2453      {
2454      fprintf(stderr, "pcregrep: bad value for -S option\n");
2455      return 2;
2456      }
2457    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2458      {
2459      if (jfriedl_XT == 0) jfriedl_XT = 1;
2460      if (jfriedl_XR == 0) jfriedl_XR = 1;
2461      }
2462    #endif
2463    
2464    /* Get memory to store the pattern and hints lists. */
2465    
2466    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2467    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2468    
2469    if (pattern_list == NULL || hints_list == NULL)
2470    {    {
2471    char buffer[MBUFTHIRD + 16];    fprintf(stderr, "pcregrep: malloc failed\n");
2472    char *pat;    goto EXIT2;
2473    int adjust = 0;    }
2474    
2475    /* If no patterns were provided by -e, and there is no file provided by -f,
2476    the first argument is the one and only pattern, and it must exist. */
2477    
2478    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2479      {
2480    if (i >= argc) return usage(2);    if (i >= argc) return usage(2);
2481      patterns[cmd_pattern_count++] = argv[i++];
2482      }
2483    
2484    /* Compile the patterns that were provided on the command line, either by
2485    multiple uses of -e or as a single unkeyed pattern. */
2486    
2487    for (j = 0; j < cmd_pattern_count; j++)
2488      {
2489      if (!compile_pattern(patterns[j], pcre_options, NULL,
2490           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2491        goto EXIT2;
2492      }
2493    
2494    /* Compile the regular expressions that are provided in a file. */
2495    
2496    if (pattern_filename != NULL)
2497      {
2498      int linenumber = 0;
2499      FILE *f;
2500      char *filename;
2501      char buffer[MBUFTHIRD];
2502    
2503    if (whole_lines)    if (strcmp(pattern_filename, "-") == 0)
2504      {      {
2505      sprintf(buffer, "^(?:%.*s)$", MBUFTHIRD, argv[i++]);      f = stdin;
2506      pat = buffer;      filename = stdin_name;
     adjust = 4;  
2507      }      }
2508    else if (word_match)    else
2509      {      {
2510      sprintf(buffer, "\\b%.*s\\b", MBUFTHIRD, argv[i++]);      f = fopen(pattern_filename, "r");
2511      pat = buffer;      if (f == NULL)
2512      adjust = 2;        {
2513          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2514            strerror(errno));
2515          goto EXIT2;
2516          }
2517        filename = pattern_filename;
2518      }      }
   else pat = argv[i++];  
   
   pattern_list[0] = pcre_compile(pat, options, &error, &errptr, NULL);  
2519    
2520    if (pattern_list[0] == NULL)    while (fgets(buffer, MBUFTHIRD, f) != NULL)
2521      {      {
2522      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n",      char *s = buffer + (int)strlen(buffer);
2523        errptr - adjust, error);      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2524      return 2;      *s = 0;
2525        linenumber++;
2526        if (buffer[0] == 0) continue;   /* Skip blank lines */
2527        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2528          goto EXIT2;
2529      }      }
2530    pattern_count++;  
2531      if (f != stdin) fclose(f);
2532    }    }
2533    
2534  /* Study the regular expressions, as we will be running them many times */  /* Study the regular expressions, as we will be running them many times */
# Line 1109  for (j = 0; j < pattern_count; j++) Line 2541  for (j = 0; j < pattern_count; j++)
2541      char s[16];      char s[16];
2542      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2543      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2544      return 2;      goto EXIT2;
2545        }
2546      hint_count++;
2547      }
2548    
2549    /* If --match-limit or --recursion-limit was set, put the value(s) into the
2550    pcre_extra block for each pattern. */
2551    
2552    if (match_limit > 0 || match_limit_recursion > 0)
2553      {
2554      for (j = 0; j < pattern_count; j++)
2555        {
2556        if (hints_list[j] == NULL)
2557          {
2558          hints_list[j] = malloc(sizeof(pcre_extra));
2559          if (hints_list[j] == NULL)
2560            {
2561            fprintf(stderr, "pcregrep: malloc failed\n");
2562            pcregrep_exit(2);
2563            }
2564          }
2565        if (match_limit > 0)
2566          {
2567          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2568          hints_list[j]->match_limit = match_limit;
2569          }
2570        if (match_limit_recursion > 0)
2571          {
2572          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2573          hints_list[j]->match_limit_recursion = match_limit_recursion;
2574          }
2575      }      }
2576    }    }
2577    
# Line 1117  for (j = 0; j < pattern_count; j++) Line 2579  for (j = 0; j < pattern_count; j++)
2579    
2580  if (exclude_pattern != NULL)  if (exclude_pattern != NULL)
2581    {    {
2582    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, NULL);    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2583        pcretables);
2584    if (exclude_compiled == NULL)    if (exclude_compiled == NULL)
2585      {      {
2586      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2587        errptr, error);        errptr, error);
2588      return 2;      goto EXIT2;
2589      }      }
2590    }    }
2591    
2592  if (include_pattern != NULL)  if (include_pattern != NULL)
2593    {    {
2594    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, NULL);    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2595        pcretables);
2596    if (include_compiled == NULL)    if (include_compiled == NULL)
2597      {      {
2598      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2599        errptr, error);        errptr, error);
2600      return 2;      goto EXIT2;
2601        }
2602      }
2603    
2604    if (exclude_dir_pattern != NULL)
2605      {
2606      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2607        pcretables);
2608      if (exclude_dir_compiled == NULL)
2609        {
2610        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2611          errptr, error);
2612        goto EXIT2;
2613        }
2614      }
2615    
2616    if (include_dir_pattern != NULL)
2617      {
2618      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2619        pcretables);
2620      if (include_dir_compiled == NULL)
2621        {
2622        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2623          errptr, error);
2624        goto EXIT2;
2625      }      }
2626    }    }
2627    
2628  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
2629    
2630  if (i >= argc) return pcregrep(stdin,  if (i >= argc)
2631    (filenames_only || filenames_nomatch_only)? stdin_name : NULL);    {
2632      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2633      goto EXIT;
2634      }
2635    
2636  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2637  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
2638  the file name if the argument is not a directory and filenames_only is not set.  the file name if the argument is not a directory and filenames are not
2639  */  otherwise forced. */
2640    
2641  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
2642    
2643  for (; i < argc; i++)  for (; i < argc; i++)
2644    {    {
2645    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2646        only_one_at_top);
2647    if (frc > 1) rc = frc;    if (frc > 1) rc = frc;
2648      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2649    }    }
2650    
2651  return rc;  EXIT:
2652    if (pattern_list != NULL)
2653      {
2654      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2655      free(pattern_list);
2656      }
2657    if (hints_list != NULL)
2658      {
2659      for (i = 0; i < hint_count; i++)
2660        {
2661        if (hints_list[i] != NULL) free(hints_list[i]);
2662        }
2663      free(hints_list);
2664      }
2665    pcregrep_exit(rc);
2666    
2667    EXIT2:
2668    rc = 2;
2669    goto EXIT;
2670  }  }
2671    
2672  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.77  
changed lines
  Added in v.589

  ViewVC Help
Powered by ViewVC 1.1.5