/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 1136 by chpe, Thu Oct 18 18:35:41 2012 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
# Line 45  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
 #include "config.h"  
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 55  POSSIBILITY OF SUCH DAMAGE. Line 70  POSSIBILITY OF SUCH DAMAGE.
70    
71  typedef int BOOL;  typedef int BOOL;
72    
73  #define VERSION "4.0 07-Jun-2005"  #define OFFSET_SIZE 99
 #define MAX_PATTERN_COUNT 100  
74    
75  #if BUFSIZ > 8192  #if BUFSIZ > 8192
76  #define MBUFTHIRD BUFSIZ  #define MAXPATLEN BUFSIZ
77  #else  #else
78  #define MBUFTHIRD 8192  #define MAXPATLEN 8192
79  #endif  #endif
80    
81    #define PATBUFSIZE (MAXPATLEN + 10)   /* Allows for prefix+suffix */
82    
83    /* Values for the "filenames" variable, which specifies options for file name
84    output. The order is important; it is assumed that a file name is wanted for
85    all values greater than FN_DEFAULT. */
86    
87    enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
88    
89    /* File reading styles */
90    
91    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
92    
93    /* Actions for the -d and -D options */
94    
95    enum { dee_READ, dee_SKIP, dee_RECURSE };
96    enum { DEE_READ, DEE_SKIP };
97    
98    /* Actions for special processing options (flag bits) */
99    
100    #define PO_WORD_MATCH     0x0001
101    #define PO_LINE_MATCH     0x0002
102    #define PO_FIXED_STRINGS  0x0004
103    
104    /* Line ending types */
105    
106    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
107    
108    /* Binary file options */
109    
110    enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
111    
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that tests the result and then does nothing with it.
Oddly, this does not also seem to apply to fprintf().

The test is wrapped in do { } while (0) so that "FWRITE(...);" is exactly one
statement. A bare "if (...) {}" followed by a semicolon is two statements, and
so cannot be used as the unbraced body of an "if" that has an "else". */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
119    
120    
121    
122  /*************************************************  /*************************************************
123  *               Global variables                 *  *               Global variables                 *
124  *************************************************/  *************************************************/
125    
126  static char *pattern_filename = NULL;  /* Jeffrey Friedl has some debugging requirements that are not part of the
127  static char *stdin_name = (char *)"(standard input)";  regular code. */
128  static int  pattern_count = 0;  
129  static pcre **pattern_list;  #ifdef JFRIEDL_DEBUG
130  static pcre_extra **hints_list;  static int S_arg = -1;
131    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
132    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
133    static const char *jfriedl_prefix = "";
134    static const char *jfriedl_postfix = "";
135    #endif
136    
137  static char *include_pattern = NULL;  static int  endlinetype;
 static char *exclude_pattern = NULL;  
138    
139  static pcre *include_compiled = NULL;  static char *colour_string = (char *)"1;31";
140  static pcre *exclude_compiled = NULL;  static char *colour_option = NULL;
141    static char *dee_option = NULL;
142    static char *DEE_option = NULL;
143    static char *locale = NULL;
144    static char *main_buffer = NULL;
145    static char *newline = NULL;
146    static char *om_separator = (char *)"";
147    static char *stdin_name = (char *)"(standard input)";
148    
149    static const unsigned char *pcretables = NULL;
150    
151  static int after_context = 0;  static int after_context = 0;
152  static int before_context = 0;  static int before_context = 0;
153    static int binary_files = BIN_BINARY;
154  static int both_context = 0;  static int both_context = 0;
155    static int bufthird = PCREGREP_BUFSIZE;
156    static int bufsize = 3*PCREGREP_BUFSIZE;
157    
158    #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
159    static int dee_action = dee_SKIP;
160    #else
161    static int dee_action = dee_READ;
162    #endif
163    
164    static int DEE_action = DEE_READ;
165    static int error_count = 0;
166    static int filenames = FN_DEFAULT;
167    static int pcre_options = 0;
168    static int process_options = 0;
169    
170    #ifdef SUPPORT_PCREGREP_JIT
171    static int study_options = PCRE_STUDY_JIT_COMPILE;
172    #else
173    static int study_options = 0;
174    #endif
175    
176    static unsigned long int match_limit = 0;
177    static unsigned long int match_limit_recursion = 0;
178    
179  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
180  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
181  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
 static BOOL filenames_nomatch_only = FALSE;  
182  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
183  static BOOL invert = FALSE;  static BOOL invert = FALSE;
184    static BOOL line_buffered = FALSE;
185    static BOOL line_offsets = FALSE;
186  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
187  static BOOL number = FALSE;  static BOOL number = FALSE;
188    static BOOL omit_zero_count = FALSE;
189    static BOOL resource_error = FALSE;
190  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
191  static BOOL recurse = FALSE;  static BOOL show_only_matching = FALSE;
192  static BOOL silent = FALSE;  static BOOL silent = FALSE;
193  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
194  static BOOL word_match = FALSE;  
195    /* Structure for list of --only-matching capturing numbers. */
196    
197    typedef struct omstr {
198      struct omstr *next;
199      int groupnum;
200    } omstr;
201    
202    static omstr *only_matching = NULL;
203    static omstr *only_matching_last = NULL;
204    
205    /* Structure for holding the two variables that describe a number chain. */
206    
207    typedef struct omdatastr {
208      omstr **anchor;
209      omstr **lastptr;
210    } omdatastr;
211    
212    static omdatastr only_matching_data = { &only_matching, &only_matching_last };
213    
214    /* Structure for list of file names (for -f and --{in,ex}clude-from) */
215    
216    typedef struct fnstr {
217      struct fnstr *next;
218      char *name;
219    } fnstr;
220    
221    static fnstr *exclude_from = NULL;
222    static fnstr *exclude_from_last = NULL;
223    static fnstr *include_from = NULL;
224    static fnstr *include_from_last = NULL;
225    
226    static fnstr *file_lists = NULL;
227    static fnstr *file_lists_last = NULL;
228    static fnstr *pattern_files = NULL;
229    static fnstr *pattern_files_last = NULL;
230    
231    /* Structure for holding the two variables that describe a file name chain. */
232    
233    typedef struct fndatastr {
234      fnstr **anchor;
235      fnstr **lastptr;
236    } fndatastr;
237    
238    static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
239    static fndatastr include_from_data = { &include_from, &include_from_last };
240    static fndatastr file_lists_data = { &file_lists, &file_lists_last };
241    static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
242    
243    /* Structure for pattern and its compiled form; used for matching patterns and
244    also for include/exclude patterns. */
245    
246    typedef struct patstr {
247      struct patstr *next;
248      char *string;
249      pcre *compiled;
250      pcre_extra *hint;
251    } patstr;
252    
253    static patstr *patterns = NULL;
254    static patstr *patterns_last = NULL;
255    static patstr *include_patterns = NULL;
256    static patstr *include_patterns_last = NULL;
257    static patstr *exclude_patterns = NULL;
258    static patstr *exclude_patterns_last = NULL;
259    static patstr *include_dir_patterns = NULL;
260    static patstr *include_dir_patterns_last = NULL;
261    static patstr *exclude_dir_patterns = NULL;
262    static patstr *exclude_dir_patterns_last = NULL;
263    
264    /* Structure holding the two variables that describe a pattern chain. A pointer
265    to such structures is used for each appropriate option. */
266    
267    typedef struct patdatastr {
268      patstr **anchor;
269      patstr **lastptr;
270    } patdatastr;
271    
272    static patdatastr match_patdata = { &patterns, &patterns_last };
273    static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
274    static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
275    static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
276    static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
277    
278    static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
279                                     &include_dir_patterns, &exclude_dir_patterns };
280    
281    static const char *incexname[4] = { "--include", "--exclude",
282                                        "--include-dir", "--exclude-dir" };
283    
284  /* Structure for options and list of them */  /* Structure for options and list of them */
285    
286  enum { OP_NODATA, OP_STRING, OP_NUMBER };  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
287           OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
288    
289  typedef struct option_item {  typedef struct option_item {
290    int type;    int type;
# Line 112  typedef struct option_item { Line 294  typedef struct option_item {
294    const char *help_text;    const char *help_text;
295  } option_item;  } option_item;
296    
297    /* Options without a single-letter equivalent get a negative value. This can be
298    used to identify them. */
299    
300    #define N_COLOUR       (-1)
301    #define N_EXCLUDE      (-2)
302    #define N_EXCLUDE_DIR  (-3)
303    #define N_HELP         (-4)
304    #define N_INCLUDE      (-5)
305    #define N_INCLUDE_DIR  (-6)
306    #define N_LABEL        (-7)
307    #define N_LOCALE       (-8)
308    #define N_NULL         (-9)
309    #define N_LOFFSETS     (-10)
310    #define N_FOFFSETS     (-11)
311    #define N_LBUFFER      (-12)
312    #define N_M_LIMIT      (-13)
313    #define N_M_LIMIT_REC  (-14)
314    #define N_BUFSIZE      (-15)
315    #define N_NOJIT        (-16)
316    #define N_FILE_LIST    (-17)
317    #define N_BINARY_FILES (-18)
318    #define N_EXCLUDE_FROM (-19)
319    #define N_INCLUDE_FROM (-20)
320    #define N_OM_SEPARATOR (-21)
321    
322  static option_item optionlist[] = {  static option_item optionlist[] = {
323    { OP_NODATA, -1,  NULL,              "",              "  terminate options" },    { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
324    { OP_NODATA, -1,  NULL,              "help",          "display this help and exit" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
325    { OP_NUMBER, 'A', &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
326    { OP_NUMBER, 'B', &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
327    { OP_NUMBER, 'C', &both_context,     "context=number", "set number of context lines, before & after" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
328    { OP_NODATA, 'c', NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
329    { OP_STRING, 'f', &pattern_filename, "file=path",     "read patterns from file" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
330    { OP_NODATA, 'h', NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
331    { OP_NODATA, 'i', NULL,              "ignore-case",   "ignore case distinctions" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
332    { OP_NODATA, 'l', NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
333    { OP_NODATA, 'L', NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
334    { OP_STRING, -1,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
335    { OP_NODATA, 'M', NULL,              "multiline",     "run in multiline mode" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
336    { OP_NODATA, 'n', NULL,              "line-number",   "print line number with output lines" },    { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
337    { OP_NODATA, 'q', NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
338    { OP_NODATA, 'r', NULL,              "recursive",     "recursively scan sub-directories" },    { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
339    { OP_STRING, -1,  &exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
340    { OP_STRING, -1,  &include_pattern,  "include=pattern","include matching files when recursing" },    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
341    { OP_NODATA, 's', NULL,              "no-messages",   "suppress error messages" },    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
342    { OP_NODATA, 'u', NULL,              "utf-8",         "use UTF-8 mode" },    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
343    { OP_NODATA, 'V', NULL,              "version",       "print version information and exit" },    { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
344    { OP_NODATA, 'v', NULL,              "invert-match",  "select non-matching lines" },    { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
345    { OP_NODATA, 'w', NULL,              "word-regex(p)", "force PATTERN to match only as a word"  },  #ifdef SUPPORT_PCREGREP_JIT
346    { OP_NODATA, 'x', NULL,              "line-regex(p)", "force PATTERN to match only whole lines" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
347    { OP_NODATA, 0,   NULL,               NULL,            NULL }  #else
348      { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
349    #endif
350      { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
351      { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
352      { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
353      { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
354      { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
355      { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
356      { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
357      { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
358      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
359      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
360      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
361      { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
362      { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
363      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
364      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
365      { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
366      { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
367      { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
368      { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
369      { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
370      { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
371    
372      /* These two were accidentally implemented with underscores instead of
373      hyphens in the option names. As this was not discovered for several releases,
374      the incorrect versions are left in the table for compatibility. However, the
375      --help function misses out any option that has an underscore in its name. */
376    
377      { OP_PATLIST,   N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
378      { OP_PATLIST,   N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
379    
380    #ifdef JFRIEDL_DEBUG
381      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
382    #endif
383      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
384      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
385      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
386      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
387      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
388      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
389      { OP_NODATA,    0,        NULL,               NULL,            NULL }
390  };  };
391    
392    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
393    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
394    that the combination of -w and -x has the same effect as -x on its own, so we
395    can treat them as the same. Note that the MAXPATLEN macro assumes the longest
396    prefix+suffix is 10 characters; if anything longer is added, it must be
397    adjusted. */
398    
399    static const char *prefix[] = {
400      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
401    
402    static const char *suffix[] = {
403      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
404    
405    /* UTF-8 tables - used only when the newline setting is "any". */
406    
407    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
408    
409    const char utf8_table4[] = {
410      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
411      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
412      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
413      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
414    
415    
416    
417    /*************************************************
418    *         Exit from the program                  *
419    *************************************************/
420    
421    /* If there has been a resource error, give a suitable message.
422    
423    Argument:  the return code
424    Returns:   does not return
425    */
426    
427    static void
428    pcregrep_exit(int rc)
429    {
430    if (resource_error)
431      {
432      fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
433        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
434        PCRE_ERROR_JIT_STACKLIMIT);
435      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
436      }
437    exit(rc);
438    }
439    
440    
441  /*************************************************  /*************************************************
442  *       Functions for directory scanning         *  *          Add item to chain of patterns         *
443    *************************************************/
444    
445    /* Used to add an item onto a chain, or just return an unconnected item if the
446    "after" argument is NULL.
447    
448    Arguments:
449      s          pattern string to add
450      after      if not NULL points to item to insert after
451    
452    Returns:     new pattern block
453    */
454    
455    static patstr *
456    add_pattern(char *s, patstr *after)
457    {
458    patstr *p = (patstr *)malloc(sizeof(patstr));
459    if (p == NULL)
460      {
461      fprintf(stderr, "pcregrep: malloc failed\n");
462      pcregrep_exit(2);
463      }
464    if (strlen(s) > MAXPATLEN)
465      {
466      fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
467        MAXPATLEN);
468      return NULL;
469      }
470    p->next = NULL;
471    p->string = s;
472    p->compiled = NULL;
473    p->hint = NULL;
474    
475    if (after != NULL)
476      {
477      p->next = after->next;
478      after->next = p;
479      }
480    return p;
481    }
482    
483    
484    /*************************************************
485    *           Free chain of patterns               *
486    *************************************************/
487    
488    /* Used for several chains of patterns.
489    
490    Argument: pointer to start of chain
491    Returns:  nothing
492    */
493    
494    static void
495    free_pattern_chain(patstr *pc)
496    {
497    while (pc != NULL)
498      {
499      patstr *p = pc;
500      pc = p->next;
501      if (p->hint != NULL) pcre_free_study(p->hint);
502      if (p->compiled != NULL) pcre_free(p->compiled);
503      free(p);
504      }
505    }
506    
507    
508    /*************************************************
509    *           Free chain of file names             *
510    *************************************************/
511    
512    /*
513    Argument: pointer to start of chain
514    Returns:  nothing
515    */
516    
517    static void
518    free_file_chain(fnstr *fn)
519    {
520    while (fn != NULL)
521      {
522      fnstr *f = fn;
523      fn = f->next;
524      free(f);
525      }
526    }
527    
528    
529    /*************************************************
530    *            OS-specific functions               *
531  *************************************************/  *************************************************/
532    
533  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
534  although at present the only ones are for Unix, Win32, and for "no directory  although at present the only ones are for Unix, Win32, and for "no support". */
 recursion support". */  
535    
536    
537  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
538    
539  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
540  #include <sys/types.h>  #include <sys/types.h>
541  #include <sys/stat.h>  #include <sys/stat.h>
542  #include <dirent.h>  #include <dirent.h>
543    
544  typedef DIR directory_type;  typedef DIR directory_type;
545    #define FILESEP '/'
546    
547  static int  static int
548  isdirectory(char *filename)  isdirectory(char *filename)
# Line 165  isdirectory(char *filename) Line 550  isdirectory(char *filename)
550  struct stat statbuf;  struct stat statbuf;
551  if (stat(filename, &statbuf) < 0)  if (stat(filename, &statbuf) < 0)
552    return 0;        /* In the expectation that opening as a file will fail */    return 0;        /* In the expectation that opening as a file will fail */
553  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return (statbuf.st_mode & S_IFMT) == S_IFDIR;
554  }  }
555    
556  static directory_type *  static directory_type *
# Line 184  for (;;) Line 569  for (;;)
569    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
570      return dent->d_name;      return dent->d_name;
571    }    }
572  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
573  }  }
574    
575  static void  static void
# Line 194  closedir(dir); Line 579  closedir(dir);
579  }  }
580    
581    
582    /************* Test for regular file in Unix **********/
583    
584    static int
585    isregfile(char *filename)
586    {
587    struct stat statbuf;
588    if (stat(filename, &statbuf) < 0)
589      return 1;        /* In the expectation that opening as a file will fail */
590    return (statbuf.st_mode & S_IFMT) == S_IFREG;
591    }
592    
593    
594    /************* Test for a terminal in Unix **********/
595    
596    static BOOL
597    is_stdout_tty(void)
598    {
599    return isatty(fileno(stdout));
600    }
601    
602    static BOOL
603    is_file_tty(FILE *f)
604    {
605    return isatty(fileno(f));
606    }
607    
608    
609  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
610    
611  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
612  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
613  when it did not exist. */  when it did not exist. David Byron added a patch that moved the #include of
614    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
615    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
616    undefined when it is indeed undefined. */
617    
618    #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
 #elif HAVE_WIN32API  
619    
620  #ifndef STRICT  #ifndef STRICT
621  # define STRICT  # define STRICT
# Line 209  when it did not exist. */ Line 623  when it did not exist. */
623  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
624  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
625  #endif  #endif
626    
627    #include <windows.h>
628    
629  #ifndef INVALID_FILE_ATTRIBUTES  #ifndef INVALID_FILE_ATTRIBUTES
630  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
631  #endif  #endif
632    
 #include <windows.h>  
   
633  typedef struct directory_type  typedef struct directory_type
634  {  {
635  HANDLE handle;  HANDLE handle;
# Line 222  BOOL first; Line 637  BOOL first;
637  WIN32_FIND_DATA data;  WIN32_FIND_DATA data;
638  } directory_type;  } directory_type;
639    
640    #define FILESEP '/'
641    
642  int  int
643  isdirectory(char *filename)  isdirectory(char *filename)
644  {  {
645  DWORD attr = GetFileAttributes(filename);  DWORD attr = GetFileAttributes(filename);
646  if (attr == INVALID_FILE_ATTRIBUTES)  if (attr == INVALID_FILE_ATTRIBUTES)
647    return 0;    return 0;
648  return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;  return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
649  }  }
650    
651  directory_type *  directory_type *
# Line 239  char *pattern; Line 656  char *pattern;
656  directory_type *dir;  directory_type *dir;
657  DWORD err;  DWORD err;
658  len = strlen(filename);  len = strlen(filename);
659  pattern = (char *) malloc(len + 3);  pattern = (char *)malloc(len + 3);
660  dir = (directory_type *) malloc(sizeof(*dir));  dir = (directory_type *)malloc(sizeof(*dir));
661  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
662    {    {
663    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
664    exit(2);    pcregrep_exit(2);
665    }    }
666  memcpy(pattern, filename, len);  memcpy(pattern, filename, len);
667  memcpy(&(pattern[len]), "\\*", 3);  memcpy(&(pattern[len]), "\\*", 3);
# Line 292  free(dir); Line 709  free(dir);
709  }  }
710    
711    
712    /************* Test for regular file in Win32 **********/
713    
/* I don't know how to test for a regular file here, or whether it can be done
at all, so every path that isdirectory() does not report as a directory is
assumed to be regular. */

int isregfile(char *filename)
{
int is_dir = isdirectory(filename);
return is_dir == 0;
}
721    
722    
723    /************* Test for a terminal in Win32 **********/
724    
725    /* I don't know how to do this; assume never */
726    
727    static BOOL
728    is_stdout_tty(void)
729    {
730    return FALSE;
731    }
732    
733    static BOOL
734    is_file_tty(FILE *f)
735    {
736    return FALSE;
737    }
738    
739    
740  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
741    
742  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
743    
744  #else  #else
745    
746    #define FILESEP 0
747  typedef void directory_type;  typedef void directory_type;
748    
/* Directories cannot be detected in this configuration: always zero. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}
/* Stub: nothing is opened, and a null pointer is always returned. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type *)0;
}
/* Stub: there are never any entries, so a null pointer is always returned. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char *)0;
}
/* Stub: there is nothing to close. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
753    
754    
755    /************* Test for regular when we can't do it **********/
756    
757    /* Assume all files are regular. */
758    
/* Every path is taken to be a regular file: always 1. */

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
760    
761    
762    /************* Test for a terminal when we can't do it **********/
763    
764    static BOOL
765    is_stdout_tty(void)
766    {
767    return FALSE;
768    }
769    
770    static BOOL
771    is_file_tty(FILE *f)
772    {
773    return FALSE;
774    }
775    
776  #endif  #endif
777    
778    
779    
780  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
781  /*************************************************  /*************************************************
782  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
783  *************************************************/  *************************************************/
# Line 332  return sys_errlist[n]; Line 800  return sys_errlist[n];
800    
801    
802  /*************************************************  /*************************************************
803  *       Print the previous "after" lines         *  *                Usage function                  *
804  *************************************************/  *************************************************/
805    
806  /* This is called if we are about to lose said lines because of buffer filling,  static int
807  and at the end of the file.  usage(int rc)
   
 Arguments:  
   lastmatchnumber   the number of the last matching line, plus one  
   lastmatchrestart  where we restarted after the last match  
   endptr            end of available data  
   printname         filename for printing  
   
 Returns:            nothing  
 */  
   
 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,  
   char *endptr, char *printname)  
808  {  {
809  if (after_context > 0 && lastmatchnumber > 0)  option_item *op;
810    fprintf(stderr, "Usage: pcregrep [-");
811    for (op = optionlist; op->one_char != 0; op++)
812    {    {
813    int count = 0;    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
   while (lastmatchrestart < endptr && count++ < after_context)  
     {  
     char *pp = lastmatchrestart;  
     if (printname != NULL) fprintf(stdout, "%s-", printname);  
     if (number) fprintf(stdout, "%d-", lastmatchnumber++);  
     while (*pp != '\n') pp++;  
     fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);  
     lastmatchrestart = pp + 1;  
     }  
   hyphenpending = TRUE;  
814    }    }
815    fprintf(stderr, "] [long options] [pattern] [files]\n");
816    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
817      "options.\n");
818    return rc;
819  }  }
820    
821    
822    
823  /*************************************************  /*************************************************
824  *            Grep an individual file             *  *                Help function                   *
825  *************************************************/  *************************************************/
826    
827  /* This is called from grep_or_recurse() below. It uses a buffer that is three  static void
828  times the value of MBUFTHIRD. The matching point is never allowed to stray into  help(void)
829  the top third of the buffer, thus keeping more of the file available for  {
830  context printing or for multiline scanning. For large files, the pointer will  option_item *op;
 be in the middle third most of the time, so the bottom third is available for  
 "before" context printing.  
   
 Arguments:  
   in           the fopened FILE stream  
   printname    the file name if it is to be printed for each match  
                or NULL if the file name is not to be printed  
                it cannot be NULL if filenames[_nomatch]_only is set  
831    
832  Returns:       0 if there was at least one match  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
833                 1 otherwise (no matches)  printf("Search for PATTERN in each FILE or standard input.\n");
834  */  printf("PATTERN must be present if neither -e nor -f is used.\n");
835    printf("\"-\" can be used as a file name to mean STDIN.\n");
836    
837  static int  #ifdef SUPPORT_LIBZ
838  pcregrep(FILE *in, char *printname)  printf("Files whose names end in .gz are read using zlib.\n");
839  {  #endif
 int rc = 1;  
 int linenumber = 1;  
 int lastmatchnumber = 0;  
 int count = 0;  
 int offsets[99];  
 char *lastmatchrestart = NULL;  
 char buffer[3*MBUFTHIRD];  
 char *ptr = buffer;  
 char *endptr;  
 size_t bufflength;  
 BOOL endhyphenpending = FALSE;  
840    
841  /* Do the first read into the start of the buffer and set up the pointer to  #ifdef SUPPORT_LIBBZ2
842  end of what we have. */  printf("Files whose names end in .bz2 are read using bzlib2.\n");
843    #endif
844    
845  bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
846  endptr = buffer + bufflength;  printf("Other files and the standard input are read as plain files.\n\n");
847    #else
848    printf("All files are read as plain files, without any interpretation.\n\n");
849    #endif
850    
851  /* Loop while the current pointer is not at the end of the file. For large  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
852  files, endptr will be at the end of the buffer when we are in the middle of the  printf("Options:\n");
 file, but ptr will never get there, because as soon as it gets over 2/3 of the  
 way, the buffer is shifted left and re-filled. */  
853    
854  while (ptr < endptr)  for (op = optionlist; op->one_char != 0; op++)
855    {    {
856    int i;    int n;
857    BOOL match = FALSE;    char s[4];
   char *t = ptr;  
   size_t length, linelength;  
   
   /* At this point, ptr is at the start of a line. We need to find the length  
   of the subject string to pass to pcre_exec(). In multiline mode, it is the  
   length remainder of the data in the buffer. Otherwise, it is the length of  
   the next line. After matching, we always advance by the length of the next  
   line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so  
   that any match is constrained to be in the first line. */  
858    
859    linelength = 0;    /* Two options were accidentally implemented and documented with underscores
860    while (t < endptr && *t++ != '\n') linelength++;    instead of hyphens in their names, something that was not noticed for quite a
861    length = multiline? endptr - ptr : linelength;    few releases. When fixing this, I left the underscored versions in the list
862      in case people were using them. However, we don't want to display them in the
863      help data. There are no other options that contain underscores, and we do not
864      expect ever to implement such options. Therefore, just omit any option that
865      contains an underscore. */
866    
867    /* Run through all the patterns until one matches. Note that we don't include    if (strchr(op->long_name, '_') != NULL) continue;
   the final newline in the subject string. */  
868    
869    for (i = 0; !match && i < pattern_count; i++)    if (op->one_char > 0 && (op->long_name)[0] == 0)
870        n = 31 - printf("  -%c", op->one_char);
871      else
872      {      {
873      match = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
874        offsets, 99) >= 0;        else strcpy(s, "   ");
875        n = 31 - printf("  %s --%s", s, op->long_name);
876      }      }
877    
878    /* If it's a match or a not-match (as required), print what's wanted. */    if (n < 1) n = 1;
879      printf("%.*s%s\n", n, "                           ", op->help_text);
880      }
881    
882    if (match != invert)  printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
883      {  printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
884      BOOL hyphenprinted = FALSE;  printf("When reading patterns or file names from a file, trailing white\n");
885    printf("space is removed and blank lines are ignored.\n");
886    printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
887    
888      if (filenames_nomatch_only) return 1;  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
889    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
890    }
891    
     if (count_only) count++;  
892    
     else if (filenames_only)  
       {  
       fprintf(stdout, "%s\n", printname);  
       return 0;  
       }  
893    
894      else if (quiet) return 0;  /*************************************************
895    *            Test exclude/includes               *
896    *************************************************/
897    
898    /* If any exclude pattern matches, the path is excluded. Otherwise, unless
899    there are no includes, the path must match an include pattern.
900    
901    Arguments:
902      path      the path to be matched
903      ip        the chain of include patterns
904      ep        the chain of exclude patterns
905    
906    Returns:    TRUE if the path is not excluded
907    */
908    
909    static BOOL
910    test_incexc(char *path, patstr *ip, patstr *ep)
911    {
912    int plen = strlen(path);
913    
914    for (; ep != NULL; ep = ep->next)
915      {
916      if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
917        return FALSE;
918      }
919    
920    if (ip == NULL) return TRUE;
921    
922    for (; ip != NULL; ip = ip->next)
923      {
924      if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
925        return TRUE;
926      }
927    
928    return FALSE;
929    }
930    
931    
932    
933    /*************************************************
934    *         Decode integer argument value          *
935    *************************************************/
936    
937    /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
938    because SunOS4 doesn't have it. This is used only for unpicking arguments, so
939    just keep it simple.
940    
941    Arguments:
942      option_data   the option data string
943      op            the option item (for error messages)
944      longop        TRUE if option given in long form
945    
946    Returns:        a long integer
947    */
948    
949    static long int
950    decode_number(char *option_data, option_item *op, BOOL longop)
951    {
952    unsigned long int n = 0;
953    char *endptr = option_data;
954    while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
955    while (isdigit((unsigned char)(*endptr)))
956      n = n * 10 + (int)(*endptr++ - '0');
957    if (toupper(*endptr) == 'K')
958      {
959      n *= 1024;
960      endptr++;
961      }
962    else if (toupper(*endptr) == 'M')
963      {
964      n *= 1024*1024;
965      endptr++;
966      }
967    
968    if (*endptr != 0)   /* Error */
969      {
970      if (longop)
971        {
972        char *equals = strchr(op->long_name, '=');
973        int nlen = (equals == NULL)? (int)strlen(op->long_name) :
974          (int)(equals - op->long_name);
975        fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
976          option_data, nlen, op->long_name);
977        }
978      else
979        fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
980          option_data, op->one_char);
981      pcregrep_exit(usage(2));
982      }
983    
984    return n;
985    }
986    
987    
988    
989    /*************************************************
990    *       Add item to a chain of numbers           *
991    *************************************************/
992    
993    /* Used to add an item onto a chain, or just return an unconnected item if the
994    "after" argument is NULL.
995    
996    Arguments:
997      n          the number to add
998      after      if not NULL points to item to insert after
999    
1000    Returns:     new number block
1001    */
1002    
1003    static omstr *
1004    add_number(int n, omstr *after)
1005    {
1006    omstr *om = (omstr *)malloc(sizeof(omstr));
1007    
1008    if (om == NULL)
1009      {
1010      fprintf(stderr, "pcregrep: malloc failed\n");
1011      pcregrep_exit(2);
1012      }
1013    om->next = NULL;
1014    om->groupnum = n;
1015    
1016    if (after != NULL)
1017      {
1018      om->next = after->next;
1019      after->next = om;
1020      }
1021    return om;
1022    }
1023    
1024    
1025    
1026    /*************************************************
1027    *            Read one line of input              *
1028    *************************************************/
1029    
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a '\n' has been stored, when "length" bytes have been
stored, or at end of file, whichever comes first. The limit is tested before
each byte is read, so nothing is read or stored when length is zero or
negative. No terminating zero is added.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;
while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = c;
  if (c == '\n') break;
  }
return yield;
}
1057    
1058    
1059    
1060    /*************************************************
1061    *             Find end of line                   *
1062    *************************************************/
1063    
1064    /* The length of the endline sequence that is found is set via lenptr. This may
1065    be zero at the very end of the file if there is no line-ending sequence there.
1066    
1067    Arguments:
1068      p         current position in line
1069      endptr    end of available data
1070      lenptr    where to put the length of the eol sequence
1071    
1072    Returns:    pointer after the last byte of the line,
1073                including the newline byte(s)
1074    */
1075    
1076    static char *
1077    end_of_line(char *p, char *endptr, int *lenptr)
1078    {
1079    switch(endlinetype)
1080      {
1081      default:      /* Just in case */
1082      case EL_LF:
1083      while (p < endptr && *p != '\n') p++;
1084      if (p < endptr)
1085        {
1086        *lenptr = 1;
1087        return p + 1;
1088        }
1089      *lenptr = 0;
1090      return endptr;
1091    
1092      case EL_CR:
1093      while (p < endptr && *p != '\r') p++;
1094      if (p < endptr)
1095        {
1096        *lenptr = 1;
1097        return p + 1;
1098        }
1099      *lenptr = 0;
1100      return endptr;
1101    
1102      case EL_CRLF:
1103      for (;;)
1104        {
1105        while (p < endptr && *p != '\r') p++;
1106        if (++p >= endptr)
1107          {
1108          *lenptr = 0;
1109          return endptr;
1110          }
1111        if (*p == '\n')
1112          {
1113          *lenptr = 2;
1114          return p + 1;
1115          }
1116        }
1117      break;
1118    
1119      case EL_ANYCRLF:
1120      while (p < endptr)
1121        {
1122        int extra = 0;
1123        register int c = *((unsigned char *)p);
1124    
1125        if (utf8 && c >= 0xc0)
1126          {
1127          int gcii, gcss;
1128          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1129          gcss = 6*extra;
1130          c = (c & utf8_table3[extra]) << gcss;
1131          for (gcii = 1; gcii <= extra; gcii++)
1132            {
1133            gcss -= 6;
1134            c |= (p[gcii] & 0x3f) << gcss;
1135            }
1136          }
1137    
1138        p += 1 + extra;
1139    
1140        switch (c)
1141          {
1142          case '\n':
1143          *lenptr = 1;
1144          return p;
1145    
1146          case '\r':
1147          if (p < endptr && *p == '\n')
1148            {
1149            *lenptr = 2;
1150            p++;
1151            }
1152          else *lenptr = 1;
1153          return p;
1154    
1155          default:
1156          break;
1157          }
1158        }   /* End of loop for ANYCRLF case */
1159    
1160      *lenptr = 0;  /* Must have hit the end */
1161      return endptr;
1162    
1163      case EL_ANY:
1164      while (p < endptr)
1165        {
1166        int extra = 0;
1167        register int c = *((unsigned char *)p);
1168    
1169        if (utf8 && c >= 0xc0)
1170          {
1171          int gcii, gcss;
1172          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1173          gcss = 6*extra;
1174          c = (c & utf8_table3[extra]) << gcss;
1175          for (gcii = 1; gcii <= extra; gcii++)
1176            {
1177            gcss -= 6;
1178            c |= (p[gcii] & 0x3f) << gcss;
1179            }
1180          }
1181    
1182        p += 1 + extra;
1183    
1184        switch (c)
1185          {
1186          case '\n':    /* LF */
1187          case '\v':    /* VT */
1188          case '\f':    /* FF */
1189          *lenptr = 1;
1190          return p;
1191    
1192          case '\r':    /* CR */
1193          if (p < endptr && *p == '\n')
1194            {
1195            *lenptr = 2;
1196            p++;
1197            }
1198          else *lenptr = 1;
1199          return p;
1200    
1201    #ifndef EBCDIC
1202          case 0x85:    /* Unicode NEL */
1203          *lenptr = utf8? 2 : 1;
1204          return p;
1205    
1206          case 0x2028:  /* Unicode LS */
1207          case 0x2029:  /* Unicode PS */
1208          *lenptr = 3;
1209          return p;
1210    #endif  /* Not EBCDIC */
1211    
1212          default:
1213          break;
1214          }
1215        }   /* End of loop for ANY case */
1216    
1217      *lenptr = 0;  /* Must have hit the end */
1218      return endptr;
1219      }     /* End of overall switch */
1220    }
1221    
1222    
1223    
1224    /*************************************************
1225    *         Find start of previous line            *
1226    *************************************************/
1227    
1228    /* This is called when looking back for before lines to print.
1229    
1230    Arguments:
1231      p         start of the subsequent line
1232      startptr  start of available data
1233    
1234    Returns:    pointer to the start of the previous line
1235    */
1236    
1237    static char *
1238    previous_line(char *p, char *startptr)
1239    {
1240    switch(endlinetype)
1241      {
1242      default:      /* Just in case */
1243      case EL_LF:
1244      p--;
1245      while (p > startptr && p[-1] != '\n') p--;
1246      return p;
1247    
1248      case EL_CR:
1249      p--;
1250      while (p > startptr && p[-1] != '\n') p--;
1251      return p;
1252    
1253      case EL_CRLF:
1254      for (;;)
1255        {
1256        p -= 2;
1257        while (p > startptr && p[-1] != '\n') p--;
1258        if (p <= startptr + 1 || p[-2] == '\r') return p;
1259        }
1260      return p;   /* But control should never get here */
1261    
1262      case EL_ANY:
1263      case EL_ANYCRLF:
1264      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1265      if (utf8) while ((*p & 0xc0) == 0x80) p--;
1266    
1267      while (p > startptr)
1268        {
1269        register unsigned int c;
1270        char *pp = p - 1;
1271    
1272        if (utf8)
1273          {
1274          int extra = 0;
1275          while ((*pp & 0xc0) == 0x80) pp--;
1276          c = *((unsigned char *)pp);
1277          if (c >= 0xc0)
1278            {
1279            int gcii, gcss;
1280            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1281            gcss = 6*extra;
1282            c = (c & utf8_table3[extra]) << gcss;
1283            for (gcii = 1; gcii <= extra; gcii++)
1284              {
1285              gcss -= 6;
1286              c |= (pp[gcii] & 0x3f) << gcss;
1287              }
1288            }
1289          }
1290        else c = *((unsigned char *)pp);
1291    
1292        if (endlinetype == EL_ANYCRLF) switch (c)
1293          {
1294          case '\n':    /* LF */
1295          case '\r':    /* CR */
1296          return p;
1297    
1298          default:
1299          break;
1300          }
1301    
1302        else switch (c)
1303          {
1304          case '\n':    /* LF */
1305          case '\v':    /* VT */
1306          case '\f':    /* FF */
1307          case '\r':    /* CR */
1308    #ifndef EBCDIE
1309          case 0x85:    /* Unicode NEL */
1310          case 0x2028:  /* Unicode LS */
1311          case 0x2029:  /* Unicode PS */
1312    #endif  /* Not EBCDIC */
1313          return p;
1314    
1315          default:
1316          break;
1317          }
1318    
1319        p = pp;  /* Back one character */
1320        }        /* End of loop for ANY case */
1321    
1322      return startptr;  /* Hit start of data */
1323      }     /* End of overall switch */
1324    }
1325    
1326    
1327    
1328    
1329    
1330    /*************************************************
1331    *       Print the previous "after" lines         *
1332    *************************************************/
1333    
1334    /* This is called if we are about to lose said lines because of buffer filling,
1335    and at the end of the file. The data in the line is written using fwrite() so
1336    that a binary zero does not terminate it.
1337    
1338    Arguments:
1339      lastmatchnumber   the number of the last matching line, plus one
1340      lastmatchrestart  where we restarted after the last match
1341      endptr            end of available data
1342      printname         filename for printing
1343    
1344    Returns:            nothing
1345    */
1346    
1347    static void
1348    do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1349      char *printname)
1350    {
1351    if (after_context > 0 && lastmatchnumber > 0)
1352      {
1353      int count = 0;
1354      while (lastmatchrestart < endptr && count++ < after_context)
1355        {
1356        int ellength;
1357        char *pp = lastmatchrestart;
1358        if (printname != NULL) fprintf(stdout, "%s-", printname);
1359        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1360        pp = end_of_line(pp, endptr, &ellength);
1361        FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1362        lastmatchrestart = pp;
1363        }
1364      hyphenpending = TRUE;
1365      }
1366    }
1367    
1368    
1369    
1370    /*************************************************
1371    *   Apply patterns to subject till one matches   *
1372    *************************************************/
1373    
1374    /* This function is called to run through all patterns, looking for a match. It
1375    is used multiple times for the same subject when colouring is enabled, in order
1376    to find all possible matches.
1377    
1378    Arguments:
1379      matchptr     the start of the subject
1380      length       the length of the subject to match
1381      startoffset  where to start matching
1382      offsets      the offets vector to fill in
1383      mrc          address of where to put the result of pcre_exec()
1384    
1385    Returns:      TRUE if there was a match
1386                  FALSE if there was no match
1387                  invert if there was a non-fatal error
1388    */
1389    
1390    static BOOL
1391    match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
1392      int *mrc)
1393    {
1394    int i;
1395    size_t slen = length;
1396    patstr *p = patterns;
1397    const char *msg = "this text:\n\n";
1398    
1399    if (slen > 200)
1400      {
1401      slen = 200;
1402      msg = "text that starts:\n\n";
1403      }
1404    for (i = 1; p != NULL; p = p->next, i++)
1405      {
1406      *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1407        startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
1408      if (*mrc >= 0) return TRUE;
1409      if (*mrc == PCRE_ERROR_NOMATCH) continue;
1410      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1411      if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1412      fprintf(stderr, "%s", msg);
1413      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1414      fprintf(stderr, "\n\n");
1415      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1416          *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1417        resource_error = TRUE;
1418      if (error_count++ > 20)
1419        {
1420        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1421        pcregrep_exit(2);
1422        }
1423      return invert;    /* No more matching; don't show the line again */
1424      }
1425    
1426    return FALSE;  /* No match, no errors */
1427    }
1428    
1429    
1430    
1431    /*************************************************
1432    *            Grep an individual file             *
1433    *************************************************/
1434    
1435    /* This is called from grep_or_recurse() below. It uses a buffer that is three
1436    times the value of bufthird. The matching point is never allowed to stray into
1437    the top third of the buffer, thus keeping more of the file available for
1438    context printing or for multiline scanning. For large files, the pointer will
1439    be in the middle third most of the time, so the bottom third is available for
1440    "before" context printing.
1441    
1442    Arguments:
1443      handle       the fopened FILE stream for a normal file
1444                   the gzFile pointer when reading is via libz
1445                   the BZFILE pointer when reading is via libbz2
1446      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1447      filename     the file name or NULL (for errors)
1448      printname    the file name if it is to be printed for each match
1449                   or NULL if the file name is not to be printed
1450                   it cannot be NULL if filenames[_nomatch]_only is set
1451    
1452    Returns:       0 if there was at least one match
1453                   1 otherwise (no matches)
1454                   2 if an overlong line is encountered
1455                   3 if there is a read error on a .bz2 file
1456    */
1457    
1458    static int
1459    pcregrep(void *handle, int frtype, char *filename, char *printname)
1460    {
1461    int rc = 1;
1462    int linenumber = 1;
1463    int lastmatchnumber = 0;
1464    int count = 0;
1465    int filepos = 0;
1466    int offsets[OFFSET_SIZE];
1467    char *lastmatchrestart = NULL;
1468    char *ptr = main_buffer;
1469    char *endptr;
1470    size_t bufflength;
1471    BOOL binary = FALSE;
1472    BOOL endhyphenpending = FALSE;
1473    BOOL input_line_buffered = line_buffered;
1474    FILE *in = NULL;                    /* Ensure initialized */
1475    
1476    #ifdef SUPPORT_LIBZ
1477    gzFile ingz = NULL;
1478    #endif
1479    
1480    #ifdef SUPPORT_LIBBZ2
1481    BZFILE *inbz2 = NULL;
1482    #endif
1483    
1484    
1485    /* Do the first read into the start of the buffer and set up the pointer to end
1486    of what we have. In the case of libz, a non-zipped .gz file will be read as a
1487    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1488    fail. */
1489    
1490    (void)frtype;
1491    
1492    #ifdef SUPPORT_LIBZ
1493    if (frtype == FR_LIBZ)
1494      {
1495      ingz = (gzFile)handle;
1496      bufflength = gzread (ingz, main_buffer, bufsize);
1497      }
1498    else
1499    #endif
1500    
1501    #ifdef SUPPORT_LIBBZ2
1502    if (frtype == FR_LIBBZ2)
1503      {
1504      inbz2 = (BZFILE *)handle;
1505      bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1506      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1507      }                                    /* without the cast it is unsigned. */
1508    else
1509    #endif
1510    
1511      {
1512      in = (FILE *)handle;
1513      if (is_file_tty(in)) input_line_buffered = TRUE;
1514      bufflength = input_line_buffered?
1515        read_one_line(main_buffer, bufsize, in) :
1516        fread(main_buffer, 1, bufsize, in);
1517      }
1518    
1519    endptr = main_buffer + bufflength;
1520    
1521    /* Unless binary-files=text, see if we have a binary file. This uses the same
1522    rule as GNU grep, namely, a search for a binary zero byte near the start of the
1523    file. */
1524    
1525    if (binary_files != BIN_TEXT)
1526      {
1527      binary =
1528        memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1529      if (binary && binary_files == BIN_NOMATCH) return 1;
1530      }
1531    
1532    /* Loop while the current pointer is not at the end of the file. For large
1533    files, endptr will be at the end of the buffer when we are in the middle of the
1534    file, but ptr will never get there, because as soon as it gets over 2/3 of the
1535    way, the buffer is shifted left and re-filled. */
1536    
1537    while (ptr < endptr)
1538      {
1539      int endlinelength;
1540      int mrc = 0;
1541      int startoffset = 0;
1542      BOOL match;
1543      char *matchptr = ptr;
1544      char *t = ptr;
1545      size_t length, linelength;
1546    
1547      /* At this point, ptr is at the start of a line. We need to find the length
1548      of the subject string to pass to pcre_exec(). In multiline mode, it is the
1549      length remainder of the data in the buffer. Otherwise, it is the length of
1550      the next line, excluding the terminating newline. After matching, we always
1551      advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1552      option is used for compiling, so that any match is constrained to be in the
1553      first line. */
1554    
1555      t = end_of_line(t, endptr, &endlinelength);
1556      linelength = t - ptr - endlinelength;
1557      length = multiline? (size_t)(endptr - ptr) : linelength;
1558    
1559      /* Check to see if the line we are looking at extends right to the very end
1560      of the buffer without a line terminator. This means the line is too long to
1561      handle. */
1562    
1563      if (endlinelength == 0 && t == main_buffer + bufsize)
1564        {
1565        fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1566                        "pcregrep: check the --buffer-size option\n",
1567                        linenumber,
1568                        (filename == NULL)? "" : " of file ",
1569                        (filename == NULL)? "" : filename);
1570        return 2;
1571        }
1572    
1573      /* Extra processing for Jeffrey Friedl's debugging. */
1574    
1575    #ifdef JFRIEDL_DEBUG
1576      if (jfriedl_XT || jfriedl_XR)
1577      {
1578          #include <sys/time.h>
1579          #include <time.h>
1580          struct timeval start_time, end_time;
1581          struct timezone dummy;
1582          int i;
1583    
1584          if (jfriedl_XT)
1585          {
1586              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1587              const char *orig = ptr;
1588              ptr = malloc(newlen + 1);
1589              if (!ptr) {
1590                      printf("out of memory");
1591                      pcregrep_exit(2);
1592              }
1593              endptr = ptr;
1594              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1595              for (i = 0; i < jfriedl_XT; i++) {
1596                      strncpy(endptr, orig,  length);
1597                      endptr += length;
1598              }
1599              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1600              length = newlen;
1601          }
1602    
1603          if (gettimeofday(&start_time, &dummy) != 0)
1604                  perror("bad gettimeofday");
1605    
1606    
1607          for (i = 0; i < jfriedl_XR; i++)
1608              match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1609                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1610    
1611          if (gettimeofday(&end_time, &dummy) != 0)
1612                  perror("bad gettimeofday");
1613    
1614          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1615                          -
1616                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1617    
1618          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1619          return 0;
1620      }
1621    #endif
1622    
1623      /* We come back here after a match when show_only_matching is set, in order
1624      to find any further matches in the same line. This applies to
1625      --only-matching, --file-offsets, and --line-offsets. */
1626    
1627      ONLY_MATCHING_RESTART:
1628    
1629      /* Run through all the patterns until one matches or there is an error other
1630      than NOMATCH. This code is in a subroutine so that it can be re-used for
1631      finding subsequent matches when colouring matched lines. */
1632    
1633      match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1634    
1635      /* If it's a match or a not-match (as required), do what's wanted. */
1636    
1637      if (match != invert)
1638        {
1639        BOOL hyphenprinted = FALSE;
1640    
1641        /* We've failed if we want a file that doesn't have any matches. */
1642    
1643        if (filenames == FN_NOMATCH_ONLY) return 1;
1644    
1645        /* Just count if just counting is wanted. */
1646    
1647        if (count_only) count++;
1648    
1649        /* When handling a binary file and binary-files==binary, the "binary"
1650        variable will be set true (it's false in all other cases). In this
1651        situation we just want to output the file name. No need to scan further. */
1652    
1653        else if (binary)
1654          {
1655          fprintf(stdout, "Binary file %s matches\n", filename);
1656          return 0;
1657          }
1658    
1659        /* If all we want is a file name, there is no need to scan any more lines
1660        in the file. */
1661    
1662        else if (filenames == FN_MATCH_ONLY)
1663          {
1664          fprintf(stdout, "%s\n", printname);
1665          return 0;
1666          }
1667    
1668        /* Likewise, if all we want is a yes/no answer. */
1669    
1670        else if (quiet) return 0;
1671    
1672        /* The --only-matching option prints just the substring that matched,
1673        and/or one or more captured portions of it, as long as these strings are
1674        not empty. The --file-offsets and --line-offsets options output offsets for
1675        the matching substring (all three set show_only_matching). None of these
1676        mutually exclusive options prints any context. Afterwards, adjust the start
1677        and then jump back to look for further matches in the same line. If we are
1678        in invert mode, however, nothing is printed and we do not restart - this
1679        could still be useful because the return code is set. */
1680    
1681        else if (show_only_matching)
1682          {
1683          if (!invert)
1684            {
1685            if (printname != NULL) fprintf(stdout, "%s:", printname);
1686            if (number) fprintf(stdout, "%d:", linenumber);
1687    
1688            /* Handle --line-offsets */
1689    
1690            if (line_offsets)
1691              fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1692                offsets[1] - offsets[0]);
1693    
1694            /* Handle --file-offsets */
1695    
1696            else if (file_offsets)
1697              fprintf(stdout, "%d,%d\n",
1698                (int)(filepos + matchptr + offsets[0] - ptr),
1699                offsets[1] - offsets[0]);
1700    
1701            /* Handle --only-matching, which may occur many times */
1702    
1703            else
1704              {
1705              BOOL printed = FALSE;
1706              omstr *om;
1707    
1708              for (om = only_matching; om != NULL; om = om->next)
1709                {
1710                int n = om->groupnum;
1711                if (n < mrc)
1712                  {
1713                  int plen = offsets[2*n + 1] - offsets[2*n];
1714                  if (plen > 0)
1715                    {
1716                    if (printed) fprintf(stdout, "%s", om_separator);
1717                    if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1718                    FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
1719                    if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1720                    printed = TRUE;
1721                    }
1722                  }
1723                }
1724    
1725              if (printed || printname != NULL || number) fprintf(stdout, "\n");
1726              }
1727    
1728            /* Prepare to repeat to find the next match */
1729    
1730            match = FALSE;
1731            if (line_buffered) fflush(stdout);
1732            rc = 0;                      /* Had some success */
1733            startoffset = offsets[1];    /* Restart after the match */
1734            goto ONLY_MATCHING_RESTART;
1735            }
1736          }
1737    
1738        /* This is the default case when none of the above options is set. We print
1739        the matching line(s), possibly preceded and/or followed by other lines of
1740        context. */
1741    
1742      else      else
1743        {        {
# Line 467  while (ptr < endptr) Line 1746  while (ptr < endptr)
1746    
1747        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
1748          {          {
1749            int ellength;
1750          int linecount = 0;          int linecount = 0;
1751          char *p = lastmatchrestart;          char *p = lastmatchrestart;
1752    
1753          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
1754            {            {
1755            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
1756            linecount++;            linecount++;
1757            }            }
1758    
1759          /* It is important to advance lastmatchrestart during this printing so          /* It is important to advance lastmatchrestart during this printing so
1760          that it interacts correctly with any "before" printing below. */          that it interacts correctly with any "before" printing below. Print
1761            each line's data using fwrite() in case there are binary zeroes. */
1762    
1763          while (lastmatchrestart < p)          while (lastmatchrestart < p)
1764            {            {
1765            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
1766            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1767            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1768            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1769            fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1770            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
1771            }            }
1772          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
1773          }          }
# Line 509  while (ptr < endptr) Line 1789  while (ptr < endptr)
1789          int linecount = 0;          int linecount = 0;
1790          char *p = ptr;          char *p = ptr;
1791    
1792          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1793                 linecount++ < before_context)                 linecount < before_context)
1794            {            {
1795            p--;            linecount++;
1796            while (p > buffer && p[-1] != '\n') p--;            p = previous_line(p, main_buffer);
1797            }            }
1798    
1799          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 521  while (ptr < endptr) Line 1801  while (ptr < endptr)
1801    
1802          while (p < ptr)          while (p < ptr)
1803            {            {
1804              int ellength;
1805            char *pp = p;            char *pp = p;
1806            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1807            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1808            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1809            fprintf(stdout, "%.*s", pp - p + 1, p);            FWRITE(p, 1, pp - p, stdout);
1810            p = pp + 1;            p = pp;
1811            }            }
1812          }          }
1813    
1814        /* Now print the matching line(s); ensure we set hyphenpending at the end        /* Now print the matching line(s); ensure we set hyphenpending at the end
1815        of the file. */        of the file if any context lines are being output. */
1816    
1817          if (after_context > 0 || before_context > 0)
1818            endhyphenpending = TRUE;
1819    
       endhyphenpending = TRUE;  
1820        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (printname != NULL) fprintf(stdout, "%s:", printname);
1821        if (number) fprintf(stdout, "%d:", linenumber);        if (number) fprintf(stdout, "%d:", linenumber);
1822    
1823        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1824        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1825        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1826        start of the match will always be before the first \n character. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1827          the match will always be before the first newline sequence. */
1828    
1829        if (multiline)        if (multiline & !invert)
1830          {          {
1831          char *endmatch = ptr + offsets[1];          char *endmatch = ptr + offsets[1];
1832          t = ptr;          t = ptr;
1833          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          while (t < endmatch)
1834          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1835          linelength = endmatch - ptr;            t = end_of_line(t, endptr, &endlinelength);
1836              if (t < endmatch) linenumber++; else break;
1837              }
1838            linelength = t - ptr - endlinelength;
1839            }
1840    
1841          /*** NOTE: Use only fwrite() to output the data line, so that binary
1842          zeroes are treated as just another data character. */
1843    
1844          /* This extra option, for Jeffrey Friedl's debugging requirements,
1845          replaces the matched string, or a specific captured string if it exists,
1846          with X. When this happens, colouring is ignored. */
1847    
1848    #ifdef JFRIEDL_DEBUG
1849          if (S_arg >= 0 && S_arg < mrc)
1850            {
1851            int first = S_arg * 2;
1852            int last  = first + 1;
1853            FWRITE(ptr, 1, offsets[first], stdout);
1854            fprintf(stdout, "X");
1855            FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1856            }
1857          else
1858    #endif
1859    
1860          /* We have to split the line(s) up if colouring, and search for further
1861          matches, but not of course if the line is a non-match. */
1862    
1863          if (do_colour && !invert)
1864            {
1865            int plength;
1866            FWRITE(ptr, 1, offsets[0], stdout);
1867            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1868            FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1869            fprintf(stdout, "%c[00m", 0x1b);
1870            for (;;)
1871              {
1872              startoffset = offsets[1];
1873              if (startoffset >= (int)linelength + endlinelength ||
1874                  !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1875                break;
1876              FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1877              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1878              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1879              fprintf(stdout, "%c[00m", 0x1b);
1880              }
1881    
1882            /* In multiline mode, we may have already printed the complete line
1883            and its line-ending characters (if they matched the pattern), so there
1884            may be no more to print. */
1885    
1886            plength = (int)((linelength + endlinelength) - startoffset);
1887            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1888          }          }
1889    
1890        fprintf(stdout, "%.*s\n", linelength, ptr);        /* Not colouring; no need to search for further matches */
1891    
1892          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1893        }        }
1894    
1895        /* End of doing what has to be done for a match. If --line-buffered was
1896        given, flush the output. */
1897    
1898        if (line_buffered) fflush(stdout);
1899      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1900    
1901      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1902      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1903    
1904      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1905      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1906      }      }
1907    
1908    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1909      anything to be printed), we have to move on to the end of the match before
1910      proceeding. */
1911    
1912      if (multiline && invert && match)
1913        {
1914        int ellength;
1915        char *endmatch = ptr + offsets[1];
1916        t = ptr;
1917        while (t < endmatch)
1918          {
1919          t = end_of_line(t, endptr, &ellength);
1920          if (t <= endmatch) linenumber++; else break;
1921          }
1922        endmatch = end_of_line(endmatch, endptr, &ellength);
1923        linelength = endmatch - ptr - ellength;
1924        }
1925    
1926      /* Advance to after the newline and increment the line number. The file
1927      offset to the current line is maintained in filepos. */
1928    
1929    ptr += linelength + 1;    ptr += linelength + endlinelength;
1930      filepos += (int)(linelength + endlinelength);
1931    linenumber++;    linenumber++;
1932    
1933      /* If input is line buffered, and the buffer is not yet full, read another
1934      line and add it into the buffer. */
1935    
1936      if (input_line_buffered && bufflength < (size_t)bufsize)
1937        {
1938        int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1939        bufflength += add;
1940        endptr += add;
1941        }
1942    
1943    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1944    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1945    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
1946    about to be lost, print them. */    about to be lost, print them. */
1947    
1948    if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)    if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1949      {      {
1950      if (after_context > 0 &&      if (after_context > 0 &&
1951          lastmatchnumber > 0 &&          lastmatchnumber > 0 &&
1952          lastmatchrestart < buffer + MBUFTHIRD)          lastmatchrestart < main_buffer + bufthird)
1953        {        {
1954        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1955        lastmatchnumber = 0;        lastmatchnumber = 0;
# Line 585  while (ptr < endptr) Line 1957  while (ptr < endptr)
1957    
1958      /* Now do the shuffle */      /* Now do the shuffle */
1959    
1960      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1961      ptr -= MBUFTHIRD;      ptr -= bufthird;
1962      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);  
1963      endptr = buffer + bufflength;  #ifdef SUPPORT_LIBZ
1964        if (frtype == FR_LIBZ)
1965          bufflength = 2*bufthird +
1966            gzread (ingz, main_buffer + 2*bufthird, bufthird);
1967        else
1968    #endif
1969    
1970    #ifdef SUPPORT_LIBBZ2
1971        if (frtype == FR_LIBBZ2)
1972          bufflength = 2*bufthird +
1973            BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1974        else
1975    #endif
1976    
1977        bufflength = 2*bufthird +
1978          (input_line_buffered?
1979           read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1980           fread(main_buffer + 2*bufthird, 1, bufthird, in));
1981        endptr = main_buffer + bufflength;
1982    
1983      /* Adjust any last match point */      /* Adjust any last match point */
1984    
1985      if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;      if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1986      }      }
1987    }     /* Loop through the whole file */    }     /* Loop through the whole file */
1988    
1989  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
1990  hyphenpending if it prints something. */  hyphenpending if it prints something. */
1991    
1992  do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);  if (!show_only_matching && !count_only)
1993  hyphenpending |= endhyphenpending;    {
1994      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1995      hyphenpending |= endhyphenpending;
1996      }
1997    
1998  /* Print the file name if we are looking for those without matches and there  /* Print the file name if we are looking for those without matches and there
1999  were none. If we found a match, we won't have got this far. */  were none. If we found a match, we won't have got this far. */
2000    
2001  if (filenames_nomatch_only)  if (filenames == FN_NOMATCH_ONLY)
2002    {    {
2003    fprintf(stdout, "%s\n", printname);    fprintf(stdout, "%s\n", printname);
2004    return 0;    return 0;
# Line 615  if (filenames_nomatch_only) Line 2008  if (filenames_nomatch_only)
2008    
2009  if (count_only)  if (count_only)
2010    {    {
2011    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
2012    fprintf(stdout, "%d\n", count);      {
2013        if (printname != NULL && filenames != FN_NONE)
2014          fprintf(stdout, "%s:", printname);
2015        fprintf(stdout, "%d\n", count);
2016        }
2017    }    }
2018    
2019  return rc;  return rc;
# Line 633  recursing; if it's a file, grep it. Line 2030  recursing; if it's a file, grep it.
2030    
2031  Arguments:  Arguments:
2032    pathname          the path to investigate    pathname          the path to investigate
2033    dir_recurse       TRUE if recursing is wanted (-r)    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
   show_filenames    TRUE if file names are wanted for multiple files, except  
                       for the only file at top level when not filenames_only  
2034    only_one_at_top   TRUE if the path is the only one at toplevel    only_one_at_top   TRUE if the path is the only one at toplevel
2035    
2036  Returns:   0 if there was at least one match  Returns:  -1 the file/directory was skipped
2037               0 if there was at least one match
2038             1 if there were no matches             1 if there were no matches
2039             2 there was some kind of error             2 there was some kind of error
2040    
# Line 646  However, file opening failures are suppr Line 2042  However, file opening failures are suppr
2042  */  */
2043    
2044  static int  static int
2045  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL show_filenames,  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
   BOOL only_one_at_top)  
2046  {  {
2047  int rc = 1;  int rc = 1;
2048  int sep;  int frtype;
2049  FILE *in;  void *handle;
2050  char *printname;  char *lastcomp;
2051    FILE *in = NULL;           /* Ensure initialized */
2052    
2053    #ifdef SUPPORT_LIBZ
2054    gzFile ingz = NULL;
2055    #endif
2056    
2057    #ifdef SUPPORT_LIBBZ2
2058    BZFILE *inbz2 = NULL;
2059    #endif
2060    
2061    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2062    int pathlen;
2063    #endif
2064    
2065  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
2066    
2067  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
2068    {    {
2069    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN, stdin_name,
2070      (filenames_only || filenames_nomatch_only ||      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
     (show_filenames && !only_one_at_top))?  
2071        stdin_name : NULL);        stdin_name : NULL);
2072    }    }
2073    
2074  /* If the file is a directory and we are recursing, scan each file within it,  /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2075  subject to any include or exclude patterns that were set. The scanning code is  directories, whereas --include and --exclude apply to everything else. The test
2076  localized so it can be made system-specific. */  is against the final component of the path. */
2077    
2078    lastcomp = strrchr(pathname, FILESEP);
2079    lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2080    
2081  if ((sep = isdirectory(pathname)) != 0 && dir_recurse)  /* If the file is a directory, skip if not recursing or if explicitly excluded.
2082    Otherwise, scan the directory and recurse for each path within it. The scanning
2083    code is localized so it can be made system-specific. */
2084    
2085    if (isdirectory(pathname))
2086    {    {
2087    char buffer[1024];    if (dee_action == dee_SKIP ||
2088    char *nextfile;        !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2089    directory_type *dir = opendirectory(pathname);      return -1;
2090    
2091      if (dee_action == dee_RECURSE)
2092        {
2093        char buffer[1024];
2094        char *nextfile;
2095        directory_type *dir = opendirectory(pathname);
2096    
2097        if (dir == NULL)
2098          {
2099          if (!silent)
2100            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
2101              strerror(errno));
2102          return 2;
2103          }
2104    
2105        while ((nextfile = readdirectory(dir)) != NULL)
2106          {
2107          int frc;
2108          sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
2109          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2110          if (frc > 1) rc = frc;
2111           else if (frc == 0 && rc == 1) rc = 0;
2112          }
2113    
2114        closedirectory(dir);
2115        return rc;
2116        }
2117      }
2118    
2119    /* If the file is not a directory and not a regular file, skip it if that's
2120    been requested. Otherwise, check for explicit include/exclude. */
2121    
2122    else if ((!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2123              !test_incexc(lastcomp, include_patterns, exclude_patterns))
2124            return -1;
2125    
2126    /* Control reaches here if we have a regular file, or if we have a directory
2127    and recursion or skipping was not requested, or if we have anything else and
2128    skipping was not requested. The scan proceeds. If this is the first and only
2129    argument at top level, we don't show the file name, unless we are only showing
2130    the file name, or the filename was forced (-H). */
2131    
2132    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2133    pathlen = (int)(strlen(pathname));
2134    #endif
2135    
2136    if (dir == NULL)  /* Open using zlib if it is supported and the file name ends with .gz. */
2137    
2138    #ifdef SUPPORT_LIBZ
2139    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
2140      {
2141      ingz = gzopen(pathname, "rb");
2142      if (ingz == NULL)
2143      {      {
2144      if (!silent)      if (!silent)
2145        fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2146          strerror(errno));          strerror(errno));
2147      return 2;      return 2;
2148      }      }
2149      handle = (void *)ingz;
2150      frtype = FR_LIBZ;
2151      }
2152    else
2153    #endif
2154    
2155    while ((nextfile = readdirectory(dir)) != NULL)  /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
     {  
     int frc, blen;  
     sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);  
     blen = strlen(buffer);  
   
     if (exclude_compiled != NULL &&  
         pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)  
       continue;  
2156    
2157      if (include_compiled != NULL &&  #ifdef SUPPORT_LIBBZ2
2158          pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
2159        continue;    {
2160      inbz2 = BZ2_bzopen(pathname, "rb");
2161      handle = (void *)inbz2;
2162      frtype = FR_LIBBZ2;
2163      }
2164    else
2165    #endif
2166    
2167      frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);  /* Otherwise use plain fopen(). The label is so that we can come back here if
2168      if (frc > 1) rc = frc;  an attempt to read a .bz2 file indicates that it really is a plain file. */
      else if (frc == 0 && rc == 1) rc = 0;  
     }  
2169    
2170    closedirectory(dir);  #ifdef SUPPORT_LIBBZ2
2171    return rc;  PLAIN_FILE:
2172    #endif
2173      {
2174      in = fopen(pathname, "rb");
2175      handle = (void *)in;
2176      frtype = FR_PLAIN;
2177    }    }
2178    
2179  /* If the file is not a directory, or we are not recursing, scan it. If this is  /* All the opening methods return errno when they fail. */
 the first and only argument at top level, we don't show the file name (unless  
 we are only showing the file name). Otherwise, control is via the  
 show_filenames variable. */  
2180    
2181  in = fopen(pathname, "r");  if (handle == NULL)
 if (in == NULL)  
2182    {    {
2183    if (!silent)    if (!silent)
2184      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 719  if (in == NULL) Line 2186  if (in == NULL)
2186    return 2;    return 2;
2187    }    }
2188    
2189  printname =  (filenames_only || filenames_nomatch_only ||  /* Now grep the file */
   (show_filenames && !only_one_at_top))? pathname : NULL;  
2190    
2191  rc = pcregrep(in, printname);  rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2192      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2193    
2194    /* Close in an appropriate manner. */
2195    
2196    #ifdef SUPPORT_LIBZ
2197    if (frtype == FR_LIBZ)
2198      gzclose(ingz);
2199    else
2200    #endif
2201    
2202    /* If it is a .bz2 file and the result is 3, it means that the first attempt to
2203    read failed. If the error indicates that the file isn't in fact bzipped, try
2204    again as a normal file. */
2205    
2206    #ifdef SUPPORT_LIBBZ2
2207    if (frtype == FR_LIBBZ2)
2208      {
2209      if (rc == 3)
2210        {
2211        int errnum;
2212        const char *err = BZ2_bzerror(inbz2, &errnum);
2213        if (errnum == BZ_DATA_ERROR_MAGIC)
2214          {
2215          BZ2_bzclose(inbz2);
2216          goto PLAIN_FILE;
2217          }
2218        else if (!silent)
2219          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
2220            pathname, err);
2221        rc = 2;    /* The normal "something went wrong" code */
2222        }
2223      BZ2_bzclose(inbz2);
2224      }
2225    else
2226    #endif
2227    
2228    /* Normal file close */
2229    
2230  fclose(in);  fclose(in);
2231    
2232    /* Pass back the yield from pcregrep(). */
2233    
2234  return rc;  return rc;
2235  }  }
2236    
2237    
2238    
   
2239  /*************************************************  /*************************************************
2240  *                Usage function                  *  *    Handle a single-letter, no data option      *
2241  *************************************************/  *************************************************/
2242    
2243  static int  static int
2244  usage(int rc)  handle_option(int letter, int options)
2245  {  {
2246  fprintf(stderr, "Usage: pcregrep [-LMVcfhilnqrsvwx] [long-options] [pattern] [file1 file2 ...]\n");  switch(letter)
2247  fprintf(stderr, "Type `pcregrep --help' for more information.\n");    {
2248  return rc;    case N_FOFFSETS: file_offsets = TRUE; break;
2249      case N_HELP: help(); pcregrep_exit(0);
2250      case N_LBUFFER: line_buffered = TRUE; break;
2251      case N_LOFFSETS: line_offsets = number = TRUE; break;
2252      case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2253      case 'a': binary_files = BIN_TEXT; break;
2254      case 'c': count_only = TRUE; break;
2255      case 'F': process_options |= PO_FIXED_STRINGS; break;
2256      case 'H': filenames = FN_FORCE; break;
2257      case 'I': binary_files = BIN_NOMATCH; break;
2258      case 'h': filenames = FN_NONE; break;
2259      case 'i': options |= PCRE_CASELESS; break;
2260      case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2261      case 'L': filenames = FN_NOMATCH_ONLY; break;
2262      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2263      case 'n': number = TRUE; break;
2264    
2265      case 'o':
2266      only_matching_last = add_number(0, only_matching_last);
2267      if (only_matching == NULL) only_matching = only_matching_last;
2268      break;
2269    
2270      case 'q': quiet = TRUE; break;
2271      case 'r': dee_action = dee_RECURSE; break;
2272      case 's': silent = TRUE; break;
2273      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
2274      case 'v': invert = TRUE; break;
2275      case 'w': process_options |= PO_WORD_MATCH; break;
2276      case 'x': process_options |= PO_LINE_MATCH; break;
2277    
2278      case 'V':
2279      fprintf(stdout, "pcregrep version %s\n", pcre_version());
2280      pcregrep_exit(0);
2281      break;
2282    
2283      default:
2284      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2285      pcregrep_exit(usage(2));
2286      }
2287    
2288    return options;
2289  }  }
2290    
2291    
2292    
2293    
2294  /*************************************************  /*************************************************
2295  *                Help function                   *  *          Construct printed ordinal             *
2296  *************************************************/  *************************************************/
2297    
2298  static void  /* This turns a number into "1st", "3rd", etc. */
2299  help(void)  
2300  {  static char *
2301  option_item *op;  ordin(int n)
2302    {
2303    static char buffer[8];
2304    char *p = buffer;
2305    sprintf(p, "%d", n);
2306    while (*p != 0) p++;
2307    switch (n%10)
2308      {
2309      case 1: strcpy(p, "st"); break;
2310      case 2: strcpy(p, "nd"); break;
2311      case 3: strcpy(p, "rd"); break;
2312      default: strcpy(p, "th"); break;
2313      }
2314    return buffer;
2315    }
2316    
2317    
2318    
2319    /*************************************************
2320    *          Compile a single pattern              *
2321    *************************************************/
2322    
2323    /* Do nothing if the pattern has already been compiled. This is the case for
2324    include/exclude patterns read from a file.
2325    
2326    When the -F option has been used, each "pattern" may be a list of strings,
2327    separated by line breaks. They will be matched literally. We split such a
2328    string and compile the first substring, inserting an additional block into the
2329    pattern chain.
2330    
2331    Arguments:
2332      p              points to the pattern block
2333      options        the PCRE options
2334      popts          the processing options
2335      fromfile       TRUE if the pattern was read from a file
2336      fromtext       file name or identifying text (e.g. "include")
2337      count          0 if this is the only command line pattern, or
2338                     number of the command line pattern, or
2339                     linenumber for a pattern from a file
2340    
2341    Returns:         TRUE on success, FALSE after an error
2342    */
2343    
2344    static BOOL
2345    compile_pattern(patstr *p, int options, int popts, int fromfile,
2346      const char *fromtext, int count)
2347    {
2348    char buffer[PATBUFSIZE];
2349    const char *error;
2350    char *ps = p->string;
2351    int patlen = strlen(ps);
2352    int errptr;
2353    
2354    if (p->compiled != NULL) return TRUE;
2355    
2356    if ((popts & PO_FIXED_STRINGS) != 0)
2357      {
2358      int ellength;
2359      char *eop = ps + patlen;
2360      char *pe = end_of_line(ps, eop, &ellength);
2361    
2362      if (ellength != 0)
2363        {
2364        if (add_pattern(pe, p) == NULL) return FALSE;
2365        patlen = (int)(pe - ps - ellength);
2366        }
2367      }
2368    
2369    sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2370    p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2371    if (p->compiled != NULL) return TRUE;
2372    
2373  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  /* Handle compile errors */
 printf("Search for PATTERN in each FILE or standard input.\n");  
 printf("PATTERN must be present if -f is not used.\n");  
 printf("\"-\" can be used as a file name to mean STDIN.\n");  
 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
2374    
2375  printf("Options:\n");  errptr -= (int)strlen(prefix[popts]);
2376    if (errptr > patlen) errptr = patlen;
2377    
2378  for (op = optionlist; op->one_char != 0; op++)  if (fromfile)
2379    {    {
2380    int n;    fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2381    char s[4];      "at offset %d: %s\n", count, fromtext, errptr, error);
2382    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    }
2383    printf("  %s --%s%n", s, op->long_name, &n);  else
2384    n = 30 - n;    {
2385    if (n < 1) n = 1;    if (count == 0)
2386    printf("%.*s%s\n", n, "                    ", op->help_text);      fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2387          fromtext, errptr, error);
2388      else
2389        fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2390          ordin(count), fromtext, errptr, error);
2391    }    }
2392    
2393  printf("\nWhen reading patterns from a file instead of using a command line option,\n");  return FALSE;
 printf("trailing white space is removed and blank lines are ignored.\n");  
 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
   
 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");  
 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  
2394  }  }
2395    
2396    
2397    
   
2398  /*************************************************  /*************************************************
2399  *    Handle a single-letter, no data option      *  *     Read and compile a file of patterns        *
2400  *************************************************/  *************************************************/
2401    
2402  static int  /* This is used for --file, --include-from, and --exclude-from.
2403  handle_option(int letter, int options)  
2404    Arguments:
2405      name         the name of the file; "-" is stdin
2406      patptr       pointer to the pattern chain anchor
2407      patlastptr   pointer to the last pattern pointer
2408      popts        the process options to pass to compile_pattern()
2409    
2410    Returns:       TRUE if all went well
2411    */
2412    
2413    static BOOL
2414    read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2415  {  {
2416  switch(letter)  int linenumber = 0;
2417    FILE *f;
2418    char *filename;
2419    char buffer[PATBUFSIZE];
2420    
2421    if (strcmp(name, "-") == 0)
2422    {    {
2423    case -1:  help(); exit(0);    f = stdin;
2424    case 'c': count_only = TRUE; break;    filename = stdin_name;
2425    case 'h': filenames = FALSE; break;    }
2426    case 'i': options |= PCRE_CASELESS; break;  else
2427    case 'l': filenames_only = TRUE; break;    {
2428    case 'L': filenames_nomatch_only = TRUE; break;    f = fopen(name, "r");
2429    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    if (f == NULL)
2430    case 'n': number = TRUE; break;      {
2431    case 'q': quiet = TRUE; break;      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2432    case 'r': recurse = TRUE; break;      return FALSE;
2433    case 's': silent = TRUE; break;      }
2434    case 'u': options |= PCRE_UTF8; break;    filename = name;
2435    case 'v': invert = TRUE; break;    }
   case 'w': word_match = TRUE; break;  
   case 'x': whole_lines = TRUE; break;  
2436    
2437    case 'V':  while (fgets(buffer, PATBUFSIZE, f) != NULL)
2438    fprintf(stderr, "pcregrep version %s using ", VERSION);    {
2439    fprintf(stderr, "PCRE version %s\n", pcre_version());    char *s = buffer + (int)strlen(buffer);
2440    exit(0);    while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2441    break;    *s = 0;
2442      linenumber++;
2443      if (buffer[0] == 0) continue;   /* Skip blank lines */
2444    
2445    default:    /* Note: this call to add_pattern() puts a pointer to the local variable
2446    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    "buffer" into the pattern chain. However, that pointer is used only when
2447    exit(usage(2));    compiling the pattern, which happens immediately below, so we flatten it
2448      afterwards, as a precaution against any later code trying to use it. */
2449    
2450      *patlastptr = add_pattern(buffer, *patlastptr);
2451      if (*patlastptr == NULL) return FALSE;
2452      if (*patptr == NULL) *patptr = *patlastptr;
2453    
2454      /* This loop is needed because compiling a "pattern" when -F is set may add
2455      on additional literal patterns if the original contains a newline. In the
2456      common case, it never will, because fgets() stops at a newline. However,
2457      the -N option can be used to give pcregrep a different newline setting. */
2458    
2459      for(;;)
2460        {
2461        if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2462            linenumber))
2463          return FALSE;
2464        (*patlastptr)->string = NULL;            /* Insurance */
2465        if ((*patlastptr)->next == NULL) break;
2466        *patlastptr = (*patlastptr)->next;
2467        }
2468    }    }
2469    
2470  return options;  if (f != stdin) fclose(f);
2471    return TRUE;
2472  }  }
2473    
2474    
2475    
   
2476  /*************************************************  /*************************************************
2477  *                Main program                    *  *                Main program                    *
2478  *************************************************/  *************************************************/
# Line 838  main(int argc, char **argv) Line 2484  main(int argc, char **argv)
2484  {  {
2485  int i, j;  int i, j;
2486  int rc = 1;  int rc = 1;
 int options = 0;  
 int errptr;  
 const char *error;  
2487  BOOL only_one_at_top;  BOOL only_one_at_top;
2488    patstr *cp;
2489    fnstr *fn;
2490    const char *locale_from = "--locale";
2491    const char *error;
2492    
2493    #ifdef SUPPORT_PCREGREP_JIT
2494    pcre_jit_stack *jit_stack = NULL;
2495    #endif
2496    
2497    /* Set the default line ending value from the default in the PCRE library;
2498    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2499    Note that the return values from pcre_config(), though derived from the ASCII
2500    codes, are the same in EBCDIC environments, so we must use the actual values
2501    rather than escapes such as '\r'. */
2502    
2503    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2504    switch(i)
2505      {
2506      default:               newline = (char *)"lf"; break;
2507      case 13:               newline = (char *)"cr"; break;
2508      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2509      case -1:               newline = (char *)"any"; break;
2510      case -2:               newline = (char *)"anycrlf"; break;
2511      }
2512    
2513  /* Process the options */  /* Process the options */
2514    
# Line 855  for (i = 1; i < argc; i++) Line 2522  for (i = 1; i < argc; i++)
2522    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
2523    
2524    /* If we hit an argument that is just "-", it may be a reference to STDIN,    /* If we hit an argument that is just "-", it may be a reference to STDIN,
2525    but only if we have previously had -f to define the patterns. */    but only if we have previously had -e or -f to define the patterns. */
2526    
2527    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2528      {      {
2529      if (pattern_filename != NULL) break;      if (pattern_files != NULL || patterns != NULL) break;
2530        else exit(usage(2));        else pcregrep_exit(usage(2));
2531      }      }
2532    
2533    /* Handle a long name option, or -- to terminate the options */    /* Handle a long name option, or -- to terminate the options */
# Line 881  for (i = 1; i < argc; i++) Line 2548  for (i = 1; i < argc; i++)
2548      /* Some long options have data that follows after =, for example file=name.      /* Some long options have data that follows after =, for example file=name.
2549      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2550      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2551      with Jeff Friedl in preferring "regex" without the "p". These options are      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2552      entered in the table as "regex(p)". No option is in both these categories,      These options are entered in the table as "regex(p)". Options can be in
2553      fortunately. */      both these categories. */
2554    
2555      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2556        {        {
2557        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2558        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2559        if (opbra == NULL)     /* Not a (p) case */  
2560          /* Handle options with only one spelling of the name */
2561    
2562          if (opbra == NULL)     /* Does not contain '(' */
2563          {          {
2564          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2565            {            {
# Line 897  for (i = 1; i < argc; i++) Line 2567  for (i = 1; i < argc; i++)
2567            }            }
2568          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2569            {            {
2570            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2571            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2572                (int)strlen(arg) : (int)(argequals - arg);
2573            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2574              {              {
2575              option_data = arg + arglen;              option_data = arg + arglen;
# Line 911  for (i = 1; i < argc; i++) Line 2582  for (i = 1; i < argc; i++)
2582              }              }
2583            }            }
2584          }          }
2585        else                   /* Special case xxxx(p) */  
2586          /* Handle options with an alternate spelling of the name */
2587    
2588          else
2589          {          {
2590          char buff1[24];          char buff1[24];
2591          char buff2[24];          char buff2[24];
2592          int baselen = opbra - op->long_name;  
2593            int baselen = (int)(opbra - op->long_name);
2594            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2595            int arglen = (argequals == NULL || equals == NULL)?
2596              (int)strlen(arg) : (int)(argequals - arg);
2597    
2598          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2599          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2600            opbra + 1);  
2601          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2602               strncmp(arg, buff2, arglen) == 0)
2603              {
2604              if (equals != NULL && argequals != NULL)
2605                {
2606                option_data = argequals;
2607                if (*option_data == '=')
2608                  {
2609                  option_data++;
2610                  longopwasequals = TRUE;
2611                  }
2612                }
2613            break;            break;
2614              }
2615          }          }
2616        }        }
2617    
2618      if (op->one_char == 0)      if (op->one_char == 0)
2619        {        {
2620        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2621        exit(usage(2));        pcregrep_exit(usage(2));
2622        }        }
2623      }      }
2624    
2625      /* Jeffrey Friedl's debugging harness uses these additional options which
2626      are not in the right form for putting in the option table because they use
2627      only one hyphen, yet are more than one character long. By putting them
2628      separately here, they will not get displayed as part of the help() output,
2629      but I don't think Jeffrey will care about that. */
2630    
2631    #ifdef JFRIEDL_DEBUG
2632      else if (strcmp(argv[i], "-pre") == 0) {
2633              jfriedl_prefix = argv[++i];
2634              continue;
2635      } else if (strcmp(argv[i], "-post") == 0) {
2636              jfriedl_postfix = argv[++i];
2637              continue;
2638      } else if (strcmp(argv[i], "-XT") == 0) {
2639              sscanf(argv[++i], "%d", &jfriedl_XT);
2640              continue;
2641      } else if (strcmp(argv[i], "-XR") == 0) {
2642              sscanf(argv[++i], "%d", &jfriedl_XR);
2643              continue;
2644      }
2645    #endif
2646    
2647    
2648    /* One-char options; many that have no data may be in a single argument; we    /* One-char options; many that have no data may be in a single argument; we
2649    continue till we hit the last one or one that needs data. */    continue till we hit the last one or one that needs data. */
2650    
# Line 938  for (i = 1; i < argc; i++) Line 2652  for (i = 1; i < argc; i++)
2652      {      {
2653      char *s = argv[i] + 1;      char *s = argv[i] + 1;
2654      longop = FALSE;      longop = FALSE;
2655    
2656      while (*s != 0)      while (*s != 0)
2657        {        {
2658        for (op = optionlist; op->one_char != 0; op++)        for (op = optionlist; op->one_char != 0; op++)
2659          { if (*s == op->one_char) break; }          {
2660            if (*s == op->one_char) break;
2661            }
2662        if (op->one_char == 0)        if (op->one_char == 0)
2663          {          {
2664          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2665            *s, argv[i]);            *s, argv[i]);
2666          exit(usage(2));          pcregrep_exit(usage(2));
2667            }
2668    
2669          option_data = s+1;
2670    
2671          /* Break out if this is the last character in the string; it's handled
2672          below like a single multi-char option. */
2673    
2674          if (*option_data == 0) break;
2675    
2676          /* Check for a single-character option that has data: OP_OP_NUMBER(S)
2677          are used for ones that either have a numeric value or a default, i.e.
2678          the data is optional. If a digit follows, there is data; if not, carry on
2679          with other single-character options in the same string. */
2680    
2681          if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
2682            {
2683            if (isdigit((unsigned char)s[1])) break;
2684          }          }
2685        if (op->type != OP_NODATA || s[1] == 0)        else   /* Check for an option with data */
2686          {          {
2687          option_data = s+1;          if (op->type != OP_NODATA) break;
         break;  
2688          }          }
2689        options = handle_option(*s++, options);  
2690          /* Handle a single-character option with no data, then loop for the
2691          next character in the string. */
2692    
2693          pcre_options = handle_option(*s++, pcre_options);
2694        }        }
2695      }      }
2696    
2697    /* At this point we should have op pointing to a matched option */    /* At this point we should have op pointing to a matched option. If the type
2698      is OP_NODATA, it means that there is no data, and the option might set
2699      something in the PCRE options. */
2700    
2701    if (op->type == OP_NODATA)    if (op->type == OP_NODATA)
     options = handle_option(op->one_char, options);  
   else  
2702      {      {
2703      if (*option_data == 0)      pcre_options = handle_option(op->one_char, pcre_options);
2704        continue;
2705        }
2706    
2707      /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
2708      either has a value or defaults to something. It cannot have data in a
2709      separate item. At the moment, the only such options are "colo(u)r",
2710      "only-matching", and Jeffrey Friedl's special -S debugging option. */
2711    
2712      if (*option_data == 0 &&
2713          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
2714           op->type == OP_OP_NUMBERS))
2715        {
2716        switch (op->one_char)
2717        {        {
2718        if (i >= argc - 1 || longopwasequals)        case N_COLOUR:
2719          {        colour_option = (char *)"auto";
2720          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        break;
2721          exit(usage(2));  
2722          }        case 'o':
2723        option_data = argv[++i];        only_matching_last = add_number(0, only_matching_last);
2724          if (only_matching == NULL) only_matching = only_matching_last;
2725          break;
2726    
2727    #ifdef JFRIEDL_DEBUG
2728          case 'S':
2729          S_arg = 0;
2730          break;
2731    #endif
2732        }        }
2733        continue;
2734        }
2735    
2736      /* Otherwise, find the data string for the option. */
2737    
2738      if (op->type == OP_STRING) *((char **)op->dataptr) = option_data; else    if (*option_data == 0)
2739        {
2740        if (i >= argc - 1 || longopwasequals)
2741        {        {
2742        char *endptr;        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2743        int n = strtoul(option_data, &endptr, 10);        pcregrep_exit(usage(2));
2744        if (*endptr != 0)        }
2745          {      option_data = argv[++i];
2746          if (longop)      }
2747            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%s\n",  
2748              option_data, op->long_name);    /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
2749          else    added to a chain of numbers. */
2750            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",  
2751              option_data, op->one_char);    if (op->type == OP_OP_NUMBERS)
2752          exit(usage(2));      {
2753          }      unsigned long int n = decode_number(option_data, op, longop);
2754        *((int *)op->dataptr) = n;      omdatastr *omd = (omdatastr *)op->dataptr;
2755        *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
2756        if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
2757        }
2758    
2759      /* If the option type is OP_PATLIST, it's the -e option, or one of the
2760      include/exclude options, which can be called multiple times to create lists
2761      of patterns. */
2762    
2763      else if (op->type == OP_PATLIST)
2764        {
2765        patdatastr *pd = (patdatastr *)op->dataptr;
2766        *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2767        if (*(pd->lastptr) == NULL) goto EXIT2;
2768        if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2769        }
2770    
2771      /* If the option type is OP_FILELIST, it's one of the options that names a
2772      file. */
2773    
2774      else if (op->type == OP_FILELIST)
2775        {
2776        fndatastr *fd = (fndatastr *)op->dataptr;
2777        fn = (fnstr *)malloc(sizeof(fnstr));
2778        if (fn == NULL)
2779          {
2780          fprintf(stderr, "pcregrep: malloc failed\n");
2781          goto EXIT2;
2782          }
2783        fn->next = NULL;
2784        fn->name = option_data;
2785        if (*(fd->anchor) == NULL)
2786          *(fd->anchor) = fn;
2787        else
2788          (*(fd->lastptr))->next = fn;
2789        *(fd->lastptr) = fn;
2790        }
2791    
2792      /* Handle OP_BINFILES */
2793    
2794      else if (op->type == OP_BINFILES)
2795        {
2796        if (strcmp(option_data, "binary") == 0)
2797          binary_files = BIN_BINARY;
2798        else if (strcmp(option_data, "without-match") == 0)
2799          binary_files = BIN_NOMATCH;
2800        else if (strcmp(option_data, "text") == 0)
2801          binary_files = BIN_TEXT;
2802        else
2803          {
2804          fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2805            option_data);
2806          pcregrep_exit(usage(2));
2807        }        }
2808      }      }
2809    
2810      /* Otherwise, deal with a single string or numeric data value. */
2811    
2812      else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2813               op->type != OP_OP_NUMBER)
2814        {
2815        *((char **)op->dataptr) = option_data;
2816        }
2817      else
2818        {
2819        unsigned long int n = decode_number(option_data, op, longop);
2820        if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
2821          else *((int *)op->dataptr) = n;
2822        }
2823    }    }
2824    
2825  /* Options have been decoded. If -C was used, its value is used as a default  /* Options have been decoded. If -C was used, its value is used as a default
# Line 1001  if (both_context > 0) Line 2831  if (both_context > 0)
2831    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2832    }    }
2833    
2834  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2835  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  However, all three set show_only_matching because they display, each in their
2836    own way, only the data that has matched. */
2837    
2838  if (pattern_list == NULL || hints_list == NULL)  if ((only_matching != NULL && (file_offsets || line_offsets)) ||
2839        (file_offsets && line_offsets))
2840    {    {
2841    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2842    return 2;      "and/or --line-offsets\n");
2843      pcregrep_exit(usage(2));
2844    }    }
2845    
2846  /* Compile the regular expression(s). */  if (only_matching != NULL || file_offsets || line_offsets)
2847      show_only_matching = TRUE;
2848    
2849    /* If a locale has not been provided as an option, see if the LC_CTYPE or
2850    LC_ALL environment variable is set, and if so, use it. */
2851    
2852  if (pattern_filename != NULL)  if (locale == NULL)
2853    {    {
2854    FILE *f = fopen(pattern_filename, "r");    locale = getenv("LC_ALL");
2855    char buffer[MBUFTHIRD + 16];    locale_from = "LCC_ALL";
2856    char *rdstart;    }
   int adjust = 0;  
2857    
2858    if (f == NULL)  if (locale == NULL)
2859      {
2860      locale = getenv("LC_CTYPE");
2861      locale_from = "LC_CTYPE";
2862      }
2863    
2864    /* If a locale has been provided, set it, and generate the tables the PCRE
2865    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2866    
2867    if (locale != NULL)
2868      {
2869      if (setlocale(LC_CTYPE, locale) == NULL)
2870      {      {
2871      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2872        strerror(errno));        locale, locale_from);
2873      return 2;      return 2;
2874      }      }
2875      pcretables = pcre_maketables();
2876      }
2877    
2878    if (whole_lines)  /* Sort out colouring */
2879      {  
2880      strcpy(buffer, "^(?:");  if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2881      adjust = 4;    {
2882      }    if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2883    else if (word_match)    else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2884      else
2885      {      {
2886      strcpy(buffer, "\\b");      fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2887      adjust = 2;        colour_option);
2888        return 2;
2889      }      }
2890      if (do_colour)
   rdstart = buffer + adjust;  
   while (fgets(rdstart, MBUFTHIRD, f) != NULL)  
2891      {      {
2892      char *s = rdstart + (int)strlen(rdstart);      char *cs = getenv("PCREGREP_COLOUR");
2893      if (pattern_count >= MAX_PATTERN_COUNT)      if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2894        {      if (cs != NULL) colour_string = cs;
       fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",  
         MAX_PATTERN_COUNT);  
       return 2;  
       }  
     while (s > rdstart && isspace((unsigned char)(s[-1]))) s--;  
     if (s == rdstart) continue;  
     if (whole_lines) strcpy(s, ")$");  
       else if (word_match)strcpy(s, "\\b");  
         else *s = 0;  
     pattern_list[pattern_count] = pcre_compile(buffer, options, &error,  
       &errptr, NULL);  
     if (pattern_list[pattern_count++] == NULL)  
       {  
       fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",  
         pattern_count, errptr - adjust, error);  
       return 2;  
       }  
2895      }      }
   fclose(f);  
2896    }    }
2897    
2898  /* If no file name, a single regex must be given inline. */  /* Interpret the newline type; the default settings are Unix-like. */
2899    
2900    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2901      {
2902      pcre_options |= PCRE_NEWLINE_CR;
2903      endlinetype = EL_CR;
2904      }
2905    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2906      {
2907      pcre_options |= PCRE_NEWLINE_LF;
2908      endlinetype = EL_LF;
2909      }
2910    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2911      {
2912      pcre_options |= PCRE_NEWLINE_CRLF;
2913      endlinetype = EL_CRLF;
2914      }
2915    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2916      {
2917      pcre_options |= PCRE_NEWLINE_ANY;
2918      endlinetype = EL_ANY;
2919      }
2920    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2921      {
2922      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2923      endlinetype = EL_ANYCRLF;
2924      }
2925  else  else
2926    {    {
2927    char buffer[MBUFTHIRD + 16];    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2928    char *pat;    return 2;
2929    int adjust = 0;    }
2930    
2931    if (i >= argc) return usage(2);  /* Interpret the text values for -d and -D */
2932    
2933    if (whole_lines)  if (dee_option != NULL)
2934      {    {
2935      sprintf(buffer, "^(?:%.*s)$", MBUFTHIRD, argv[i++]);    if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2936      pat = buffer;    else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2937      adjust = 4;    else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2938      }    else
   else if (word_match)  
2939      {      {
2940      sprintf(buffer, "\\b%.*s\\b", MBUFTHIRD, argv[i++]);      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2941      pat = buffer;      return 2;
     adjust = 2;  
2942      }      }
2943    else pat = argv[i++];    }
   
   pattern_list[0] = pcre_compile(pat, options, &error, &errptr, NULL);  
2944    
2945    if (pattern_list[0] == NULL)  if (DEE_option != NULL)
2946      {
2947      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2948      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2949      else
2950      {      {
2951      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
       errptr - adjust, error);  
2952      return 2;      return 2;
2953      }      }
   pattern_count++;  
2954    }    }
2955    
2956  /* Study the regular expressions, as we will be running them many times */  /* Check the values for Jeffrey Friedl's debugging options. */
2957    
2958    #ifdef JFRIEDL_DEBUG
2959    if (S_arg > 9)
2960      {
2961      fprintf(stderr, "pcregrep: bad value for -S option\n");
2962      return 2;
2963      }
2964    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2965      {
2966      if (jfriedl_XT == 0) jfriedl_XT = 1;
2967      if (jfriedl_XR == 0) jfriedl_XR = 1;
2968      }
2969    #endif
2970    
2971    /* Get memory for the main buffer. */
2972    
2973    bufsize = 3*bufthird;
2974    main_buffer = (char *)malloc(bufsize);
2975    
2976    if (main_buffer == NULL)
2977      {
2978      fprintf(stderr, "pcregrep: malloc failed\n");
2979      goto EXIT2;
2980      }
2981    
2982    /* If no patterns were provided by -e, and there are no files provided by -f,
2983    the first argument is the one and only pattern, and it must exist. */
2984    
2985    if (patterns == NULL && pattern_files == NULL)
2986      {
2987      if (i >= argc) return usage(2);
2988      patterns = patterns_last = add_pattern(argv[i++], NULL);
2989      if (patterns == NULL) goto EXIT2;
2990      }
2991    
2992    /* Compile the patterns that were provided on the command line, either by
2993    multiple uses of -e or as a single unkeyed pattern. We cannot do this until
2994    after all the command-line options are read so that we know which PCRE options
2995    to use. When -F is used, compile_pattern() may add another block into the
2996    chain, so we must not access the next pointer till after the compile. */
2997    
2998    for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
2999      {
3000      if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
3001           (j == 1 && patterns->next == NULL)? 0 : j))
3002        goto EXIT2;
3003      }
3004    
3005    /* Read and compile the regular expressions that are provided in files. */
3006    
3007    for (fn = pattern_files; fn != NULL; fn = fn->next)
3008      {
3009      if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3010        goto EXIT2;
3011      }
3012    
3013    /* Study the regular expressions, as we will be running them many times. If an
3014    extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
3015    returned, even if studying produces no data. */
3016    
3017    if (match_limit > 0 || match_limit_recursion > 0)
3018      study_options |= PCRE_STUDY_EXTRA_NEEDED;
3019    
3020  for (j = 0; j < pattern_count; j++)  /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
3021    
3022    #ifdef SUPPORT_PCREGREP_JIT
3023    if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3024      jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
3025    #endif
3026    
3027    for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3028    {    {
3029    hints_list[j] = pcre_study(pattern_list[j], 0, &error);    cp->hint = pcre_study(cp->compiled, study_options, &error);
3030    if (error != NULL)    if (error != NULL)
3031      {      {
3032      char s[16];      char s[16];
3033      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
3034      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
3035      return 2;      goto EXIT2;
3036      }      }
3037    #ifdef SUPPORT_PCREGREP_JIT
3038      if (jit_stack != NULL && cp->hint != NULL)
3039        pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
3040    #endif
3041    }    }
3042    
3043  /* If there are include or exclude patterns, compile them. */  /* If --match-limit or --recursion-limit was set, put the value(s) into the
3044    pcre_extra block for each pattern. There will always be an extra block because
3045    of the use of PCRE_STUDY_EXTRA_NEEDED above. */
3046    
3047  if (exclude_pattern != NULL)  for (cp = patterns; cp != NULL; cp = cp->next)
3048    {    {
3049    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, NULL);    if (match_limit > 0)
   if (exclude_compiled == NULL)  
3050      {      {
3051      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
3052        errptr, error);      cp->hint->match_limit = match_limit;
3053      return 2;      }
3054    
3055      if (match_limit_recursion > 0)
3056        {
3057        cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3058        cp->hint->match_limit_recursion = match_limit_recursion;
3059      }      }
3060    }    }
3061    
3062  if (include_pattern != NULL)  /* If there are include or exclude patterns read from the command line, compile
3063    them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3064    0. */
3065    
3066    for (j = 0; j < 4; j++)
3067    {    {
3068    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, NULL);    int k;
3069    if (include_compiled == NULL)    for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
3070      {      {
3071      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
3072        errptr, error);           (k == 1 && cp->next == NULL)? 0 : k))
3073      return 2;        goto EXIT2;
3074      }      }
3075    }    }
3076    
3077  /* If there are no further arguments, do the business on stdin and exit */  /* Read and compile include/exclude patterns from files. */
3078    
3079    for (fn = include_from; fn != NULL; fn = fn->next)
3080      {
3081      if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3082        goto EXIT2;
3083      }
3084    
3085    for (fn = exclude_from; fn != NULL; fn = fn->next)
3086      {
3087      if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3088        goto EXIT2;
3089      }
3090    
3091  if (i >= argc) return pcregrep(stdin,  /* If there are no files that contain lists of files to search, and there are
3092    (filenames_only || filenames_nomatch_only)? stdin_name : NULL);  no file arguments, search stdin, and then exit. */
3093    
3094  /* Otherwise, work through the remaining arguments as files or directories.  if (file_lists == NULL && i >= argc)
3095  Pass in the fact that there is only one argument at top level - this suppresses    {
3096  the file name if the argument is not a directory and filenames_only is not set.    rc = pcregrep(stdin, FR_PLAIN, stdin_name,
3097  */      (filenames > FN_DEFAULT)? stdin_name : NULL);
3098      goto EXIT;
3099      }
3100    
3101    /* If any files that contain a list of files to search have been specified,
3102    read them line by line and search the given files. */
3103    
3104    for (fn = file_lists; fn != NULL; fn = fn->next)
3105      {
3106      char buffer[PATBUFSIZE];
3107      FILE *fl;
3108      if (strcmp(fn->name, "-") == 0) fl = stdin; else
3109        {
3110        fl = fopen(fn->name, "rb");
3111        if (fl == NULL)
3112          {
3113          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
3114            strerror(errno));
3115          goto EXIT2;
3116          }
3117        }
3118      while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3119        {
3120        int frc;
3121        char *end = buffer + (int)strlen(buffer);
3122        while (end > buffer && isspace(end[-1])) end--;
3123        *end = 0;
3124        if (*buffer != 0)
3125          {
3126          frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3127          if (frc > 1) rc = frc;
3128            else if (frc == 0 && rc == 1) rc = 0;
3129          }
3130        }
3131      if (fl != stdin) fclose(fl);
3132      }
3133    
3134    /* After handling file-list, work through remaining arguments. Pass in the fact
3135    that there is only one argument at top level - this suppresses the file name if
3136    the argument is not a directory and filenames are not otherwise forced. */
3137    
3138  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1 && file_lists == NULL;
3139    
3140  for (; i < argc; i++)  for (; i < argc; i++)
3141    {    {
3142    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
3143        only_one_at_top);
3144    if (frc > 1) rc = frc;    if (frc > 1) rc = frc;
3145      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
3146    }    }
3147    
3148  return rc;  EXIT:
3149    #ifdef SUPPORT_PCREGREP_JIT
3150    if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3151    #endif
3152    
3153    if (main_buffer != NULL) free(main_buffer);
3154    
3155    free_pattern_chain(patterns);
3156    free_pattern_chain(include_patterns);
3157    free_pattern_chain(include_dir_patterns);
3158    free_pattern_chain(exclude_patterns);
3159    free_pattern_chain(exclude_dir_patterns);
3160    
3161    free_file_chain(exclude_from);
3162    free_file_chain(include_from);
3163    free_file_chain(pattern_files);
3164    free_file_chain(file_lists);
3165    
3166    while (only_matching != NULL)
3167      {
3168      omstr *this = only_matching;
3169      only_matching = this->next;
3170      free(this);
3171      }
3172    
3173    pcregrep_exit(rc);
3174    
3175    EXIT2:
3176    rc = 2;
3177    goto EXIT;
3178  }  }
3179    
3180  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.77  
changed lines
  Added in v.1136

  ViewVC Help
Powered by ViewVC 1.1.5