/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC revision 1548 by ph10, Tue Apr 14 17:02:30 2015 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix system it can recurse into directories. */  its pattern matching. On Unix-like, Windows, and native z/OS systems it can
7    recurse into directories, and in z/OS it can handle PDS files.
8    
9    Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
10    additional header is required. That header is not included in the main PCRE
11    distribution because other apparatus is needed to compile pcregrep for z/OS.
12    The header can be found in the special z/OS distribution, which is available
13    from www.zaconsultants.net or from www.cbttape.org.
14    
15               Copyright (c) 1997-2014 University of Cambridge
16    
17    -----------------------------------------------------------------------------
18    Redistribution and use in source and binary forms, with or without
19    modification, are permitted provided that the following conditions are met:
20    
21        * Redistributions of source code must retain the above copyright notice,
22          this list of conditions and the following disclaimer.
23    
24        * Redistributions in binary form must reproduce the above copyright
25          notice, this list of conditions and the following disclaimer in the
26          documentation and/or other materials provided with the distribution.
27    
28        * Neither the name of the University of Cambridge nor the names of its
29          contributors may be used to endorse or promote products derived from
30          this software without specific prior written permission.
31    
32    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
33    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
36    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
37    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
38    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
40    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
42    POSSIBILITY OF SUCH DAMAGE.
43    -----------------------------------------------------------------------------
44    */
45    
46    #ifdef HAVE_CONFIG_H
47    #include "config.h"
48    #endif
49    
50  #include <ctype.h>  #include <ctype.h>
51    #include <locale.h>
52  #include <stdio.h>  #include <stdio.h>
53  #include <string.h>  #include <string.h>
54  #include <stdlib.h>  #include <stdlib.h>
55  #include <errno.h>  #include <errno.h>
56  #include "config.h"  
57    #include <sys/types.h>
58    #include <sys/stat.h>
59    
60    #ifdef HAVE_UNISTD_H
61    #include <unistd.h>
62    #endif
63    
64    #ifdef SUPPORT_LIBZ
65    #include <zlib.h>
66    #endif
67    
68    #ifdef SUPPORT_LIBBZ2
69    #include <bzlib.h>
70    #endif
71    
72  #include "pcre.h"  #include "pcre.h"
73    
74  #define FALSE 0  #define FALSE 0
# Line 18  its pattern matching. On a Unix system i Line 76  its pattern matching. On a Unix system i
76    
77  typedef int BOOL;  typedef int BOOL;
78    
79  #define VERSION "2.0 01-Aug-2001"  #define OFFSET_SIZE 99
80  #define MAX_PATTERN_COUNT 100  
81    #if BUFSIZ > 8192
82    #define MAXPATLEN BUFSIZ
83    #else
84    #define MAXPATLEN 8192
85    #endif
86    
87    #define PATBUFSIZE (MAXPATLEN + 10)   /* Allows for prefix+suffix */
88    
89    /* Values for the "filenames" variable, which specifies options for file name
90    output. The order is important; it is assumed that a file name is wanted for
91    all values greater than FN_DEFAULT. */
92    
93    enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
94    
95    /* File reading styles */
96    
97    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
98    
99    /* Actions for the -d and -D options */
100    
101    enum { dee_READ, dee_SKIP, dee_RECURSE };
102    enum { DEE_READ, DEE_SKIP };
103    
104    /* Actions for special processing options (flag bits) */
105    
106    #define PO_WORD_MATCH     0x0001
107    #define PO_LINE_MATCH     0x0002
108    #define PO_FIXED_STRINGS  0x0004
109    
110    /* Line ending types */
111    
112    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
113    
114    /* Binary file options */
115    
116    enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
117    
118    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
119    environments), a warning is issued if the value of fwrite() is ignored.
120    Unfortunately, casting to (void) does not suppress the warning. To get round
121    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
122    apply to fprintf(). */
123    
124    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
125    
126    
127    
128  /*************************************************  /*************************************************
129  *               Global variables                 *  *               Global variables                 *
130  *************************************************/  *************************************************/
131    
132  static char *pattern_filename = NULL;  /* Jeffrey Friedl has some debugging requirements that are not part of the
133  static int  pattern_count = 0;  regular code. */
134  static pcre **pattern_list;  
135  static pcre_extra **hints_list;  #ifdef JFRIEDL_DEBUG
136    static int S_arg = -1;
137    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
138    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
139    static const char *jfriedl_prefix = "";
140    static const char *jfriedl_postfix = "";
141    #endif
142    
143    static int  endlinetype;
144    
145    static char *colour_string = (char *)"1;31";
146    static char *colour_option = NULL;
147    static char *dee_option = NULL;
148    static char *DEE_option = NULL;
149    static char *locale = NULL;
150    static char *main_buffer = NULL;
151    static char *newline = NULL;
152    static char *om_separator = (char *)"";
153    static char *stdin_name = (char *)"(standard input)";
154    
155    static const unsigned char *pcretables = NULL;
156    
157    static int after_context = 0;
158    static int before_context = 0;
159    static int binary_files = BIN_BINARY;
160    static int both_context = 0;
161    static int bufthird = PCREGREP_BUFSIZE;
162    static int bufsize = 3*PCREGREP_BUFSIZE;
163    
164    #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
165    static int dee_action = dee_SKIP;
166    #else
167    static int dee_action = dee_READ;
168    #endif
169    
170    static int DEE_action = DEE_READ;
171    static int error_count = 0;
172    static int filenames = FN_DEFAULT;
173    static int pcre_options = 0;
174    static int process_options = 0;
175    
176    #ifdef SUPPORT_PCREGREP_JIT
177    static int study_options = PCRE_STUDY_JIT_COMPILE;
178    #else
179    static int study_options = 0;
180    #endif
181    
182    static unsigned long int match_limit = 0;
183    static unsigned long int match_limit_recursion = 0;
184    
185  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
186  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
187  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
188    static BOOL hyphenpending = FALSE;
189  static BOOL invert = FALSE;  static BOOL invert = FALSE;
190    static BOOL line_buffered = FALSE;
191    static BOOL line_offsets = FALSE;
192    static BOOL multiline = FALSE;
193  static BOOL number = FALSE;  static BOOL number = FALSE;
194  static BOOL recurse = FALSE;  static BOOL omit_zero_count = FALSE;
195    static BOOL resource_error = FALSE;
196    static BOOL quiet = FALSE;
197    static BOOL show_only_matching = FALSE;
198  static BOOL silent = FALSE;  static BOOL silent = FALSE;
199  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
200    
201    /* Structure for list of --only-matching capturing numbers. */
202    
203    typedef struct omstr {
204      struct omstr *next;
205      int groupnum;
206    } omstr;
207    
208    static omstr *only_matching = NULL;
209    static omstr *only_matching_last = NULL;
210    
211    /* Structure for holding the two variables that describe a number chain. */
212    
213    typedef struct omdatastr {
214      omstr **anchor;
215      omstr **lastptr;
216    } omdatastr;
217    
218    static omdatastr only_matching_data = { &only_matching, &only_matching_last };
219    
220    /* Structure for list of file names (for -f and --{in,ex}clude-from) */
221    
222    typedef struct fnstr {
223      struct fnstr *next;
224      char *name;
225    } fnstr;
226    
227    static fnstr *exclude_from = NULL;
228    static fnstr *exclude_from_last = NULL;
229    static fnstr *include_from = NULL;
230    static fnstr *include_from_last = NULL;
231    
232    static fnstr *file_lists = NULL;
233    static fnstr *file_lists_last = NULL;
234    static fnstr *pattern_files = NULL;
235    static fnstr *pattern_files_last = NULL;
236    
237    /* Structure for holding the two variables that describe a file name chain. */
238    
239    typedef struct fndatastr {
240      fnstr **anchor;
241      fnstr **lastptr;
242    } fndatastr;
243    
244    static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
245    static fndatastr include_from_data = { &include_from, &include_from_last };
246    static fndatastr file_lists_data = { &file_lists, &file_lists_last };
247    static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
248    
249    /* Structure for pattern and its compiled form; used for matching patterns and
250    also for include/exclude patterns. */
251    
252    typedef struct patstr {
253      struct patstr *next;
254      char *string;
255      pcre *compiled;
256      pcre_extra *hint;
257    } patstr;
258    
259    static patstr *patterns = NULL;
260    static patstr *patterns_last = NULL;
261    static patstr *include_patterns = NULL;
262    static patstr *include_patterns_last = NULL;
263    static patstr *exclude_patterns = NULL;
264    static patstr *exclude_patterns_last = NULL;
265    static patstr *include_dir_patterns = NULL;
266    static patstr *include_dir_patterns_last = NULL;
267    static patstr *exclude_dir_patterns = NULL;
268    static patstr *exclude_dir_patterns_last = NULL;
269    
270    /* Structure holding the two variables that describe a pattern chain. A pointer
271    to such a structure is used for each appropriate option. */
272    
273    typedef struct patdatastr {
274      patstr **anchor;
275      patstr **lastptr;
276    } patdatastr;
277    
278    static patdatastr match_patdata = { &patterns, &patterns_last };
279    static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
280    static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
281    static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
282    static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
283    
284    static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
285                                     &include_dir_patterns, &exclude_dir_patterns };
286    
287    static const char *incexname[4] = { "--include", "--exclude",
288                                        "--include-dir", "--exclude-dir" };
289    
290  /* Structure for options and list of them */  /* Structure for options and list of them */
291    
292    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
293           OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
294    
295  typedef struct option_item {  typedef struct option_item {
296      int type;
297    int one_char;    int one_char;
298    char *long_name;    void *dataptr;
299    char *help_text;    const char *long_name;
300      const char *help_text;
301  } option_item;  } option_item;
302    
303    /* Options without a single-letter equivalent get a negative value. This can be
304    used to identify them. */
305    
306    #define N_COLOUR       (-1)
307    #define N_EXCLUDE      (-2)
308    #define N_EXCLUDE_DIR  (-3)
309    #define N_HELP         (-4)
310    #define N_INCLUDE      (-5)
311    #define N_INCLUDE_DIR  (-6)
312    #define N_LABEL        (-7)
313    #define N_LOCALE       (-8)
314    #define N_NULL         (-9)
315    #define N_LOFFSETS     (-10)
316    #define N_FOFFSETS     (-11)
317    #define N_LBUFFER      (-12)
318    #define N_M_LIMIT      (-13)
319    #define N_M_LIMIT_REC  (-14)
320    #define N_BUFSIZE      (-15)
321    #define N_NOJIT        (-16)
322    #define N_FILE_LIST    (-17)
323    #define N_BINARY_FILES (-18)
324    #define N_EXCLUDE_FROM (-19)
325    #define N_INCLUDE_FROM (-20)
326    #define N_OM_SEPARATOR (-21)
327    
328  static option_item optionlist[] = {  static option_item optionlist[] = {
329    { -1,  "help",         "display this help and exit" },    { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
330    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
331    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
332    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
333    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
334    { 'n', "line-number",  "print line number with output lines" },    { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
335    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
336    { 's', "no-messages",  "suppress error messages" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
337    { 'V', "version",      "print version information and exit" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
338    { 'v', "invert-match", "select non-matching lines" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
339    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
340    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
341    { 0,    NULL,           NULL }    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
342      { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
343      { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
344      { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
345      { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
346      { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
347      { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
348      { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
349      { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
350      { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
351    #ifdef SUPPORT_PCREGREP_JIT
352      { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
353    #else
354      { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
355    #endif
356      { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
357      { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
358      { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
359      { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
360      { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
361      { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
362      { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
363      { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
364      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
365      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
366      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
367      { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
368      { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
369      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
370      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
371      { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
372      { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
373      { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
374      { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
375      { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
376      { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
377    
378      /* These two were accidentally implemented with underscores instead of
379      hyphens in the option names. As this was not discovered for several releases,
380      the incorrect versions are left in the table for compatibility. However, the
381      --help function misses out any option that has an underscore in its name. */
382    
383      { OP_PATLIST,   N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
384      { OP_PATLIST,   N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
385    
386    #ifdef JFRIEDL_DEBUG
387      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
388    #endif
389      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
390      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
391      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
392      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
393      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
394      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
395      { OP_NODATA,    0,        NULL,               NULL,            NULL }
396  };  };
397    
398    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
399    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
400    that the combination of -w and -x has the same effect as -x on its own, so we
401    can treat them as the same. Note that the PATBUFSIZE macro assumes the longest
402    prefix+suffix is 10 characters; if anything longer is added, it must be
403    adjusted. */
404    
405    static const char *prefix[] = {
406      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
407    
408    static const char *suffix[] = {
409      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
410    
411    /* UTF-8 tables - used only when the newline setting is "any". */
412    
413    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
414    
415    const char utf8_table4[] = {
416      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
417      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
418      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
419      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
420    
421    
422    
423    /*************************************************
424    *         Exit from the program                  *
425    *************************************************/
426    
427    /* If there has been a resource error, give a suitable message.
428    
429    Argument:  the return code
430    Returns:   does not return
431    */
432    
433    static void
434    pcregrep_exit(int rc)
435    {
436    if (resource_error)
437      {
438      fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
439        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
440        PCRE_ERROR_JIT_STACKLIMIT);
441      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
442      }
443    exit(rc);
444    }
445    
446    
447    /*************************************************
448    *          Add item to chain of patterns         *
449    *************************************************/
450    
451    /* Used to add an item onto a chain, or just return an unconnected item if the
452    "after" argument is NULL.
453    
454    Arguments:
455      s          pattern string to add
456      after      if not NULL points to item to insert after
457    
458    Returns:     new pattern block, or NULL if the pattern is too long (exits if malloc fails)
459    */
460    
461    static patstr *
462    add_pattern(char *s, patstr *after)
463    {
464    patstr *p = (patstr *)malloc(sizeof(patstr));
465    if (p == NULL)
466      {
467      fprintf(stderr, "pcregrep: malloc failed\n");
468      pcregrep_exit(2);
469      }
470    if (strlen(s) > MAXPATLEN)
471      {
472      fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
473        MAXPATLEN);
474      free(p);
475      return NULL;
476      }
477    p->next = NULL;
478    p->string = s;
479    p->compiled = NULL;
480    p->hint = NULL;
481    
482    if (after != NULL)
483      {
484      p->next = after->next;
485      after->next = p;
486      }
487    return p;
488    }
489    
490    
491    /*************************************************
492    *           Free chain of patterns               *
493    *************************************************/
494    
495    /* Used for several chains of patterns.
496    
497    Argument: pointer to start of chain
498    Returns:  nothing
499    */
500    
501    static void
502    free_pattern_chain(patstr *pc)
503    {
504    while (pc != NULL)
505      {
506      patstr *p = pc;
507      pc = p->next;
508      if (p->hint != NULL) pcre_free_study(p->hint);
509      if (p->compiled != NULL) pcre_free(p->compiled);
510      free(p);
511      }
512    }
513    
514    
515    /*************************************************
516    *           Free chain of file names             *
517    *************************************************/
518    
519    /*
520    Argument: pointer to start of chain
521    Returns:  nothing
522    */
523    
524    static void
525    free_file_chain(fnstr *fn)
526    {
527    while (fn != NULL)
528      {
529      fnstr *f = fn;
530      fn = f->next;
531      free(f);
532      }
533    }
534    
535    
536  /*************************************************  /*************************************************
537  *       Functions for directory scanning         *  *            OS-specific functions               *
538  *************************************************/  *************************************************/
539    
540  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific.
541  although at present the only ones are for Unix, and for "no directory recursion  At present there are versions for Unix-style environments, Windows, native
542  support". */  z/OS, and "no support". */
543    
544    
545  /************* Directory scanning in Unix ***********/  /************* Directory scanning Unix-style and z/OS ***********/
546    
547  #if IS_UNIX  #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
548  #include <sys/types.h>  #include <sys/types.h>
549  #include <sys/stat.h>  #include <sys/stat.h>
550  #include <dirent.h>  #include <dirent.h>
551    
552    #if defined NATIVE_ZOS
553    /************* Directory and PDS/E scanning for z/OS ***********/
554    /************* z/OS looks mostly like Unix with USS ************/
555    /* However, z/OS needs the #include statements in this header */
556    #include "pcrzosfs.h"
557    /* That header is not included in the main PCRE distribution because
558       other apparatus is needed to compile pcregrep for z/OS. The header
559       can be found in the special z/OS distribution, which is available
560       from www.zaconsultants.net or from www.cbttape.org. */
561    #endif
562    
563  typedef DIR directory_type;  typedef DIR directory_type;
564    #define FILESEP '/'
565    
566  int  static int
567  isdirectory(char *filename)  isdirectory(char *filename)
568  {  {
569  struct stat statbuf;  struct stat statbuf;
570  if (stat(filename, &statbuf) < 0)  if (stat(filename, &statbuf) < 0)
571    return 0;        /* In the expectation that opening as a file will fail */    return 0;        /* In the expectation that opening as a file will fail */
572  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return (statbuf.st_mode & S_IFMT) == S_IFDIR;
573  }  }
574    
575  directory_type *  static directory_type *
576  opendirectory(char *filename)  opendirectory(char *filename)
577  {  {
578  return opendir(filename);  return opendir(filename);
579  }  }
580    
581  char *  static char *
582  readdirectory(directory_type *dir)  readdirectory(directory_type *dir)
583  {  {
584  for (;;)  for (;;)
# Line 108  for (;;) Line 588  for (;;)
588    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
589      return dent->d_name;      return dent->d_name;
590    }    }
591  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
592  }  }
593    
594  void  static void
595  closedirectory(directory_type *dir)  closedirectory(directory_type *dir)
596  {  {
597  closedir(dir);  closedir(dir);
598  }  }
599    
600    
601  #else  /************* Test for regular file, Unix-style **********/
602    
603    static int
604    isregfile(char *filename)
605    {
606    struct stat statbuf;
607    if (stat(filename, &statbuf) < 0)
608      return 1;        /* In the expectation that opening as a file will fail */
609    return (statbuf.st_mode & S_IFMT) == S_IFREG;
610    }
611    
612    
613  /************* Directory scanning when we can't do it ***********/  #if defined NATIVE_ZOS
614    /************* Test for a terminal in z/OS **********/
615    /* isatty() does not work in a TSO environment, so always give FALSE.*/
616    
617  /* The type is void, and apart from isdirectory(), the functions do nothing. */  static BOOL
618    is_stdout_tty(void)
619    {
620    return FALSE;
621    }
622    
623  typedef void directory_type;  static BOOL
624    is_file_tty(FILE *f)
625    {
626    return FALSE;
627    }
628    
 int isdirectory(char *filename) { return FALSE; }  
 directory_type * opendirectory(char *filename) {}  
 char *readdirectory(directory_type *dir) {}  
 void closedirectory(directory_type *dir) {}  
629    
630    /************* Test for a terminal, Unix-style **********/
631    
632    #else
633    static BOOL
634    is_stdout_tty(void)
635    {
636    return isatty(fileno(stdout));
637    }
638    
639    static BOOL
640    is_file_tty(FILE *f)
641    {
642    return isatty(fileno(f));
643    }
644  #endif  #endif
645    
646    /* End of Unix-style or native z/OS environment functions. */
647    
648    
649  #if ! HAVE_STRERROR  /************* Directory scanning in Windows ***********/
 /*************************************************  
 *     Provide strerror() for non-ANSI libraries  *  
 *************************************************/  
650    
651  /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()  /* I (Philip Hazel) have no means of testing this code. It was contributed by
652  in their libraries, but can provide the same facility by this simple  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
653  alternative function. */  when it did not exist. David Byron added a patch that moved the #include of
654    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
655    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
656    undefined when it is indeed undefined. */
657    
658  extern int   sys_nerr;  #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
 extern char *sys_errlist[];  
659    
660  char *  #ifndef STRICT
661  strerror(int n)  # define STRICT
662  {  #endif
663  if (n < 0 || n >= sys_nerr) return "unknown error number";  #ifndef WIN32_LEAN_AND_MEAN
664  return sys_errlist[n];  # define WIN32_LEAN_AND_MEAN
665  }  #endif
 #endif /* HAVE_STRERROR */  
666    
667    #include <windows.h>
668    
669    #ifndef INVALID_FILE_ATTRIBUTES
670    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
671    #endif
672    
673  /*************************************************  typedef struct directory_type
674  *              Grep an individual file           *  {
675  *************************************************/  HANDLE handle;
676    BOOL first;
677    WIN32_FIND_DATA data;
678    } directory_type;
679    
680  static int  #define FILESEP '/'
681  pcregrep(FILE *in, char *name)  
682    int
683    isdirectory(char *filename)
684  {  {
685  int rc = 1;  DWORD attr = GetFileAttributes(filename);
686  int linenumber = 0;  if (attr == INVALID_FILE_ATTRIBUTES)
687  int count = 0;    return 0;
688  int offsets[99];  return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
689  char buffer[BUFSIZ];  }
690    
691  while (fgets(buffer, sizeof(buffer), in) != NULL)  directory_type *
692    opendirectory(char *filename)
693    {
694    size_t len;
695    char *pattern;
696    directory_type *dir;
697    DWORD err;
698    len = strlen(filename);
699    pattern = (char *)malloc(len + 3);
700    dir = (directory_type *)malloc(sizeof(*dir));
701    if ((pattern == NULL) || (dir == NULL))
702    {    {
703    BOOL match = FALSE;    fprintf(stderr, "pcregrep: malloc failed\n");
704    int i;    pcregrep_exit(2);
705    int length = (int)strlen(buffer);    }
706    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;  memcpy(pattern, filename, len);
707    linenumber++;  memcpy(&(pattern[len]), "\\*", 3);
708    dir->handle = FindFirstFile(pattern, &(dir->data));
709    if (dir->handle != INVALID_HANDLE_VALUE)
710      {
711      free(pattern);
712      dir->first = TRUE;
713      return dir;
714      }
715    err = GetLastError();
716    free(pattern);
717    free(dir);
718    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
719    return NULL;
720    }
721    
722    for (i = 0; !match && i < pattern_count; i++)  char *
723    readdirectory(directory_type *dir)
724    {
725    for (;;)
726      {
727      if (!dir->first)
728      {      {
729      match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,      if (!FindNextFile(dir->handle, &(dir->data)))
730        offsets, 99) >= 0;        return NULL;
     if (match && whole_lines && offsets[1] != length) match = FALSE;  
731      }      }
732      else
   if (match != invert)  
733      {      {
734      if (count_only) count++;      dir->first = FALSE;
735        }
736      else if (filenames_only)    if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
737        {      return dir->data.cFileName;
738        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);    }
739        return 0;  #ifndef _MSC_VER
740        }  return NULL;   /* Keep compiler happy; never executed */
741    #endif
742    }
743    
744      else if (silent) return 0;  void
745    closedirectory(directory_type *dir)
746    {
747    FindClose(dir->handle);
748    free(dir);
749    }
750    
     else  
       {  
       if (name != NULL) fprintf(stdout, "%s:", name);  
       if (number) fprintf(stdout, "%d:", linenumber);  
       fprintf(stdout, "%s\n", buffer);  
       }  
751    
752      rc = 0;  /************* Test for regular file in Windows **********/
     }  
   }  
753    
754  if (count_only)  /* I don't know how to do this, or if it can be done; assume all paths are
755    {  regular if they are not directories. */
   if (name != NULL) fprintf(stdout, "%s:", name);  
   fprintf(stdout, "%d\n", count);  
   }  
756    
int
isregfile(char *filename)
{
/* Anything that is not a directory is treated as a regular file. */
return isdirectory(filename) == 0;
}
761    
762    
763    /************* Test for a terminal in Windows **********/
764    
765    /* I don't know how to do this; assume never */
766    
767  /*************************************************  static BOOL
768  *     Grep a file or recurse into a directory    *  is_stdout_tty(void)
769  *************************************************/  {
770    return FALSE;
771    }
772    
773  static int  static BOOL
774  grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,  is_file_tty(FILE *f)
   BOOL only_one_at_top)  
775  {  {
776  int rc = 1;  return FALSE;
777  int sep;  }
 FILE *in;  
778    
779  /* If the file is a directory and we are recursing, scan each file within it.  /* End of Windows functions */
 The scanning code is localized so it can be made system-specific. */  
780    
 if ((sep = isdirectory(filename)) != 0 && recurse)  
   {  
   char buffer[1024];  
   char *nextfile;  
   directory_type *dir = opendirectory(filename);  
781    
782    if (dir == NULL)  /************* Directory scanning when we can't do it ***********/
     {  
     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,  
       strerror(errno));  
     return 2;  
     }  
783    
784    while ((nextfile = readdirectory(dir)) != NULL)  /* The type is void, and apart from isdirectory(), the functions do nothing. */
     {  
     int frc;  
     sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);  
     frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);  
     if (frc == 0 && rc == 1) rc = 0;  
     }  
785    
786    closedirectory(dir);  #else
   return rc;  
   }  
787    
788  /* If the file is not a directory, or we are not recursing, scan it. If this is  #define FILESEP 0
789  the first and only argument at top level, we don't show the file name.  typedef void directory_type;
 Otherwise, control is via the show_filenames variable. */  
790    
791  in = fopen(filename, "r");  int isdirectory(char *filename) { return 0; }
792  if (in == NULL)  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
793    {  char *readdirectory(directory_type *dir) { return (char*)0;}
794    fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));  void closedirectory(directory_type *dir) {}
   return 2;  
   }  
795    
 rc = pcregrep(in, (show_filenames && !only_one_at_top)? filename : NULL);  
 fclose(in);  
 return rc;  
 }  
796    
797    /************* Test for regular file when we can't do it **********/
798    
799    /* Assume all files are regular. */
800    
int
isregfile(char *filename)
{
/* With no way of testing, every path is assumed to be a regular file. */
(void)filename;
return 1;
}
802    
 /*************************************************  
 *                Usage function                  *  
 *************************************************/  
803    
804  static int  /************* Test for a terminal when we can't do it **********/
805  usage(int rc)  
806    static BOOL
807    is_stdout_tty(void)
808  {  {
809  fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n");  return FALSE;
810  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  }
811  return rc;  
812    static BOOL
813    is_file_tty(FILE *f)
814    {
815    return FALSE;
816  }  }
817    
818    #endif  /* End of system-specific functions */
819    
820    
821    
822    #ifndef HAVE_STRERROR
823  /*************************************************  /*************************************************
824  *                Help function                   *  *     Provide strerror() for non-ANSI libraries  *
825  *************************************************/  *************************************************/
826    
827  static void  /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
828    in their libraries, but can provide the same facility by this simple
829    alternative function. */
830    
831    extern int   sys_nerr;
832    extern char *sys_errlist[];
833    
834    char *
835    strerror(int n)
836    {
837    if (n < 0 || n >= sys_nerr) return "unknown error number";
838    return sys_errlist[n];
839    }
840    #endif /* HAVE_STRERROR */
841    
842    
843    
844    /*************************************************
845    *                Usage function                  *
846    *************************************************/
847    
848    static int
849    usage(int rc)
850    {
851    option_item *op;
852    fprintf(stderr, "Usage: pcregrep [-");
853    for (op = optionlist; op->one_char != 0; op++)
854      {
855      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
856      }
857    fprintf(stderr, "] [long options] [pattern] [files]\n");
858    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
859      "options.\n");
860    return rc;
861    }
862    
863    
864    
865    /*************************************************
866    *                Help function                   *
867    *************************************************/
868    
869    static void
870  help(void)  help(void)
871  {  {
872  option_item *op;  option_item *op;
873    
874  printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
875  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
876  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
877    printf("\"-\" can be used as a file name to mean STDIN.\n");
878    
879    #ifdef SUPPORT_LIBZ
880    printf("Files whose names end in .gz are read using zlib.\n");
881    #endif
882    
883    #ifdef SUPPORT_LIBBZ2
884    printf("Files whose names end in .bz2 are read using bzlib2.\n");
885    #endif
886    
887    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
888    printf("Other files and the standard input are read as plain files.\n\n");
889    #else
890    printf("All files are read as plain files, without any interpretation.\n\n");
891    #endif
892    
893    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
894  printf("Options:\n");  printf("Options:\n");
895    
896  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
897    {    {
898    int n;    int n;
899    char s[4];    char s[4];
900    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");  
901    printf("  %s --%s%n", s, op->long_name, &n);    /* Two options were accidentally implemented and documented with underscores
902    n = 30 - n;    instead of hyphens in their names, something that was not noticed for quite a
903      few releases. When fixing this, I left the underscored versions in the list
904      in case people were using them. However, we don't want to display them in the
905      help data. There are no other options that contain underscores, and we do not
906      expect ever to implement such options. Therefore, just omit any option that
907      contains an underscore. */
908    
909      if (strchr(op->long_name, '_') != NULL) continue;
910    
911      if (op->one_char > 0 && (op->long_name)[0] == 0)
912        n = 31 - printf("  -%c", op->one_char);
913      else
914        {
915        if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
916          else strcpy(s, "   ");
917        n = 31 - printf("  %s --%s", s, op->long_name);
918        }
919    
920    if (n < 1) n = 1;    if (n < 1) n = 1;
921    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                           ", op->help_text);
922    }    }
923    
924  printf("\n  -f<filename>  or  --file=<filename>\n");  printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
925  printf("    Read patterns from <filename> instead of using a command line option.\n");  printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
926  printf("    Trailing white space is removed; blanks lines are ignored.\n");  printf("When reading patterns or file names from a file, trailing white\n");
927  printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  printf("space is removed and blank lines are ignored.\n");
928    printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
929    
930  printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
931  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
932  }  }
933    
934    
935    
   
936  /*************************************************  /*************************************************
937  *                Handle an option                *  *            Test exclude/includes               *
938  *************************************************/  *************************************************/
939    
940  static int  /* If any exclude pattern matches, the path is excluded. Otherwise, unless
941  handle_option(int letter, int options)  there are no includes, the path must match an include pattern.
942    
943    Arguments:
944      path      the path to be matched
945      ip        the chain of include patterns
946      ep        the chain of exclude patterns
947    
948    Returns:    TRUE if the path is not excluded
949    */
950    
951    static BOOL
952    test_incexc(char *path, patstr *ip, patstr *ep)
953  {  {
954  switch(letter)  int plen = strlen(path);
955    
956    for (; ep != NULL; ep = ep->next)
957    {    {
958    case -1:  help(); exit(0);    if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
959    case 'c': count_only = TRUE; break;      return FALSE;
960    case 'h': filenames = FALSE; break;    }
   case 'i': options |= PCRE_CASELESS; break;  
   case 'l': filenames_only = TRUE;  
   case 'n': number = TRUE; break;  
   case 'r': recurse = TRUE; break;  
   case 's': silent = TRUE; break;  
   case 'v': invert = TRUE; break;  
   case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;  
961    
962    case 'V':  if (ip == NULL) return TRUE;
   fprintf(stderr, "pcregrep version %s using ", VERSION);  
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
   exit(0);  
   break;  
963    
964    default:  for (; ip != NULL; ip = ip->next)
965    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    {
966    exit(usage(2));    if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
967        return TRUE;
968    }    }
969    
970  return options;  return FALSE;
971  }  }
972    
973    
974    
975    /*************************************************
976    *         Decode integer argument value          *
977    *************************************************/
978    
979    /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
980    because SunOS4 doesn't have it. This is used only for unpicking arguments, so
981    just keep it simple.
982    
983    Arguments:
984      option_data   the option data string
985      op            the option item (for error messages)
986      longop        TRUE if option given in long form
987    
988    Returns:        a long integer
989    */
990    
991    static long int
992    decode_number(char *option_data, option_item *op, BOOL longop)
993    {
994    unsigned long int n = 0;
995    char *endptr = option_data;
996    while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
997    while (isdigit((unsigned char)(*endptr)))
998      n = n * 10 + (int)(*endptr++ - '0');
999    if (toupper(*endptr) == 'K')
1000      {
1001      n *= 1024;
1002      endptr++;
1003      }
1004    else if (toupper(*endptr) == 'M')
1005      {
1006      n *= 1024*1024;
1007      endptr++;
1008      }
1009    
1010    if (*endptr != 0)   /* Error */
1011      {
1012      if (longop)
1013        {
1014        char *equals = strchr(op->long_name, '=');
1015        int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1016          (int)(equals - op->long_name);
1017        fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1018          option_data, nlen, op->long_name);
1019        }
1020      else
1021        fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1022          option_data, op->one_char);
1023      pcregrep_exit(usage(2));
1024      }
1025    
1026    return n;
1027    }
1028    
1029    
1030    
1031  /*************************************************  /*************************************************
1032  *                Main program                    *  *       Add item to a chain of numbers           *
1033  *************************************************/  *************************************************/
1034    
1035  int  /* Used to add an item onto a chain, or just return an unconnected item if the
1036  main(int argc, char **argv)  "after" argument is NULL.
1037    
1038    Arguments:
1039      n          the number to add
1040      after      if not NULL points to item to insert after
1041    
1042    Returns:     new number block
1043    */
1044    
1045    static omstr *
1046    add_number(int n, omstr *after)
1047  {  {
1048  int i, j;  omstr *om = (omstr *)malloc(sizeof(omstr));
 int rc = 1;  
 int options = 0;  
 int errptr;  
 const char *error;  
 BOOL only_one_at_top;  
1049    
1050  /* Process the options */  if (om == NULL)
1051      {
1052      fprintf(stderr, "pcregrep: malloc failed\n");
1053      pcregrep_exit(2);
1054      }
1055    om->next = NULL;
1056    om->groupnum = n;
1057    
1058  for (i = 1; i < argc; i++)  if (after != NULL)
1059    {    {
1060    if (argv[i][0] != '-') break;    om->next = after->next;
1061      after->next = om;
1062      }
1063    return om;
1064    }
1065    
   /* Long name options */  
1066    
1067    if (argv[i][1] == '-')  
/*************************************************
*            Read one line of input              *
*************************************************/

/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a newline has been stored, when "length" characters have
been stored, or at end of file. If "length" is zero or negative, nothing is
read from the file and nothing is written to the buffer. No terminating zero
is added.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;

/* Check the space limit before fetching a character, so that a character is
never consumed or stored when there is no room for it. */

while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = c;
  if (c == '\n') break;
  }
return yield;
}
1099    
1100    
1101    
1102    /*************************************************
1103    *             Find end of line                   *
1104    *************************************************/
1105    
1106    /* The length of the endline sequence that is found is set via lenptr. This may
1107    be zero at the very end of the file if there is no line-ending sequence there.
1108    
1109    Arguments:
1110      p         current position in line
1111      endptr    end of available data
1112      lenptr    where to put the length of the eol sequence
1113    
1114    Returns:    pointer after the last byte of the line,
1115                including the newline byte(s)
1116    */
1117    
1118    static char *
1119    end_of_line(char *p, char *endptr, int *lenptr)
1120    {
1121    switch(endlinetype)
1122      {
1123      default:      /* Just in case */
1124      case EL_LF:
1125      while (p < endptr && *p != '\n') p++;
1126      if (p < endptr)
1127        {
1128        *lenptr = 1;
1129        return p + 1;
1130        }
1131      *lenptr = 0;
1132      return endptr;
1133    
1134      case EL_CR:
1135      while (p < endptr && *p != '\r') p++;
1136      if (p < endptr)
1137      {      {
1138      option_item *op;      *lenptr = 1;
1139        return p + 1;
1140        }
1141      *lenptr = 0;
1142      return endptr;
1143    
1144      if (strncmp(argv[i]+2, "file=", 5) == 0)    case EL_CRLF:
1145      for (;;)
1146        {
1147        while (p < endptr && *p != '\r') p++;
1148        if (++p >= endptr)
1149          {
1150          *lenptr = 0;
1151          return endptr;
1152          }
1153        if (*p == '\n')
1154        {        {
1155        pattern_filename = argv[i] + 7;        *lenptr = 2;
1156        continue;        return p + 1;
1157        }        }
1158        }
1159      break;
1160    
1161      for (op = optionlist; op->one_char != 0; op++)    case EL_ANYCRLF:
1162      while (p < endptr)
1163        {
1164        int extra = 0;
1165        register int c = *((unsigned char *)p);
1166    
1167        if (utf8 && c >= 0xc0)
1168        {        {
1169        if (strcmp(argv[i]+2, op->long_name) == 0)        int gcii, gcss;
1170          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1171          gcss = 6*extra;
1172          c = (c & utf8_table3[extra]) << gcss;
1173          for (gcii = 1; gcii <= extra; gcii++)
1174          {          {
1175          options = handle_option(op->one_char, options);          gcss -= 6;
1176          break;          c |= (p[gcii] & 0x3f) << gcss;
1177          }          }
1178        }        }
1179      if (op->one_char == 0)  
1180        p += 1 + extra;
1181    
1182        switch (c)
1183        {        {
1184        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        case '\n':
1185        exit(usage(2));        *lenptr = 1;
1186          return p;
1187    
1188          case '\r':
1189          if (p < endptr && *p == '\n')
1190            {
1191            *lenptr = 2;
1192            p++;
1193            }
1194          else *lenptr = 1;
1195          return p;
1196    
1197          default:
1198          break;
1199        }        }
1200      }      }   /* End of loop for ANYCRLF case */
1201    
1202    /* One-char options */    *lenptr = 0;  /* Must have hit the end */
1203      return endptr;
1204    
1205    else    case EL_ANY:
1206      while (p < endptr)
1207      {      {
1208      char *s = argv[i] + 1;      int extra = 0;
1209      while (*s != 0)      register int c = *((unsigned char *)p);
1210    
1211        if (utf8 && c >= 0xc0)
1212        {        {
1213        if (*s == 'f')        int gcii, gcss;
1214          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1215          gcss = 6*extra;
1216          c = (c & utf8_table3[extra]) << gcss;
1217          for (gcii = 1; gcii <= extra; gcii++)
1218          {          {
1219          pattern_filename = s + 1;          gcss -= 6;
1220          if (pattern_filename[0] == 0)          c |= (p[gcii] & 0x3f) << gcss;
           {  
           if (i >= argc - 1)  
             {  
             fprintf(stderr, "pcregrep: File name missing after -f\n");  
             exit(usage(2));  
             }  
           pattern_filename = argv[++i];  
           }  
         break;  
1221          }          }
       else options = handle_option(*s++, options);  
1222        }        }
     }  
   }  
1223    
1224  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));      p += 1 + extra;
 hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  
1225    
1226  if (pattern_list == NULL || hints_list == NULL)      switch (c)
1227    {        {
1228    fprintf(stderr, "pcregrep: malloc failed\n");        case '\n':    /* LF */
1229    return 2;        case '\v':    /* VT */
1230    }        case '\f':    /* FF */
1231          *lenptr = 1;
1232          return p;
1233    
1234          case '\r':    /* CR */
1235          if (p < endptr && *p == '\n')
1236            {
1237            *lenptr = 2;
1238            p++;
1239            }
1240          else *lenptr = 1;
1241          return p;
1242    
1243    #ifndef EBCDIC
1244          case 0x85:    /* Unicode NEL */
1245          *lenptr = utf8? 2 : 1;
1246          return p;
1247    
1248          case 0x2028:  /* Unicode LS */
1249          case 0x2029:  /* Unicode PS */
1250          *lenptr = 3;
1251          return p;
1252    #endif  /* Not EBCDIC */
1253    
1254          default:
1255          break;
1256          }
1257        }   /* End of loop for ANY case */
1258    
1259  /* Compile the regular expression(s). */    *lenptr = 0;  /* Must have hit the end */
1260      return endptr;
1261      }     /* End of overall switch */
1262    }
1263    
1264    
1265    
1266    /*************************************************
1267    *         Find start of previous line            *
1268    *************************************************/
1269    
1270    /* This is called when looking back for before lines to print.
1271    
1272    Arguments:
1273      p         start of the subsequent line
1274      startptr  start of available data
1275    
1276  if (pattern_filename != NULL)  Returns:    pointer to the start of the previous line
1277    */
1278    
1279    static char *
1280    previous_line(char *p, char *startptr)
1281    {
1282    switch(endlinetype)
1283    {    {
1284    FILE *f = fopen(pattern_filename, "r");    default:      /* Just in case */
1285    char buffer[BUFSIZ];    case EL_LF:
1286    if (f == NULL)    p--;
1287      while (p > startptr && p[-1] != '\n') p--;
1288      return p;
1289    
1290      case EL_CR:
1291      p--;
1292      while (p > startptr && p[-1] != '\n') p--;
1293      return p;
1294    
1295      case EL_CRLF:
1296      for (;;)
1297      {      {
1298      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      p -= 2;
1299        strerror(errno));      while (p > startptr && p[-1] != '\n') p--;
1300      return 2;      if (p <= startptr + 1 || p[-2] == '\r') return p;
1301      }      }
1302    while (fgets(buffer, sizeof(buffer), f) != NULL)    /* Control can never get here */
1303    
1304      case EL_ANY:
1305      case EL_ANYCRLF:
1306      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1307      if (utf8) while ((*p & 0xc0) == 0x80) p--;
1308    
1309      while (p > startptr)
1310      {      {
1311      char *s = buffer + (int)strlen(buffer);      register unsigned int c;
1312      if (pattern_count >= MAX_PATTERN_COUNT)      char *pp = p - 1;
1313    
1314        if (utf8)
1315        {        {
1316        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",        int extra = 0;
1317          MAX_PATTERN_COUNT);        while ((*pp & 0xc0) == 0x80) pp--;
1318        return 2;        c = *((unsigned char *)pp);
1319          if (c >= 0xc0)
1320            {
1321            int gcii, gcss;
1322            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1323            gcss = 6*extra;
1324            c = (c & utf8_table3[extra]) << gcss;
1325            for (gcii = 1; gcii <= extra; gcii++)
1326              {
1327              gcss -= 6;
1328              c |= (pp[gcii] & 0x3f) << gcss;
1329              }
1330            }
1331        }        }
1332      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      else c = *((unsigned char *)pp);
1333      if (s == buffer) continue;  
1334      *s = 0;      if (endlinetype == EL_ANYCRLF) switch (c)
     pattern_list[pattern_count] = pcre_compile(buffer, options, &error,  
       &errptr, NULL);  
     if (pattern_list[pattern_count++] == NULL)  
1335        {        {
1336        fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",        case '\n':    /* LF */
1337          pattern_count, errptr, error);        case '\r':    /* CR */
1338        return 2;        return p;
1339    
1340          default:
1341          break;
1342          }
1343    
1344        else switch (c)
1345          {
1346          case '\n':    /* LF */
1347          case '\v':    /* VT */
1348          case '\f':    /* FF */
1349          case '\r':    /* CR */
1350    #ifndef EBCDIE
1351          case 0x85:    /* Unicode NEL */
1352          case 0x2028:  /* Unicode LS */
1353          case 0x2029:  /* Unicode PS */
1354    #endif  /* Not EBCDIC */
1355          return p;
1356    
1357          default:
1358          break;
1359        }        }
     }  
   fclose(f);  
   }  
1360    
1361  /* If no file name, a single regex must be given inline */      p = pp;  /* Back one character */
1362        }        /* End of loop for ANY case */
1363    
1364  else    return startptr;  /* Hit start of data */
1365      }     /* End of overall switch */
1366    }
1367    
1368    
1369    
1370    
1371    
1372    /*************************************************
1373    *       Print the previous "after" lines         *
1374    *************************************************/
1375    
1376    /* This is called if we are about to lose said lines because of buffer filling,
1377    and at the end of the file. The data in the line is written using fwrite() so
1378    that a binary zero does not terminate it.
1379    
1380    Arguments:
1381      lastmatchnumber   the number of the last matching line, plus one
1382      lastmatchrestart  where we restarted after the last match
1383      endptr            end of available data
1384      printname         filename for printing
1385    
1386    Returns:            nothing
1387    */
1388    
1389    static void
1390    do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1391      char *printname)
1392    {
1393    if (after_context > 0 && lastmatchnumber > 0)
1394    {    {
1395    if (i >= argc) return usage(0);    int count = 0;
1396    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);    while (lastmatchrestart < endptr && count++ < after_context)
   if (pattern_list[0] == NULL)  
1397      {      {
1398      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      int ellength;
1399        error);      char *pp = lastmatchrestart;
1400      return 2;      if (printname != NULL) fprintf(stdout, "%s-", printname);
1401        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1402        pp = end_of_line(pp, endptr, &ellength);
1403        FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1404        lastmatchrestart = pp;
1405      }      }
1406    pattern_count++;    hyphenpending = TRUE;
1407    }    }
1408    }
1409    
1410    
 /* Study the regular expressions, as we will be running them many times */  
1411    
1412  for (j = 0; j < pattern_count; j++)  /*************************************************
1413    *   Apply patterns to subject till one matches   *
1414    *************************************************/
1415    
1416    /* This function is called to run through all patterns, looking for a match. It
1417    is used multiple times for the same subject when colouring is enabled, in order
1418    to find all possible matches.
1419    
1420    Arguments:
1421      matchptr     the start of the subject
1422      length       the length of the subject to match
1423      options      options for pcre_exec
1424      startoffset  where to start matching
1425      offsets      the offsets vector to fill in
1426      mrc          address of where to put the result of pcre_exec()
1427    
1428    Returns:      TRUE if there was a match
1429                  FALSE if there was no match
1430                  invert if there was a non-fatal error
1431    */
1432    
1433    static BOOL
1434    match_patterns(char *matchptr, size_t length, unsigned int options,
1435      int startoffset, int *offsets, int *mrc)
1436    {
1437    int i;
1438    size_t slen = length;
1439    patstr *p = patterns;
1440    const char *msg = "this text:\n\n";
1441    
1442    if (slen > 200)
1443    {    {
1444    hints_list[j] = pcre_study(pattern_list[j], 0, &error);    slen = 200;
1445    if (error != NULL)    msg = "text that starts:\n\n";
1446      }
1447    for (i = 1; p != NULL; p = p->next, i++)
1448      {
1449      *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1450        startoffset, options, offsets, OFFSET_SIZE);
1451      if (*mrc >= 0) return TRUE;
1452      if (*mrc == PCRE_ERROR_NOMATCH) continue;
1453      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1454      if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1455      fprintf(stderr, "%s", msg);
1456      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1457      fprintf(stderr, "\n\n");
1458      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1459          *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1460        resource_error = TRUE;
1461      if (error_count++ > 20)
1462      {      {
1463      char s[16];      fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1464      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      pcregrep_exit(2);
     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);  
     return 2;  
1465      }      }
1466      return invert;    /* No more matching; don't show the line again */
1467    }    }
1468    
1469  /* If there are no further arguments, do the business on stdin and exit */  return FALSE;  /* No match, no errors */
1470    }
1471    
1472    
 if (i >= argc) return pcregrep(stdin, NULL);  
1473    
1474  /* Otherwise, work through the remaining arguments as files or directories.  /*************************************************
1475  Pass in the fact that there is only one argument at top level - this suppresses  *            Grep an individual file             *
1476  the file name if the argument is not a directory. */  *************************************************/
1477    
1478  only_one_at_top = (i == argc - 1);  /* This is called from grep_or_recurse() below. It uses a buffer that is three
1479  if (filenames_only) filenames = TRUE;  times the value of bufthird. The matching point is never allowed to stray into
1480    the top third of the buffer, thus keeping more of the file available for
1481    context printing or for multiline scanning. For large files, the pointer will
1482    be in the middle third most of the time, so the bottom third is available for
1483    "before" context printing.
1484    
1485    Arguments:
1486      handle       the fopened FILE stream for a normal file
1487                   the gzFile pointer when reading is via libz
1488                   the BZFILE pointer when reading is via libbz2
1489      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1490      filename     the file name or NULL (for errors)
1491      printname    the file name if it is to be printed for each match
1492                   or NULL if the file name is not to be printed
1493                   it cannot be NULL if filenames[_nomatch]_only is set
1494    
1495    Returns:       0 if there was at least one match
1496                   1 otherwise (no matches)
1497                   2 if an overlong line is encountered
1498                   3 if there is a read error on a .bz2 file
1499    */
1500    
1501  for (; i < argc; i++)  static int
1502    pcregrep(void *handle, int frtype, char *filename, char *printname)
1503    {
1504    int rc = 1;
1505    int linenumber = 1;
1506    int lastmatchnumber = 0;
1507    int count = 0;
1508    int filepos = 0;
1509    int offsets[OFFSET_SIZE];
1510    char *lastmatchrestart = NULL;
1511    char *ptr = main_buffer;
1512    char *endptr;
1513    size_t bufflength;
1514    BOOL binary = FALSE;
1515    BOOL endhyphenpending = FALSE;
1516    BOOL input_line_buffered = line_buffered;
1517    FILE *in = NULL;                    /* Ensure initialized */
1518    
1519    #ifdef SUPPORT_LIBZ
1520    gzFile ingz = NULL;
1521    #endif
1522    
1523    #ifdef SUPPORT_LIBBZ2
1524    BZFILE *inbz2 = NULL;
1525    #endif
1526    
1527    
1528    /* Do the first read into the start of the buffer and set up the pointer to end
1529    of what we have. In the case of libz, a non-zipped .gz file will be read as a
1530    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1531    fail. */
1532    
1533    (void)frtype;
1534    
1535    #ifdef SUPPORT_LIBZ
1536    if (frtype == FR_LIBZ)
1537    {    {
1538    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    ingz = (gzFile)handle;
1539    if (frc == 0 && rc == 1) rc = 0;    bufflength = gzread (ingz, main_buffer, bufsize);
1540    }    }
1541    else
1542    #endif
1543    
1544  return rc;  #ifdef SUPPORT_LIBBZ2
1545    if (frtype == FR_LIBBZ2)
1546      {
1547      inbz2 = (BZFILE *)handle;
1548      bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1549      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1550      }                                    /* without the cast it is unsigned. */
1551    else
1552    #endif
1553    
1554      {
1555      in = (FILE *)handle;
1556      if (is_file_tty(in)) input_line_buffered = TRUE;
1557      bufflength = input_line_buffered?
1558        read_one_line(main_buffer, bufsize, in) :
1559        fread(main_buffer, 1, bufsize, in);
1560      }
1561    
1562    endptr = main_buffer + bufflength;
1563    
1564    /* Unless binary-files=text, see if we have a binary file. This uses the same
1565    rule as GNU grep, namely, a search for a binary zero byte near the start of the
1566    file. */
1567    
1568    if (binary_files != BIN_TEXT)
1569      {
1570      binary =
1571        memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1572      if (binary && binary_files == BIN_NOMATCH) return 1;
1573      }
1574    
1575    /* Loop while the current pointer is not at the end of the file. For large
1576    files, endptr will be at the end of the buffer when we are in the middle of the
1577    file, but ptr will never get there, because as soon as it gets over 2/3 of the
1578    way, the buffer is shifted left and re-filled. */
1579    
1580    while (ptr < endptr)
1581      {
1582      int endlinelength;
1583      int mrc = 0;
1584      int startoffset = 0;
1585      int prevoffsets[2];
1586      unsigned int options = 0;
1587      BOOL match;
1588      char *matchptr = ptr;
1589      char *t = ptr;
1590      size_t length, linelength;
1591    
1592      prevoffsets[0] = prevoffsets[1] = -1;
1593    
1594      /* At this point, ptr is at the start of a line. We need to find the length
1595      of the subject string to pass to pcre_exec(). In multiline mode, it is the
1596    length of the remainder of the data in the buffer. Otherwise, it is the length of
1597      the next line, excluding the terminating newline. After matching, we always
1598      advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1599      option is used for compiling, so that any match is constrained to be in the
1600      first line. */
1601    
1602      t = end_of_line(t, endptr, &endlinelength);
1603      linelength = t - ptr - endlinelength;
1604      length = multiline? (size_t)(endptr - ptr) : linelength;
1605    
1606      /* Check to see if the line we are looking at extends right to the very end
1607      of the buffer without a line terminator. This means the line is too long to
1608      handle. */
1609    
1610      if (endlinelength == 0 && t == main_buffer + bufsize)
1611        {
1612        fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1613                        "pcregrep: check the --buffer-size option\n",
1614                        linenumber,
1615                        (filename == NULL)? "" : " of file ",
1616                        (filename == NULL)? "" : filename);
1617        return 2;
1618        }
1619    
1620      /* Extra processing for Jeffrey Friedl's debugging. */
1621    
1622    #ifdef JFRIEDL_DEBUG
1623      if (jfriedl_XT || jfriedl_XR)
1624      {
1625    #     include <sys/time.h>
1626    #     include <time.h>
1627          struct timeval start_time, end_time;
1628          struct timezone dummy;
1629          int i;
1630    
1631          if (jfriedl_XT)
1632          {
1633              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1634              const char *orig = ptr;
1635              ptr = malloc(newlen + 1);
1636              if (!ptr) {
1637                      printf("out of memory");
1638                      pcregrep_exit(2);
1639              }
1640              endptr = ptr;
1641              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1642              for (i = 0; i < jfriedl_XT; i++) {
1643                      strncpy(endptr, orig,  length);
1644                      endptr += length;
1645              }
1646              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1647              length = newlen;
1648          }
1649    
1650          if (gettimeofday(&start_time, &dummy) != 0)
1651                  perror("bad gettimeofday");
1652    
1653    
1654          for (i = 0; i < jfriedl_XR; i++)
1655              match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1656                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1657    
1658          if (gettimeofday(&end_time, &dummy) != 0)
1659                  perror("bad gettimeofday");
1660    
1661          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1662                          -
1663                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1664    
1665          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1666          return 0;
1667      }
1668    #endif
1669    
1670      /* We come back here after a match when show_only_matching is set, in order
1671      to find any further matches in the same line. This applies to
1672      --only-matching, --file-offsets, and --line-offsets. */
1673    
1674      ONLY_MATCHING_RESTART:
1675    
1676      /* Run through all the patterns until one matches or there is an error other
1677      than NOMATCH. This code is in a subroutine so that it can be re-used for
1678      finding subsequent matches when colouring matched lines. After finding one
1679      match, set PCRE_NOTEMPTY to disable any further matches of null strings in
1680      this line. */
1681    
1682      match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc);
1683      options = PCRE_NOTEMPTY;
1684    
1685      /* If it's a match or a not-match (as required), do what's wanted. */
1686    
1687      if (match != invert)
1688        {
1689        BOOL hyphenprinted = FALSE;
1690    
1691        /* We've failed if we want a file that doesn't have any matches. */
1692    
1693        if (filenames == FN_NOMATCH_ONLY) return 1;
1694    
1695        /* Just count if just counting is wanted. */
1696    
1697        if (count_only) count++;
1698    
1699        /* When handling a binary file and binary-files==binary, the "binary"
1700        variable will be set true (it's false in all other cases). In this
1701        situation we just want to output the file name. No need to scan further. */
1702    
1703        else if (binary)
1704          {
1705          fprintf(stdout, "Binary file %s matches\n", filename);
1706          return 0;
1707          }
1708    
1709        /* If all we want is a file name, there is no need to scan any more lines
1710        in the file. */
1711    
1712        else if (filenames == FN_MATCH_ONLY)
1713          {
1714          fprintf(stdout, "%s\n", printname);
1715          return 0;
1716          }
1717    
1718        /* Likewise, if all we want is a yes/no answer. */
1719    
1720        else if (quiet) return 0;
1721    
1722        /* The --only-matching option prints just the substring that matched,
1723        and/or one or more captured portions of it, as long as these strings are
1724        not empty. The --file-offsets and --line-offsets options output offsets for
1725        the matching substring (all three set show_only_matching). None of these
1726        mutually exclusive options prints any context. Afterwards, adjust the start
1727        and then jump back to look for further matches in the same line. If we are
1728        in invert mode, however, nothing is printed and we do not restart - this
1729        could still be useful because the return code is set. */
1730    
1731        else if (show_only_matching)
1732          {
1733          if (!invert)
1734            {
1735            int oldstartoffset = startoffset;
1736    
1737            /* It is possible, when a lookbehind assertion contains \K, for the
1738            same string to be found again. The code below advances startoffset, but
1739            until it is past the "bumpalong" offset that gave the match, the same
1740            substring will be returned. The PCRE1 library does not return the
1741            bumpalong offset, so all we can do is ignore repeated strings. (PCRE2
1742            does this better.) */
1743    
1744            if (prevoffsets[0] != offsets[0] || prevoffsets[1] != offsets[1])
1745              {
1746              prevoffsets[0] = offsets[0];
1747              prevoffsets[1] = offsets[1];
1748    
1749              if (printname != NULL) fprintf(stdout, "%s:", printname);
1750              if (number) fprintf(stdout, "%d:", linenumber);
1751    
1752              /* Handle --line-offsets */
1753    
1754              if (line_offsets)
1755                fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1756                  offsets[1] - offsets[0]);
1757    
1758              /* Handle --file-offsets */
1759    
1760              else if (file_offsets)
1761                fprintf(stdout, "%d,%d\n",
1762                  (int)(filepos + matchptr + offsets[0] - ptr),
1763                  offsets[1] - offsets[0]);
1764    
1765              /* Handle --only-matching, which may occur many times */
1766    
1767              else
1768                {
1769                BOOL printed = FALSE;
1770                omstr *om;
1771    
1772                for (om = only_matching; om != NULL; om = om->next)
1773                  {
1774                  int n = om->groupnum;
1775                  if (n < mrc)
1776                    {
1777                    int plen = offsets[2*n + 1] - offsets[2*n];
1778                    if (plen > 0)
1779                      {
1780                      if (printed) fprintf(stdout, "%s", om_separator);
1781                      if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1782                      FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
1783                      if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1784                      printed = TRUE;
1785                      }
1786                    }
1787                  }
1788    
1789                if (printed || printname != NULL || number) fprintf(stdout, "\n");
1790                }
1791              }
1792    
1793            /* Prepare to repeat to find the next match. If the pattern contained
1794            a lookbehind that included \K, it is possible that the end of the match
1795            might be at or before the actual starting offset we have just used. We
1796            need to start one character further on. Unfortunately, for unanchored
1797            patterns, the actual start offset can be greater than the one that was
1798            set as a result of "bumpalong". PCRE1 does not return the actual start
1799            offset, so we have to check against the original start offset. This may
1800            lead to duplicates - we need the fudge above to avoid printing them.
1801            (PCRE2 does this better.) */
1802    
1803            match = FALSE;
1804            if (line_buffered) fflush(stdout);
1805            rc = 0;                      /* Had some success */
1806            startoffset = offsets[1];    /* Restart after the match */
1807            if (startoffset <= oldstartoffset)
1808              {
1809              if ((size_t)startoffset >= length)
1810                goto END_ONE_MATCH;              /* We were at the end */
1811              startoffset = oldstartoffset + 1;
1812              if (utf8)
1813                while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++;
1814              }
1815            goto ONLY_MATCHING_RESTART;
1816            }
1817          }
1818    
1819        /* This is the default case when none of the above options is set. We print
1820        the matching line(s), possibly preceded and/or followed by other lines of
1821        context. */
1822    
1823        else
1824          {
1825          /* See if there is a requirement to print some "after" lines from a
1826          previous match. We never print any overlaps. */
1827    
1828          if (after_context > 0 && lastmatchnumber > 0)
1829            {
1830            int ellength;
1831            int linecount = 0;
1832            char *p = lastmatchrestart;
1833    
1834            while (p < ptr && linecount < after_context)
1835              {
1836              p = end_of_line(p, ptr, &ellength);
1837              linecount++;
1838              }
1839    
1840            /* It is important to advance lastmatchrestart during this printing so
1841            that it interacts correctly with any "before" printing below. Print
1842            each line's data using fwrite() in case there are binary zeroes. */
1843    
1844            while (lastmatchrestart < p)
1845              {
1846              char *pp = lastmatchrestart;
1847              if (printname != NULL) fprintf(stdout, "%s-", printname);
1848              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1849              pp = end_of_line(pp, endptr, &ellength);
1850              FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1851              lastmatchrestart = pp;
1852              }
1853            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1854            }
1855    
1856          /* If there were non-contiguous lines printed above, insert hyphens. */
1857    
1858          if (hyphenpending)
1859            {
1860            fprintf(stdout, "--\n");
1861            hyphenpending = FALSE;
1862            hyphenprinted = TRUE;
1863            }
1864    
1865          /* See if there is a requirement to print some "before" lines for this
1866          match. Again, don't print overlaps. */
1867    
1868          if (before_context > 0)
1869            {
1870            int linecount = 0;
1871            char *p = ptr;
1872    
1873            while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1874                   linecount < before_context)
1875              {
1876              linecount++;
1877              p = previous_line(p, main_buffer);
1878              }
1879    
1880            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1881              fprintf(stdout, "--\n");
1882    
1883            while (p < ptr)
1884              {
1885              int ellength;
1886              char *pp = p;
1887              if (printname != NULL) fprintf(stdout, "%s-", printname);
1888              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1889              pp = end_of_line(pp, endptr, &ellength);
1890              FWRITE(p, 1, pp - p, stdout);
1891              p = pp;
1892              }
1893            }
1894    
1895          /* Now print the matching line(s); ensure we set hyphenpending at the end
1896          of the file if any context lines are being output. */
1897    
1898          if (after_context > 0 || before_context > 0)
1899            endhyphenpending = TRUE;
1900    
1901          if (printname != NULL) fprintf(stdout, "%s:", printname);
1902          if (number) fprintf(stdout, "%d:", linenumber);
1903    
1904          /* In multiline mode, we want to print to the end of the line in which
1905          the end of the matched string is found, so we adjust linelength and the
1906          line number appropriately, but only when there actually was a match
1907          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1908          the match will always be before the first newline sequence. */
1909    
1910          if (multiline & !invert)
1911            {
1912            char *endmatch = ptr + offsets[1];
1913            t = ptr;
1914            while (t <= endmatch)
1915              {
1916              t = end_of_line(t, endptr, &endlinelength);
1917              if (t < endmatch) linenumber++; else break;
1918              }
1919            linelength = t - ptr - endlinelength;
1920            }
1921    
1922          /*** NOTE: Use only fwrite() to output the data line, so that binary
1923          zeroes are treated as just another data character. */
1924    
1925          /* This extra option, for Jeffrey Friedl's debugging requirements,
1926          replaces the matched string, or a specific captured string if it exists,
1927          with X. When this happens, colouring is ignored. */
1928    
1929    #ifdef JFRIEDL_DEBUG
1930          if (S_arg >= 0 && S_arg < mrc)
1931            {
1932            int first = S_arg * 2;
1933            int last  = first + 1;
1934            FWRITE(ptr, 1, offsets[first], stdout);
1935            fprintf(stdout, "X");
1936            FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1937            }
1938          else
1939    #endif
1940    
1941          /* We have to split the line(s) up if colouring, and search for further
1942          matches, but not of course if the line is a non-match. */
1943    
1944          if (do_colour && !invert)
1945            {
1946            int plength;
1947            FWRITE(ptr, 1, offsets[0], stdout);
1948            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1949            FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1950            fprintf(stdout, "%c[00m", 0x1b);
1951            for (;;)
1952              {
1953              startoffset = offsets[1];
1954              if (startoffset >= (int)linelength + endlinelength ||
1955                  !match_patterns(matchptr, length, options, startoffset, offsets,
1956                    &mrc))
1957                break;
1958              FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1959              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1960              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1961              fprintf(stdout, "%c[00m", 0x1b);
1962              }
1963    
1964            /* In multiline mode, we may have already printed the complete line
1965            and its line-ending characters (if they matched the pattern), so there
1966            may be no more to print. */
1967    
1968            plength = (int)((linelength + endlinelength) - startoffset);
1969            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1970            }
1971    
1972          /* Not colouring; no need to search for further matches */
1973    
1974          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1975          }
1976    
1977        /* End of doing what has to be done for a match. If --line-buffered was
1978        given, flush the output. */
1979    
1980        if (line_buffered) fflush(stdout);
1981        rc = 0;    /* Had some success */
1982    
1983        /* Remember where the last match happened for after_context. We remember
1984        where we are about to restart, and that line's number. */
1985    
1986        lastmatchrestart = ptr + linelength + endlinelength;
1987        lastmatchnumber = linenumber + 1;
1988        }
1989    
1990      /* For a match in multiline inverted mode (which of course did not cause
1991      anything to be printed), we have to move on to the end of the match before
1992      proceeding. */
1993    
1994      if (multiline && invert && match)
1995        {
1996        int ellength;
1997        char *endmatch = ptr + offsets[1];
1998        t = ptr;
1999        while (t < endmatch)
2000          {
2001          t = end_of_line(t, endptr, &ellength);
2002          if (t <= endmatch) linenumber++; else break;
2003          }
2004        endmatch = end_of_line(endmatch, endptr, &ellength);
2005        linelength = endmatch - ptr - ellength;
2006        }
2007    
2008      /* Advance to after the newline and increment the line number. The file
2009      offset to the current line is maintained in filepos. */
2010    
2011      END_ONE_MATCH:
2012      ptr += linelength + endlinelength;
2013      filepos += (int)(linelength + endlinelength);
2014      linenumber++;
2015    
2016      /* If input is line buffered, and the buffer is not yet full, read another
2017      line and add it into the buffer. */
2018    
2019      if (input_line_buffered && bufflength < (size_t)bufsize)
2020        {
2021        int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
2022        bufflength += add;
2023        endptr += add;
2024        }
2025    
2026      /* If we haven't yet reached the end of the file (the buffer is full), and
2027      the current point is in the top 1/3 of the buffer, slide the buffer down by
2028      1/3 and refill it. Before we do this, if some unprinted "after" lines are
2029      about to be lost, print them. */
2030    
2031      if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
2032        {
2033        if (after_context > 0 &&
2034            lastmatchnumber > 0 &&
2035            lastmatchrestart < main_buffer + bufthird)
2036          {
2037          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2038          lastmatchnumber = 0;
2039          }
2040    
2041        /* Now do the shuffle */
2042    
2043        memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
2044        ptr -= bufthird;
2045    
2046    #ifdef SUPPORT_LIBZ
2047        if (frtype == FR_LIBZ)
2048          bufflength = 2*bufthird +
2049            gzread (ingz, main_buffer + 2*bufthird, bufthird);
2050        else
2051    #endif
2052    
2053    #ifdef SUPPORT_LIBBZ2
2054        if (frtype == FR_LIBBZ2)
2055          bufflength = 2*bufthird +
2056            BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
2057        else
2058    #endif
2059    
2060        bufflength = 2*bufthird +
2061          (input_line_buffered?
2062           read_one_line(main_buffer + 2*bufthird, bufthird, in) :
2063           fread(main_buffer + 2*bufthird, 1, bufthird, in));
2064        endptr = main_buffer + bufflength;
2065    
2066        /* Adjust any last match point */
2067    
2068        if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
2069        }
2070      }     /* Loop through the whole file */
2071    
2072    /* End of file; print final "after" lines if wanted; do_after_lines sets
2073    hyphenpending if it prints something. */
2074    
2075    if (!show_only_matching && !count_only)
2076      {
2077      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2078      hyphenpending |= endhyphenpending;
2079      }
2080    
2081    /* Print the file name if we are looking for those without matches and there
2082    were none. If we found a match, we won't have got this far. */
2083    
2084    if (filenames == FN_NOMATCH_ONLY)
2085      {
2086      fprintf(stdout, "%s\n", printname);
2087      return 0;
2088      }
2089    
2090    /* Print the match count if wanted */
2091    
2092    if (count_only)
2093      {
2094      if (count > 0 || !omit_zero_count)
2095        {
2096        if (printname != NULL && filenames != FN_NONE)
2097          fprintf(stdout, "%s:", printname);
2098        fprintf(stdout, "%d\n", count);
2099        }
2100      }
2101    
2102    return rc;
2103    }
2104    
2105    
2106    
2107    /*************************************************
2108    *     Grep a file or recurse into a directory    *
2109    *************************************************/
2110    
2111    /* Given a path name, if it's a directory, scan all the files if we are
2112    recursing; if it's a file, grep it.
2113    
2114    Arguments:
2115      pathname          the path to investigate
2116      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
2117      only_one_at_top   TRUE if the path is the only one at toplevel
2118    
2119    Returns:  -1 the file/directory was skipped
2120               0 if there was at least one match
2121               1 if there were no matches
2122               2 there was some kind of error
2123    
2124    However, file opening failures are suppressed if "silent" is set.
2125    */
2126    
2127    static int
2128    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2129    {
2130    int rc = 1;
2131    int frtype;
2132    void *handle;
2133    char *lastcomp;
2134    FILE *in = NULL;           /* Ensure initialized */
2135    
2136    #ifdef SUPPORT_LIBZ
2137    gzFile ingz = NULL;
2138    #endif
2139    
2140    #ifdef SUPPORT_LIBBZ2
2141    BZFILE *inbz2 = NULL;
2142    #endif
2143    
2144    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2145    int pathlen;
2146    #endif
2147    
2148    #if defined NATIVE_ZOS
2149    int zos_type;
2150    FILE *zos_test_file;
2151    #endif
2152    
2153    /* If the file name is "-" we scan stdin */
2154    
2155    if (strcmp(pathname, "-") == 0)
2156      {
2157      return pcregrep(stdin, FR_PLAIN, stdin_name,
2158        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
2159          stdin_name : NULL);
2160      }
2161    
2162    /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2163    directories, whereas --include and --exclude apply to everything else. The test
2164    is against the final component of the path. */
2165    
2166    lastcomp = strrchr(pathname, FILESEP);
2167    lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2168    
2169    /* If the file is a directory, skip if not recursing or if explicitly excluded.
2170    Otherwise, scan the directory and recurse for each path within it. The scanning
2171    code is localized so it can be made system-specific. */
2172    
2173    
2174    /* For z/OS, determine the file type. */
2175    
2176    #if defined NATIVE_ZOS
2177    zos_test_file =  fopen(pathname,"rb");
2178    
2179    if (zos_test_file == NULL)
2180       {
2181       if (!silent) fprintf(stderr, "pcregrep: failed to test next file %s\n",
2182         pathname, strerror(errno));
2183       return -1;
2184       }
2185    zos_type = identifyzosfiletype (zos_test_file);
2186    fclose (zos_test_file);
2187    
2188    /* Handle a PDS in separate code */
2189    
2190    if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
2191       {
2192       return travelonpdsdir (pathname, only_one_at_top);
2193       }
2194    
2195    /* Deal with regular files in the normal way below. These types are:
2196       zos_type == __ZOS_PDS_MEMBER
2197       zos_type == __ZOS_PS
2198       zos_type == __ZOS_VSAM_KSDS
2199       zos_type == __ZOS_VSAM_ESDS
2200       zos_type == __ZOS_VSAM_RRDS
2201    */
2202    
2203    /* Handle a z/OS directory using common code. */
2204    
2205    else if (zos_type == __ZOS_HFS)
2206     {
2207    #endif  /* NATIVE_ZOS */
2208    
2209    
2210    /* Handle directories: common code for all OS */
2211    
2212    if (isdirectory(pathname))
2213      {
2214      if (dee_action == dee_SKIP ||
2215          !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2216        return -1;
2217    
2218      if (dee_action == dee_RECURSE)
2219        {
2220        char buffer[1024];
2221        char *nextfile;
2222        directory_type *dir = opendirectory(pathname);
2223    
2224        if (dir == NULL)
2225          {
2226          if (!silent)
2227            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
2228              strerror(errno));
2229          return 2;
2230          }
2231    
2232        while ((nextfile = readdirectory(dir)) != NULL)
2233          {
2234          int frc;
2235          sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
2236          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2237          if (frc > 1) rc = frc;
2238           else if (frc == 0 && rc == 1) rc = 0;
2239          }
2240    
2241        closedirectory(dir);
2242        return rc;
2243        }
2244      }
2245    
2246    #if defined NATIVE_ZOS
2247     }
2248    #endif
2249    
2250    /* If the file is not a directory, check for a regular file, and if it is not,
2251    skip it if that's been requested. Otherwise, check for an explicit inclusion or
2252    exclusion. */
2253    
2254    else if (
2255    #if defined NATIVE_ZOS
2256            (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
2257    #else  /* all other OS */
2258            (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2259    #endif
2260            !test_incexc(lastcomp, include_patterns, exclude_patterns))
2261      return -1;  /* File skipped */
2262    
2263    /* Control reaches here if we have a regular file, or if we have a directory
2264    and recursion or skipping was not requested, or if we have anything else and
2265    skipping was not requested. The scan proceeds. If this is the first and only
2266    argument at top level, we don't show the file name, unless we are only showing
2267    the file name, or the filename was forced (-H). */
2268    
2269    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2270    pathlen = (int)(strlen(pathname));
2271    #endif
2272    
2273    /* Open using zlib if it is supported and the file name ends with .gz. */
2274    
2275    #ifdef SUPPORT_LIBZ
2276    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
2277      {
2278      ingz = gzopen(pathname, "rb");
2279      if (ingz == NULL)
2280        {
2281        if (!silent)
2282          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2283            strerror(errno));
2284        return 2;
2285        }
2286      handle = (void *)ingz;
2287      frtype = FR_LIBZ;
2288      }
2289    else
2290    #endif
2291    
2292    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
2293    
2294    #ifdef SUPPORT_LIBBZ2
2295    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
2296      {
2297      inbz2 = BZ2_bzopen(pathname, "rb");
2298      handle = (void *)inbz2;
2299      frtype = FR_LIBBZ2;
2300      }
2301    else
2302    #endif
2303    
2304    /* Otherwise use plain fopen(). The label is so that we can come back here if
2305    an attempt to read a .bz2 file indicates that it really is a plain file. */
2306    
2307    #ifdef SUPPORT_LIBBZ2
2308    PLAIN_FILE:
2309    #endif
2310      {
2311      in = fopen(pathname, "rb");
2312      handle = (void *)in;
2313      frtype = FR_PLAIN;
2314      }
2315    
2316    /* All the opening methods return errno when they fail. */
2317    
2318    if (handle == NULL)
2319      {
2320      if (!silent)
2321        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2322          strerror(errno));
2323      return 2;
2324      }
2325    
2326    /* Now grep the file */
2327    
2328    rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2329      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2330    
2331    /* Close in an appropriate manner. */
2332    
2333    #ifdef SUPPORT_LIBZ
2334    if (frtype == FR_LIBZ)
2335      gzclose(ingz);
2336    else
2337    #endif
2338    
2339    /* If it is a .bz2 file and the result is 3, it means that the first attempt to
2340    read failed. If the error indicates that the file isn't in fact bzipped, try
2341    again as a normal file. */
2342    
2343    #ifdef SUPPORT_LIBBZ2
2344    if (frtype == FR_LIBBZ2)
2345      {
2346      if (rc == 3)
2347        {
2348        int errnum;
2349        const char *err = BZ2_bzerror(inbz2, &errnum);
2350        if (errnum == BZ_DATA_ERROR_MAGIC)
2351          {
2352          BZ2_bzclose(inbz2);
2353          goto PLAIN_FILE;
2354          }
2355        else if (!silent)
2356          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
2357            pathname, err);
2358        rc = 2;    /* The normal "something went wrong" code */
2359        }
2360      BZ2_bzclose(inbz2);
2361      }
2362    else
2363    #endif
2364    
2365    /* Normal file close */
2366    
2367    fclose(in);
2368    
2369    /* Pass back the yield from pcregrep(). */
2370    
2371    return rc;
2372    }
2373    
2374    
2375    
2376    /*************************************************
2377    *    Handle a single-letter, no data option      *
2378    *************************************************/
2379    
2380    static int
2381    handle_option(int letter, int options)
2382    {
2383    switch(letter)
2384      {
2385      case N_FOFFSETS: file_offsets = TRUE; break;
2386      case N_HELP: help(); pcregrep_exit(0);
2387      case N_LBUFFER: line_buffered = TRUE; break;
2388      case N_LOFFSETS: line_offsets = number = TRUE; break;
2389      case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2390      case 'a': binary_files = BIN_TEXT; break;
2391      case 'c': count_only = TRUE; break;
2392      case 'F': process_options |= PO_FIXED_STRINGS; break;
2393      case 'H': filenames = FN_FORCE; break;
2394      case 'I': binary_files = BIN_NOMATCH; break;
2395      case 'h': filenames = FN_NONE; break;
2396      case 'i': options |= PCRE_CASELESS; break;
2397      case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2398      case 'L': filenames = FN_NOMATCH_ONLY; break;
2399      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2400      case 'n': number = TRUE; break;
2401    
2402      case 'o':
2403      only_matching_last = add_number(0, only_matching_last);
2404      if (only_matching == NULL) only_matching = only_matching_last;
2405      break;
2406    
2407      case 'q': quiet = TRUE; break;
2408      case 'r': dee_action = dee_RECURSE; break;
2409      case 's': silent = TRUE; break;
2410      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
2411      case 'v': invert = TRUE; break;
2412      case 'w': process_options |= PO_WORD_MATCH; break;
2413      case 'x': process_options |= PO_LINE_MATCH; break;
2414    
2415      case 'V':
2416      fprintf(stdout, "pcregrep version %s\n", pcre_version());
2417      pcregrep_exit(0);
2418      break;
2419    
2420      default:
2421      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2422      pcregrep_exit(usage(2));
2423      }
2424    
2425    return options;
2426    }
2427    
2428    
2429    
2430    
2431    /*************************************************
2432    *          Construct printed ordinal             *
2433    *************************************************/
2434    
2435    /* This turns a number into "1st", "3rd", etc. */
2436    
static char *
ordin(int n)
{
/* Turn a number into its printed ordinal: "1st", "2nd", "3rd", "4th", ...

Argument:   n   the number
Returns:    pointer to a static buffer holding the text; the contents are
            overwritten by the next call

Three bytes per byte of int is always enough for the decimal digits; the
extra four bytes cover a minus sign, the two-letter ending, and the
terminating zero. This keeps the sprintf() below within bounds for any int
value. */

static char buffer[sizeof(int) * 3 + 4];
const char *ending = "th";
int last_two = n % 100;

/* Numbers ending in 11, 12, or 13 take "th" (11th, 112th), so only look at
the final digit when the last two digits are outside that range. Negative
values give a negative remainder and therefore keep the default "th". */

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
2453    
2454    
2455    
2456    /*************************************************
2457    *          Compile a single pattern              *
2458    *************************************************/
2459    
2460    /* Do nothing if the pattern has already been compiled. This is the case for
2461    include/exclude patterns read from a file.
2462    
2463    When the -F option has been used, each "pattern" may be a list of strings,
2464    separated by line breaks. They will be matched literally. We split such a
2465    string and compile the first substring, inserting an additional block into the
2466    pattern chain.
2467    
2468    Arguments:
2469      p              points to the pattern block
2470      options        the PCRE options
2471      popts          the processing options
2472      fromfile       TRUE if the pattern was read from a file
2473      fromtext       file name or identifying text (e.g. "include")
2474      count          0 if this is the only command line pattern, or
2475                     number of the command line pattern, or
2476                     linenumber for a pattern from a file
2477    
2478    Returns:         TRUE on success, FALSE after an error
2479    */
2480    
2481    static BOOL
2482    compile_pattern(patstr *p, int options, int popts, int fromfile,
2483      const char *fromtext, int count)
2484    {
2485    char buffer[PATBUFSIZE];
2486    const char *error;
2487    char *ps = p->string;
2488    int patlen = strlen(ps);
2489    int errptr;
2490    
2491    if (p->compiled != NULL) return TRUE;
2492    
2493    if ((popts & PO_FIXED_STRINGS) != 0)
2494      {
2495      int ellength;
2496      char *eop = ps + patlen;
2497      char *pe = end_of_line(ps, eop, &ellength);
2498    
2499      if (ellength != 0)
2500        {
2501        if (add_pattern(pe, p) == NULL) return FALSE;
2502        patlen = (int)(pe - ps - ellength);
2503        }
2504      }
2505    
2506    sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2507    p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2508    if (p->compiled != NULL) return TRUE;
2509    
2510    /* Handle compile errors */
2511    
2512    errptr -= (int)strlen(prefix[popts]);
2513    if (errptr > patlen) errptr = patlen;
2514    
2515    if (fromfile)
2516      {
2517      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2518        "at offset %d: %s\n", count, fromtext, errptr, error);
2519      }
2520    else
2521      {
2522      if (count == 0)
2523        fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2524          fromtext, errptr, error);
2525      else
2526        fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2527          ordin(count), fromtext, errptr, error);
2528      }
2529    
2530    return FALSE;
2531    }
2532    
2533    
2534    
2535    /*************************************************
2536    *     Read and compile a file of patterns        *
2537    *************************************************/
2538    
2539    /* This is used for -f (files of patterns), --include-from, and --exclude-from.
2540    
2541    Arguments:
2542      name         the name of the file; "-" is stdin
2543      patptr       pointer to the pattern chain anchor
2544      patlastptr   pointer to the last pattern pointer
2545      popts        the process options to pass to compile_pattern()
2546    
2547    Returns:       TRUE if all went well
2548    */
2549    
2550    static BOOL
2551    read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2552    {
2553    int linenumber = 0;
2554    FILE *f;
2555    char *filename;
2556    char buffer[PATBUFSIZE];
2557    
2558    if (strcmp(name, "-") == 0)
2559      {
2560      f = stdin;
2561      filename = stdin_name;
2562      }
2563    else
2564      {
2565      f = fopen(name, "r");
2566      if (f == NULL)
2567        {
2568        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2569        return FALSE;
2570        }
2571      filename = name;
2572      }
2573    
2574    while (fgets(buffer, PATBUFSIZE, f) != NULL)
2575      {
2576      char *s = buffer + (int)strlen(buffer);
2577      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2578      *s = 0;
2579      linenumber++;
2580      if (buffer[0] == 0) continue;   /* Skip blank lines */
2581    
2582      /* Note: this call to add_pattern() puts a pointer to the local variable
2583      "buffer" into the pattern chain. However, that pointer is used only when
2584      compiling the pattern, which happens immediately below, so we flatten it
2585      afterwards, as a precaution against any later code trying to use it. */
2586    
2587      *patlastptr = add_pattern(buffer, *patlastptr);
2588      if (*patlastptr == NULL)
2589        {
2590        if (f != stdin) fclose(f);
2591        return FALSE;
2592        }
2593      if (*patptr == NULL) *patptr = *patlastptr;
2594    
2595      /* This loop is needed because compiling a "pattern" when -F is set may add
2596      on additional literal patterns if the original contains a newline. In the
2597      common case, it never will, because fgets() stops at a newline. However,
2598      the -N option can be used to give pcregrep a different newline setting. */
2599    
2600      for(;;)
2601        {
2602        if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2603            linenumber))
2604          {
2605          if (f != stdin) fclose(f);
2606          return FALSE;
2607          }
2608        (*patlastptr)->string = NULL;            /* Insurance */
2609        if ((*patlastptr)->next == NULL) break;
2610        *patlastptr = (*patlastptr)->next;
2611        }
2612      }
2613    
2614    if (f != stdin) fclose(f);
2615    return TRUE;
2616    }
2617    
2618    
2619    
2620    /*************************************************
2621    *                Main program                    *
2622    *************************************************/
2623    
2624    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2625    
2626    int
2627    main(int argc, char **argv)
2628    {
2629    int i, j;
2630    int rc = 1;
2631    BOOL only_one_at_top;
2632    patstr *cp;
2633    fnstr *fn;
2634    const char *locale_from = "--locale";
2635    const char *error;
2636    
2637    #ifdef SUPPORT_PCREGREP_JIT
2638    pcre_jit_stack *jit_stack = NULL;
2639    #endif
2640    
2641    /* Set the default line ending value from the default in the PCRE library;
2642    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2643    Note that the return values from pcre_config(), though derived from the ASCII
2644    codes, are the same in EBCDIC environments, so we must use the actual values
2645    rather than escapes such as '\r'. */
2646    
2647    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2648    switch(i)
2649      {
2650      default:               newline = (char *)"lf"; break;
2651      case 13:               newline = (char *)"cr"; break;
2652      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2653      case -1:               newline = (char *)"any"; break;
2654      case -2:               newline = (char *)"anycrlf"; break;
2655      }
2656    
2657    /* Process the options */
2658    
2659    for (i = 1; i < argc; i++)
2660      {
2661      option_item *op = NULL;
2662      char *option_data = (char *)"";    /* default to keep compiler happy */
2663      BOOL longop;
2664      BOOL longopwasequals = FALSE;
2665    
2666      if (argv[i][0] != '-') break;
2667    
2668      /* If we hit an argument that is just "-", it may be a reference to STDIN,
2669      but only if we have previously had -e or -f to define the patterns. */
2670    
2671      if (argv[i][1] == 0)
2672        {
2673        if (pattern_files != NULL || patterns != NULL) break;
2674          else pcregrep_exit(usage(2));
2675        }
2676    
2677      /* Handle a long name option, or -- to terminate the options */
2678    
2679      if (argv[i][1] == '-')
2680        {
2681        char *arg = argv[i] + 2;
2682        char *argequals = strchr(arg, '=');
2683    
2684        if (*arg == 0)    /* -- terminates options */
2685          {
2686          i++;
2687          break;                /* out of the options-handling loop */
2688          }
2689    
2690        longop = TRUE;
2691    
2692        /* Some long options have data that follows after =, for example file=name.
2693        Some options have variations in the long name spelling: specifically, we
2694        allow "regexp" because GNU grep allows it, though I personally go along
2695        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2696        These options are entered in the table as "regex(p)". Options can be in
2697        both these categories. */
2698    
2699        for (op = optionlist; op->one_char != 0; op++)
2700          {
2701          char *opbra = strchr(op->long_name, '(');
2702          char *equals = strchr(op->long_name, '=');
2703    
2704          /* Handle options with only one spelling of the name */
2705    
2706          if (opbra == NULL)     /* Does not contain '(' */
2707            {
2708            if (equals == NULL)  /* Not thing=data case */
2709              {
2710              if (strcmp(arg, op->long_name) == 0) break;
2711              }
2712            else                 /* Special case xxx=data */
2713              {
2714              int oplen = (int)(equals - op->long_name);
2715              int arglen = (argequals == NULL)?
2716                (int)strlen(arg) : (int)(argequals - arg);
2717              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2718                {
2719                option_data = arg + arglen;
2720                if (*option_data == '=')
2721                  {
2722                  option_data++;
2723                  longopwasequals = TRUE;
2724                  }
2725                break;
2726                }
2727              }
2728            }
2729    
2730          /* Handle options with an alternate spelling of the name */
2731    
2732          else
2733            {
2734            char buff1[24];
2735            char buff2[24];
2736    
2737            int baselen = (int)(opbra - op->long_name);
2738            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2739            int arglen = (argequals == NULL || equals == NULL)?
2740              (int)strlen(arg) : (int)(argequals - arg);
2741    
2742            sprintf(buff1, "%.*s", baselen, op->long_name);
2743            sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2744    
2745            if (strncmp(arg, buff1, arglen) == 0 ||
2746               strncmp(arg, buff2, arglen) == 0)
2747              {
2748              if (equals != NULL && argequals != NULL)
2749                {
2750                option_data = argequals;
2751                if (*option_data == '=')
2752                  {
2753                  option_data++;
2754                  longopwasequals = TRUE;
2755                  }
2756                }
2757              break;
2758              }
2759            }
2760          }
2761    
2762        if (op->one_char == 0)
2763          {
2764          fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2765          pcregrep_exit(usage(2));
2766          }
2767        }
2768    
2769      /* Jeffrey Friedl's debugging harness uses these additional options which
2770      are not in the right form for putting in the option table because they use
2771      only one hyphen, yet are more than one character long. By putting them
2772      separately here, they will not get displayed as part of the help() output,
2773      but I don't think Jeffrey will care about that. */
2774    
2775    #ifdef JFRIEDL_DEBUG
2776      else if (strcmp(argv[i], "-pre") == 0) {
2777              jfriedl_prefix = argv[++i];
2778              continue;
2779      } else if (strcmp(argv[i], "-post") == 0) {
2780              jfriedl_postfix = argv[++i];
2781              continue;
2782      } else if (strcmp(argv[i], "-XT") == 0) {
2783              sscanf(argv[++i], "%d", &jfriedl_XT);
2784              continue;
2785      } else if (strcmp(argv[i], "-XR") == 0) {
2786              sscanf(argv[++i], "%d", &jfriedl_XR);
2787              continue;
2788      }
2789    #endif
2790    
2791    
2792      /* One-char options; many that have no data may be in a single argument; we
2793      continue till we hit the last one or one that needs data. */
2794    
2795      else
2796        {
2797        char *s = argv[i] + 1;
2798        longop = FALSE;
2799    
2800        while (*s != 0)
2801          {
2802          for (op = optionlist; op->one_char != 0; op++)
2803            {
2804            if (*s == op->one_char) break;
2805            }
2806          if (op->one_char == 0)
2807            {
2808            fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2809              *s, argv[i]);
2810            pcregrep_exit(usage(2));
2811            }
2812    
2813          option_data = s+1;
2814    
2815          /* Break out if this is the last character in the string; it's handled
2816          below like a single multi-char option. */
2817    
2818          if (*option_data == 0) break;
2819    
2820          /* Check for a single-character option that has data: OP_OP_NUMBER(S)
2821          are used for ones that either have a numerical number or defaults, i.e.
2822          the data is optional. If a digit follows, there is data; if not, carry on
2823          with other single-character options in the same string. */
2824    
2825          if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
2826            {
2827            if (isdigit((unsigned char)s[1])) break;
2828            }
2829          else   /* Check for an option with data */
2830            {
2831            if (op->type != OP_NODATA) break;
2832            }
2833    
2834          /* Handle a single-character option with no data, then loop for the
2835          next character in the string. */
2836    
2837          pcre_options = handle_option(*s++, pcre_options);
2838          }
2839        }
2840    
2841      /* At this point we should have op pointing to a matched option. If the type
2842      is OP_NODATA, it means that there is no data, and the option might set
2843      something in the PCRE options. */
2844    
2845      if (op->type == OP_NODATA)
2846        {
2847        pcre_options = handle_option(op->one_char, pcre_options);
2848        continue;
2849        }
2850    
2851      /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
2852      either has a value or defaults to something. It cannot have data in a
2853      separate item. At the moment, the only such options are "colo(u)r",
2854      "only-matching", and Jeffrey Friedl's special -S debugging option. */
2855    
2856      if (*option_data == 0 &&
2857          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
2858           op->type == OP_OP_NUMBERS))
2859        {
2860        switch (op->one_char)
2861          {
2862          case N_COLOUR:
2863          colour_option = (char *)"auto";
2864          break;
2865    
2866          case 'o':
2867          only_matching_last = add_number(0, only_matching_last);
2868          if (only_matching == NULL) only_matching = only_matching_last;
2869          break;
2870    
2871    #ifdef JFRIEDL_DEBUG
2872          case 'S':
2873          S_arg = 0;
2874          break;
2875    #endif
2876          }
2877        continue;
2878        }
2879    
2880      /* Otherwise, find the data string for the option. */
2881    
2882      if (*option_data == 0)
2883        {
2884        if (i >= argc - 1 || longopwasequals)
2885          {
2886          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2887          pcregrep_exit(usage(2));
2888          }
2889        option_data = argv[++i];
2890        }
2891    
2892      /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
2893      added to a chain of numbers. */
2894    
2895      if (op->type == OP_OP_NUMBERS)
2896        {
2897        unsigned long int n = decode_number(option_data, op, longop);
2898        omdatastr *omd = (omdatastr *)op->dataptr;
2899        *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
2900        if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
2901        }
2902    
2903      /* If the option type is OP_PATLIST, it's the -e option, or one of the
2904      include/exclude options, which can be called multiple times to create lists
2905      of patterns. */
2906    
2907      else if (op->type == OP_PATLIST)
2908        {
2909        patdatastr *pd = (patdatastr *)op->dataptr;
2910        *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2911        if (*(pd->lastptr) == NULL) goto EXIT2;
2912        if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2913        }
2914    
2915      /* If the option type is OP_FILELIST, it's one of the options that names a
2916      file. */
2917    
2918      else if (op->type == OP_FILELIST)
2919        {
2920        fndatastr *fd = (fndatastr *)op->dataptr;
2921        fn = (fnstr *)malloc(sizeof(fnstr));
2922        if (fn == NULL)
2923          {
2924          fprintf(stderr, "pcregrep: malloc failed\n");
2925          goto EXIT2;
2926          }
2927        fn->next = NULL;
2928        fn->name = option_data;
2929        if (*(fd->anchor) == NULL)
2930          *(fd->anchor) = fn;
2931        else
2932          (*(fd->lastptr))->next = fn;
2933        *(fd->lastptr) = fn;
2934        }
2935    
2936      /* Handle OP_BINARY_FILES */
2937    
2938      else if (op->type == OP_BINFILES)
2939        {
2940        if (strcmp(option_data, "binary") == 0)
2941          binary_files = BIN_BINARY;
2942        else if (strcmp(option_data, "without-match") == 0)
2943          binary_files = BIN_NOMATCH;
2944        else if (strcmp(option_data, "text") == 0)
2945          binary_files = BIN_TEXT;
2946        else
2947          {
2948          fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2949            option_data);
2950          pcregrep_exit(usage(2));
2951          }
2952        }
2953    
2954      /* Otherwise, deal with a single string or numeric data value. */
2955    
2956      else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2957               op->type != OP_OP_NUMBER)
2958        {
2959        *((char **)op->dataptr) = option_data;
2960        }
2961      else
2962        {
2963        unsigned long int n = decode_number(option_data, op, longop);
2964        if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
2965          else *((int *)op->dataptr) = n;
2966        }
2967      }
2968    
2969    /* Options have been decoded. If -C was used, its value is used as a default
2970    for -A and -B. */
2971    
2972    if (both_context > 0)
2973      {
2974      if (after_context == 0) after_context = both_context;
2975      if (before_context == 0) before_context = both_context;
2976      }
2977    
2978    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2979    However, all three set show_only_matching because they display, each in their
2980    own way, only the data that has matched. */
2981    
2982    if ((only_matching != NULL && (file_offsets || line_offsets)) ||
2983        (file_offsets && line_offsets))
2984      {
2985      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2986        "and/or --line-offsets\n");
2987      pcregrep_exit(usage(2));
2988      }
2989    
2990    if (only_matching != NULL || file_offsets || line_offsets)
2991      show_only_matching = TRUE;
2992    
2993    /* If a locale has not been provided as an option, see if the LC_CTYPE or
2994    LC_ALL environment variable is set, and if so, use it. */
2995    
2996    if (locale == NULL)
2997      {
2998      locale = getenv("LC_ALL");
2999      locale_from = "LCC_ALL";
3000      }
3001    
3002    if (locale == NULL)
3003      {
3004      locale = getenv("LC_CTYPE");
3005      locale_from = "LC_CTYPE";
3006      }
3007    
3008    /* If a locale is set, use it to generate the tables the PCRE needs. Otherwise,
3009    pcretables==NULL, which causes the use of default tables. */
3010    
3011    if (locale != NULL)
3012      {
3013      if (setlocale(LC_CTYPE, locale) == NULL)
3014        {
3015        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
3016          locale, locale_from);
3017        goto EXIT2;
3018        }
3019      pcretables = pcre_maketables();
3020      }
3021    
3022    /* Sort out colouring */
3023    
3024    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
3025      {
3026      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
3027      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
3028      else
3029        {
3030        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
3031          colour_option);
3032        goto EXIT2;
3033        }
3034      if (do_colour)
3035        {
3036        char *cs = getenv("PCREGREP_COLOUR");
3037        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
3038        if (cs != NULL) colour_string = cs;
3039        }
3040      }
3041    
3042    /* Interpret the newline type; the default settings are Unix-like. */
3043    
3044    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
3045      {
3046      pcre_options |= PCRE_NEWLINE_CR;
3047      endlinetype = EL_CR;
3048      }
3049    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
3050      {
3051      pcre_options |= PCRE_NEWLINE_LF;
3052      endlinetype = EL_LF;
3053      }
3054    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
3055      {
3056      pcre_options |= PCRE_NEWLINE_CRLF;
3057      endlinetype = EL_CRLF;
3058      }
3059    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
3060      {
3061      pcre_options |= PCRE_NEWLINE_ANY;
3062      endlinetype = EL_ANY;
3063      }
3064    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
3065      {
3066      pcre_options |= PCRE_NEWLINE_ANYCRLF;
3067      endlinetype = EL_ANYCRLF;
3068      }
3069    else
3070      {
3071      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
3072      goto EXIT2;
3073      }
3074    
3075    /* Interpret the text values for -d and -D */
3076    
3077    if (dee_option != NULL)
3078      {
3079      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
3080      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
3081      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
3082      else
3083        {
3084        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
3085        goto EXIT2;
3086        }
3087      }
3088    
3089    if (DEE_option != NULL)
3090      {
3091      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
3092      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
3093      else
3094        {
3095        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
3096        goto EXIT2;
3097        }
3098      }
3099    
3100    /* Check the values for Jeffrey Friedl's debugging options. */
3101    
3102    #ifdef JFRIEDL_DEBUG
3103    if (S_arg > 9)
3104      {
3105      fprintf(stderr, "pcregrep: bad value for -S option\n");
3106      return 2;
3107      }
3108    if (jfriedl_XT != 0 || jfriedl_XR != 0)
3109      {
3110      if (jfriedl_XT == 0) jfriedl_XT = 1;
3111      if (jfriedl_XR == 0) jfriedl_XR = 1;
3112      }
3113    #endif
3114    
3115    /* Get memory for the main buffer. */
3116    
3117    bufsize = 3*bufthird;
3118    main_buffer = (char *)malloc(bufsize);
3119    
3120    if (main_buffer == NULL)
3121      {
3122      fprintf(stderr, "pcregrep: malloc failed\n");
3123      goto EXIT2;
3124      }
3125    
3126    /* If no patterns were provided by -e, and there are no files provided by -f,
3127    the first argument is the one and only pattern, and it must exist. */
3128    
3129    if (patterns == NULL && pattern_files == NULL)
3130      {
3131      if (i >= argc) return usage(2);
3132      patterns = patterns_last = add_pattern(argv[i++], NULL);
3133      if (patterns == NULL) goto EXIT2;
3134      }
3135    
3136    /* Compile the patterns that were provided on the command line, either by
3137    multiple uses of -e or as a single unkeyed pattern. We cannot do this until
3138    after all the command-line options are read so that we know which PCRE options
3139    to use. When -F is used, compile_pattern() may add another block into the
3140    chain, so we must not access the next pointer till after the compile. */
3141    
3142    for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3143      {
3144      if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
3145           (j == 1 && patterns->next == NULL)? 0 : j))
3146        goto EXIT2;
3147      }
3148    
3149    /* Read and compile the regular expressions that are provided in files. */
3150    
3151    for (fn = pattern_files; fn != NULL; fn = fn->next)
3152      {
3153      if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3154        goto EXIT2;
3155      }
3156    
3157    /* Study the regular expressions, as we will be running them many times. If an
3158    extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
3159    returned, even if studying produces no data. */
3160    
3161    if (match_limit > 0 || match_limit_recursion > 0)
3162      study_options |= PCRE_STUDY_EXTRA_NEEDED;
3163    
3164    /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
3165    
3166    #ifdef SUPPORT_PCREGREP_JIT
3167    if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3168      jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
3169    #endif
3170    
3171    for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3172      {
3173      cp->hint = pcre_study(cp->compiled, study_options, &error);
3174      if (error != NULL)
3175        {
3176        char s[16];
3177        if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
3178        fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
3179        goto EXIT2;
3180        }
3181    #ifdef SUPPORT_PCREGREP_JIT
3182      if (jit_stack != NULL && cp->hint != NULL)
3183        pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
3184    #endif
3185      }
3186    
3187    /* If --match-limit or --recursion-limit was set, put the value(s) into the
3188    pcre_extra block for each pattern. There will always be an extra block because
3189    of the use of PCRE_STUDY_EXTRA_NEEDED above. */
3190    
3191    for (cp = patterns; cp != NULL; cp = cp->next)
3192      {
3193      if (match_limit > 0)
3194        {
3195        cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
3196        cp->hint->match_limit = match_limit;
3197        }
3198    
3199      if (match_limit_recursion > 0)
3200        {
3201        cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3202        cp->hint->match_limit_recursion = match_limit_recursion;
3203        }
3204      }
3205    
3206    /* If there are include or exclude patterns read from the command line, compile
3207    them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3208    0. */
3209    
3210    for (j = 0; j < 4; j++)
3211      {
3212      int k;
3213      for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
3214        {
3215        if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
3216             (k == 1 && cp->next == NULL)? 0 : k))
3217          goto EXIT2;
3218        }
3219      }
3220    
3221    /* Read and compile include/exclude patterns from files. */
3222    
3223    for (fn = include_from; fn != NULL; fn = fn->next)
3224      {
3225      if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3226        goto EXIT2;
3227      }
3228    
3229    for (fn = exclude_from; fn != NULL; fn = fn->next)
3230      {
3231      if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3232        goto EXIT2;
3233      }
3234    
3235    /* If there are no files that contain lists of files to search, and there are
3236    no file arguments, search stdin, and then exit. */
3237    
3238    if (file_lists == NULL && i >= argc)
3239      {
3240      rc = pcregrep(stdin, FR_PLAIN, stdin_name,
3241        (filenames > FN_DEFAULT)? stdin_name : NULL);
3242      goto EXIT;
3243      }
3244    
3245    /* If any files that contain lists of files to search have been specified,
3246    read them line by line and search the given files. */
3247    
3248    for (fn = file_lists; fn != NULL; fn = fn->next)
3249      {
3250      char buffer[PATBUFSIZE];
3251      FILE *fl;
3252      if (strcmp(fn->name, "-") == 0) fl = stdin; else
3253        {
3254        fl = fopen(fn->name, "rb");
3255        if (fl == NULL)
3256          {
3257          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
3258            strerror(errno));
3259          goto EXIT2;
3260          }
3261        }
3262      while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3263        {
3264        int frc;
3265        char *end = buffer + (int)strlen(buffer);
3266        while (end > buffer && isspace(end[-1])) end--;
3267        *end = 0;
3268        if (*buffer != 0)
3269          {
3270          frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3271          if (frc > 1) rc = frc;
3272            else if (frc == 0 && rc == 1) rc = 0;
3273          }
3274        }
3275      if (fl != stdin) fclose(fl);
3276      }
3277    
3278    /* After handling file-list, work through remaining arguments. Pass in the fact
3279    that there is only one argument at top level - this suppresses the file name if
3280    the argument is not a directory and filenames are not otherwise forced. */
3281    
3282    only_one_at_top = i == argc - 1 && file_lists == NULL;
3283    
3284    for (; i < argc; i++)
3285      {
3286      int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
3287        only_one_at_top);
3288      if (frc > 1) rc = frc;
3289        else if (frc == 0 && rc == 1) rc = 0;
3290      }
3291    
3292    EXIT:
3293    #ifdef SUPPORT_PCREGREP_JIT
3294    if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3295    #endif
3296    
3297    free(main_buffer);
3298    free((void *)pcretables);
3299    
3300    free_pattern_chain(patterns);
3301    free_pattern_chain(include_patterns);
3302    free_pattern_chain(include_dir_patterns);
3303    free_pattern_chain(exclude_patterns);
3304    free_pattern_chain(exclude_dir_patterns);
3305    
3306    free_file_chain(exclude_from);
3307    free_file_chain(include_from);
3308    free_file_chain(pattern_files);
3309    free_file_chain(file_lists);
3310    
3311    while (only_matching != NULL)
3312      {
3313      omstr *this = only_matching;
3314      only_matching = this->next;
3315      free(this);
3316      }
3317    
3318    pcregrep_exit(rc);
3319    
3320    EXIT2:
3321    rc = 2;
3322    goto EXIT;
3323  }  }
3324    
3325  /* End */  /* End of pcregrep */

Legend:
Removed from v.53  
changed lines
  Added in v.1548

  ViewVC Help
Powered by ViewVC 1.1.5