/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC revision 1039 by ph10, Thu Sep 13 16:39:03 2012 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix system it can recurse into directories. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2012 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55    #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 18  its pattern matching. On a Unix system i Line 70  its pattern matching. On a Unix system i
70    
71  typedef int BOOL;  typedef int BOOL;
72    
73  #define VERSION "2.0 01-Aug-2001"  #define OFFSET_SIZE 99
74  #define MAX_PATTERN_COUNT 100  
75    #if BUFSIZ > 8192
76    #define MAXPATLEN BUFSIZ
77    #else
78    #define MAXPATLEN 8192
79    #endif
80    
81    #define PATBUFSIZE (MAXPATLEN + 10)   /* Allows for prefix+suffix */
82    
83    /* Values for the "filenames" variable, which specifies options for file name
84    output. The order is important; it is assumed that a file name is wanted for
85    all values greater than FN_DEFAULT. */
86    
87    enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
88    
89    /* File reading styles */
90    
91    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
92    
93    /* Actions for the -d and -D options */
94    
95    enum { dee_READ, dee_SKIP, dee_RECURSE };
96    enum { DEE_READ, DEE_SKIP };
97    
98    /* Actions for special processing options (flag bits) */
99    
100    #define PO_WORD_MATCH     0x0001
101    #define PO_LINE_MATCH     0x0002
102    #define PO_FIXED_STRINGS  0x0004
103    
104    /* Line ending types */
105    
106    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
107    
108    /* Binary file options */
109    
110    enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
111    
112    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
113    environments), a warning is issued if the value of fwrite() is ignored.
114    Unfortunately, casting to (void) does not suppress the warning. To get round
115    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
116    apply to fprintf(). */
117    
118    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
119    
120    
121    
122  /*************************************************  /*************************************************
123  *               Global variables                 *  *               Global variables                 *
124  *************************************************/  *************************************************/
125    
126  static char *pattern_filename = NULL;  /* Jeffrey Friedl has some debugging requirements that are not part of the
127  static int  pattern_count = 0;  regular code. */
128  static pcre **pattern_list;  
129  static pcre_extra **hints_list;  #ifdef JFRIEDL_DEBUG
130    static int S_arg = -1;
131    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
132    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
133    static const char *jfriedl_prefix = "";
134    static const char *jfriedl_postfix = "";
135    #endif
136    
137    static int  endlinetype;
138    
139    static char *colour_string = (char *)"1;31";
140    static char *colour_option = NULL;
141    static char *dee_option = NULL;
142    static char *DEE_option = NULL;
143    static char *locale = NULL;
144    static char *main_buffer = NULL;
145    static char *newline = NULL;
146    static char *om_separator = (char *)"";
147    static char *stdin_name = (char *)"(standard input)";
148    
149    static const unsigned char *pcretables = NULL;
150    
151    static int after_context = 0;
152    static int before_context = 0;
153    static int binary_files = BIN_BINARY;
154    static int both_context = 0;
155    static int bufthird = PCREGREP_BUFSIZE;
156    static int bufsize = 3*PCREGREP_BUFSIZE;
157    
158    #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
159    static int dee_action = dee_SKIP;
160    #else
161    static int dee_action = dee_READ;
162    #endif
163    
164    static int DEE_action = DEE_READ;
165    static int error_count = 0;
166    static int filenames = FN_DEFAULT;
167    static int pcre_options = 0;
168    static int process_options = 0;
169    
170    #ifdef SUPPORT_PCREGREP_JIT
171    static int study_options = PCRE_STUDY_JIT_COMPILE;
172    #else
173    static int study_options = 0;
174    #endif
175    
176    static unsigned long int match_limit = 0;
177    static unsigned long int match_limit_recursion = 0;
178    
179  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
180  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
181  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
182    static BOOL hyphenpending = FALSE;
183  static BOOL invert = FALSE;  static BOOL invert = FALSE;
184    static BOOL line_buffered = FALSE;
185    static BOOL line_offsets = FALSE;
186    static BOOL multiline = FALSE;
187  static BOOL number = FALSE;  static BOOL number = FALSE;
188  static BOOL recurse = FALSE;  static BOOL omit_zero_count = FALSE;
189    static BOOL resource_error = FALSE;
190    static BOOL quiet = FALSE;
191    static BOOL show_only_matching = FALSE;
192  static BOOL silent = FALSE;  static BOOL silent = FALSE;
193  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
194    
195    /* Structure for list of --only-matching capturing numbers. */
196    
197    typedef struct omstr {
198      struct omstr *next;
199      int groupnum;
200    } omstr;
201    
202    static omstr *only_matching = NULL;
203    static omstr *only_matching_last = NULL;
204    
205    /* Structure for holding the two variables that describe a number chain. */
206    
207    typedef struct omdatastr {
208      omstr **anchor;
209      omstr **lastptr;
210    } omdatastr;
211    
212    static omdatastr only_matching_data = { &only_matching, &only_matching_last };
213    
214    /* Structure for list of file names (for -f and --{in,ex}clude-from) */
215    
216    typedef struct fnstr {
217      struct fnstr *next;
218      char *name;
219    } fnstr;
220    
221    static fnstr *exclude_from = NULL;
222    static fnstr *exclude_from_last = NULL;
223    static fnstr *include_from = NULL;
224    static fnstr *include_from_last = NULL;
225    
226    static fnstr *file_lists = NULL;
227    static fnstr *file_lists_last = NULL;
228    static fnstr *pattern_files = NULL;
229    static fnstr *pattern_files_last = NULL;
230    
231    /* Structure for holding the two variables that describe a file name chain. */
232    
233    typedef struct fndatastr {
234      fnstr **anchor;
235      fnstr **lastptr;
236    } fndatastr;
237    
238    static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
239    static fndatastr include_from_data = { &include_from, &include_from_last };
240    static fndatastr file_lists_data = { &file_lists, &file_lists_last };
241    static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
242    
243    /* Structure for pattern and its compiled form; used for matching patterns and
244    also for include/exclude patterns. */
245    
246    typedef struct patstr {
247      struct patstr *next;
248      char *string;
249      pcre *compiled;
250      pcre_extra *hint;
251    } patstr;
252    
253    static patstr *patterns = NULL;
254    static patstr *patterns_last = NULL;
255    static patstr *include_patterns = NULL;
256    static patstr *include_patterns_last = NULL;
257    static patstr *exclude_patterns = NULL;
258    static patstr *exclude_patterns_last = NULL;
259    static patstr *include_dir_patterns = NULL;
260    static patstr *include_dir_patterns_last = NULL;
261    static patstr *exclude_dir_patterns = NULL;
262    static patstr *exclude_dir_patterns_last = NULL;
263    
264    /* Structure holding the two variables that describe a pattern chain. A pointer
265    to such structures is used for each appropriate option. */
266    
267    typedef struct patdatastr {
268      patstr **anchor;
269      patstr **lastptr;
270    } patdatastr;
271    
272    static patdatastr match_patdata = { &patterns, &patterns_last };
273    static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
274    static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
275    static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
276    static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
277    
278    static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
279                                     &include_dir_patterns, &exclude_dir_patterns };
280    
281    static const char *incexname[4] = { "--include", "--exclude",
282                                        "--include-dir", "--exclude-dir" };
283    
284  /* Structure for options and list of them */  /* Structure for options and list of them */
285    
286    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
287           OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
288    
289  typedef struct option_item {  typedef struct option_item {
290      int type;
291    int one_char;    int one_char;
292    char *long_name;    void *dataptr;
293    char *help_text;    const char *long_name;
294      const char *help_text;
295  } option_item;  } option_item;
296    
297    /* Options without a single-letter equivalent get a negative value. This can be
298    used to identify them. */
299    
300    #define N_COLOUR       (-1)
301    #define N_EXCLUDE      (-2)
302    #define N_EXCLUDE_DIR  (-3)
303    #define N_HELP         (-4)
304    #define N_INCLUDE      (-5)
305    #define N_INCLUDE_DIR  (-6)
306    #define N_LABEL        (-7)
307    #define N_LOCALE       (-8)
308    #define N_NULL         (-9)
309    #define N_LOFFSETS     (-10)
310    #define N_FOFFSETS     (-11)
311    #define N_LBUFFER      (-12)
312    #define N_M_LIMIT      (-13)
313    #define N_M_LIMIT_REC  (-14)
314    #define N_BUFSIZE      (-15)
315    #define N_NOJIT        (-16)
316    #define N_FILE_LIST    (-17)
317    #define N_BINARY_FILES (-18)
318    #define N_EXCLUDE_FROM (-19)
319    #define N_INCLUDE_FROM (-20)
320    #define N_OM_SEPARATOR (-21)
321    
322  static option_item optionlist[] = {  static option_item optionlist[] = {
323    { -1,  "help",         "display this help and exit" },    { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
324    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
325    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
326    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
327    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
328    { 'n', "line-number",  "print line number with output lines" },    { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
329    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
330    { 's', "no-messages",  "suppress error messages" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
331    { 'V', "version",      "print version information and exit" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
332    { 'v', "invert-match", "select non-matching lines" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
333    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
334    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
335    { 0,    NULL,           NULL }    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
336      { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
337      { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
338      { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
339      { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
340      { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
341      { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
342      { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
343      { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
344      { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
345    #ifdef SUPPORT_PCREGREP_JIT
346      { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
347    #else
348      { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
349    #endif
350      { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
351      { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
352      { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
353      { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
354      { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
355      { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
356      { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
357      { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
358      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
359      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
360      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
361      { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
362      { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
363      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
364      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
365      { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
366      { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
367      { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
368      { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
369      { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
370      { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
371    
372      /* These two were accidentally implemented with underscores instead of
373      hyphens in the option names. As this was not discovered for several releases,
374      the incorrect versions are left in the table for compatibility. However, the
375      --help function misses out any option that has an underscore in its name. */
376    
377      { OP_PATLIST,   N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
378      { OP_PATLIST,   N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
379    
380    #ifdef JFRIEDL_DEBUG
381      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
382    #endif
383      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
384      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
385      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
386      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
387      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
388      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
389      { OP_NODATA,    0,        NULL,               NULL,            NULL }
390  };  };
391    
392    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
393    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
394    that the combination of -w and -x has the same effect as -x on its own, so we
395    can treat them as the same. Note that the PATBUFSIZE macro assumes the longest
396    prefix+suffix is 10 characters; if anything longer is added, it must be
397    adjusted. */
398    
399    static const char *prefix[] = {
400      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
401    
402    static const char *suffix[] = {
403      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
404    
405    /* UTF-8 tables - used only when the newline setting is "any". */
406    
407    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
408    
409    const char utf8_table4[] = {
410      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
411      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
412      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
413      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
414    
415    
416    
417    /*************************************************
418    *         Exit from the program                  *
419    *************************************************/
420    
421    /* If there has been a resource error, give a suitable message.
422    
423    Argument:  the return code
424    Returns:   does not return
425    */
426    
427    static void
428    pcregrep_exit(int rc)
429    {
430    if (resource_error)
431      {
432      fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
433        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
434        PCRE_ERROR_JIT_STACKLIMIT);
435      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
436      }
437    exit(rc);
438    }
439    
440    
441    /*************************************************
442    *          Add item to chain of patterns         *
443    *************************************************/
444    
445    /* Used to add an item onto a chain, or just return an unconnected item if the
446    "after" argument is NULL.
447    
448    Arguments:
449      s          pattern string to add
450      after      if not NULL points to item to insert after
451    
452    Returns:     new pattern block, or NULL if the pattern is too long
453    */
454    
455    static patstr *
456    add_pattern(char *s, patstr *after)
457    {
458    patstr *p = (patstr *)malloc(sizeof(patstr));
459    if (p == NULL)
460      {
461      fprintf(stderr, "pcregrep: malloc failed\n");
462      pcregrep_exit(2);
463      }
464    if (strlen(s) > MAXPATLEN)
465      {
466      fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
467        MAXPATLEN);
468      return NULL;
469      }
470    p->next = NULL;
471    p->string = s;
472    p->compiled = NULL;
473    p->hint = NULL;
474    
475    if (after != NULL)
476      {
477      p->next = after->next;
478      after->next = p;
479      }
480    return p;
481    }
482    
483    
484    /*************************************************
485    *           Free chain of patterns               *
486    *************************************************/
487    
488    /* Used for several chains of patterns.
489    
490    Argument: pointer to start of chain
491    Returns:  nothing
492    */
493    
494    static void
495    free_pattern_chain(patstr *pc)
496    {
497    while (pc != NULL)
498      {
499      patstr *p = pc;
500      pc = p->next;
501      if (p->hint != NULL) pcre_free_study(p->hint);
502      if (p->compiled != NULL) pcre_free(p->compiled);
503      free(p);
504      }
505    }
506    
507    
508    /*************************************************
509    *           Free chain of file names             *
510    *************************************************/
511    
512    /*
513    Argument: pointer to start of chain
514    Returns:  nothing
515    */
516    
517    static void
518    free_file_chain(fnstr *fn)
519    {
520    while (fn != NULL)
521      {
522      fnstr *f = fn;
523      fn = f->next;
524      free(f);
525      }
526    }
527    
528    
529  /*************************************************  /*************************************************
530  *       Functions for directory scanning         *  *            OS-specific functions               *
531  *************************************************/  *************************************************/
532    
533  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
534  although at present the only ones are for Unix, and for "no directory recursion  although at present the only ones are for Unix, Win32, and for "no support". */
 support". */  
535    
536    
537  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
538    
539  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
540  #include <sys/types.h>  #include <sys/types.h>
541  #include <sys/stat.h>  #include <sys/stat.h>
542  #include <dirent.h>  #include <dirent.h>
543    
544  typedef DIR directory_type;  typedef DIR directory_type;
545    #define FILESEP '/'
546    
547  int  static int
548  isdirectory(char *filename)  isdirectory(char *filename)
549  {  {
550  struct stat statbuf;  struct stat statbuf;
551  if (stat(filename, &statbuf) < 0)  if (stat(filename, &statbuf) < 0)
552    return 0;        /* In the expectation that opening as a file will fail */    return 0;        /* In the expectation that opening as a file will fail */
553  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return (statbuf.st_mode & S_IFMT) == S_IFDIR;
554  }  }
555    
556  directory_type *  static directory_type *
557  opendirectory(char *filename)  opendirectory(char *filename)
558  {  {
559  return opendir(filename);  return opendir(filename);
560  }  }
561    
562  char *  static char *
563  readdirectory(directory_type *dir)  readdirectory(directory_type *dir)
564  {  {
565  for (;;)  for (;;)
# Line 108  for (;;) Line 569  for (;;)
569    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
570      return dent->d_name;      return dent->d_name;
571    }    }
572  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
573  }  }
574    
575  void  static void
576  closedirectory(directory_type *dir)  closedirectory(directory_type *dir)
577  {  {
578  closedir(dir);  closedir(dir);
579  }  }
580    
581    
582  #else  /************* Test for regular file in Unix **********/
   
583    
584  /************* Directory scanning when we can't do it ***********/  static int
585    isregfile(char *filename)
586    {
587    struct stat statbuf;
588    if (stat(filename, &statbuf) < 0)
589      return 1;        /* In the expectation that opening as a file will fail */
590    return (statbuf.st_mode & S_IFMT) == S_IFREG;
591    }
592    
 /* The type is void, and apart from isdirectory(), the functions do nothing. */  
593    
594  typedef void directory_type;  /************* Test for a terminal in Unix **********/
595    
596  int isdirectory(char *filename) { return FALSE; }  static BOOL
597  directory_type * opendirectory(char *filename) {}  is_stdout_tty(void)
598  char *readdirectory(directory_type *dir) {}  {
599  void closedirectory(directory_type *dir) {}  return isatty(fileno(stdout));
600    }
601    
602  #endif  static BOOL
603    is_file_tty(FILE *f)
604    {
605    return isatty(fileno(f));
606    }
607    
608    
609    /************* Directory scanning in Win32 ***********/
610    
611  #if ! HAVE_STRERROR  /* I (Philip Hazel) have no means of testing this code. It was contributed by
612  /*************************************************  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
613  *     Provide strerror() for non-ANSI libraries  *  when it did not exist. David Byron added a patch that moved the #include of
614  *************************************************/  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
615    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
616    undefined when it is indeed undefined. */
617    
618  /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()  #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
 in their libraries, but can provide the same facility by this simple  
 alternative function. */  
619    
620  extern int   sys_nerr;  #ifndef STRICT
621  extern char *sys_errlist[];  # define STRICT
622    #endif
623    #ifndef WIN32_LEAN_AND_MEAN
624    # define WIN32_LEAN_AND_MEAN
625    #endif
626    
627  char *  #include <windows.h>
 strerror(int n)  
 {  
 if (n < 0 || n >= sys_nerr) return "unknown error number";  
 return sys_errlist[n];  
 }  
 #endif /* HAVE_STRERROR */  
628    
629    #ifndef INVALID_FILE_ATTRIBUTES
630    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
631    #endif
632    
633    typedef struct directory_type
634    {
635    HANDLE handle;
636    BOOL first;
637    WIN32_FIND_DATA data;
638    } directory_type;
639    
640  /*************************************************  #define FILESEP '/'
 *              Grep an individual file           *  
 *************************************************/  
641    
642  static int  int
643  pcregrep(FILE *in, char *name)  isdirectory(char *filename)
644  {  {
645  int rc = 1;  DWORD attr = GetFileAttributes(filename);
646  int linenumber = 0;  if (attr == INVALID_FILE_ATTRIBUTES)
647  int count = 0;    return 0;
648  int offsets[99];  return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
649  char buffer[BUFSIZ];  }
650    
651  while (fgets(buffer, sizeof(buffer), in) != NULL)  directory_type *
652    opendirectory(char *filename)
653    {
654    size_t len;
655    char *pattern;
656    directory_type *dir;
657    DWORD err;
658    len = strlen(filename);
659    pattern = (char *)malloc(len + 3);
660    dir = (directory_type *)malloc(sizeof(*dir));
661    if ((pattern == NULL) || (dir == NULL))
662    {    {
663    BOOL match = FALSE;    fprintf(stderr, "pcregrep: malloc failed\n");
664    int i;    pcregrep_exit(2);
665    int length = (int)strlen(buffer);    }
666    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;  memcpy(pattern, filename, len);
667    linenumber++;  memcpy(&(pattern[len]), "\\*", 3);
668    dir->handle = FindFirstFile(pattern, &(dir->data));
669    if (dir->handle != INVALID_HANDLE_VALUE)
670      {
671      free(pattern);
672      dir->first = TRUE;
673      return dir;
674      }
675    err = GetLastError();
676    free(pattern);
677    free(dir);
678    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
679    return NULL;
680    }
681    
682    for (i = 0; !match && i < pattern_count; i++)  char *
683    readdirectory(directory_type *dir)
684    {
685    for (;;)
686      {
687      if (!dir->first)
688      {      {
689      match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,      if (!FindNextFile(dir->handle, &(dir->data)))
690        offsets, 99) >= 0;        return NULL;
     if (match && whole_lines && offsets[1] != length) match = FALSE;  
691      }      }
692      else
   if (match != invert)  
693      {      {
694      if (count_only) count++;      dir->first = FALSE;
695        }
696      else if (filenames_only)    if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
697        {      return dir->data.cFileName;
698        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);    }
699        return 0;  #ifndef _MSC_VER
700        }  return NULL;   /* Keep compiler happy; never executed */
701    #endif
702    }
703    
704      else if (silent) return 0;  void
705    closedirectory(directory_type *dir)
706    {
707    FindClose(dir->handle);
708    free(dir);
709    }
710    
     else  
       {  
       if (name != NULL) fprintf(stdout, "%s:", name);  
       if (number) fprintf(stdout, "%d:", linenumber);  
       fprintf(stdout, "%s\n", buffer);  
       }  
711    
712      rc = 0;  /************* Test for regular file in Win32 **********/
     }  
   }  
713    
714  if (count_only)  /* I don't know how to do this, or if it can be done; assume all paths are
715    {  regular if they are not directories. */
   if (name != NULL) fprintf(stdout, "%s:", name);  
   fprintf(stdout, "%d\n", count);  
   }  
716    
717  return rc;  int isregfile(char *filename)
718    {
719    return !isdirectory(filename);
720  }  }
721    
722    
723    /************* Test for a terminal in Win32 **********/
724    
725    /* I don't know how to do this; assume never */
726    
727  /*************************************************  static BOOL
728  *     Grep a file or recurse into a directory    *  is_stdout_tty(void)
729  *************************************************/  {
730    return FALSE;
731    }
732    
733  static int  static BOOL
734  grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,  is_file_tty(FILE *f)
   BOOL only_one_at_top)  
735  {  {
736  int rc = 1;  return FALSE;
737  int sep;  }
 FILE *in;  
738    
 /* If the file is a directory and we are recursing, scan each file within it.  
 The scanning code is localized so it can be made system-specific. */  
739    
740  if ((sep = isdirectory(filename)) != 0 && recurse)  /************* Directory scanning when we can't do it ***********/
   {  
   char buffer[1024];  
   char *nextfile;  
   directory_type *dir = opendirectory(filename);  
741    
742    if (dir == NULL)  /* The type is void, and apart from isdirectory(), the functions do nothing. */
     {  
     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,  
       strerror(errno));  
     return 2;  
     }  
743    
744    while ((nextfile = readdirectory(dir)) != NULL)  #else
     {  
     int frc;  
     sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);  
     frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);  
     if (frc == 0 && rc == 1) rc = 0;  
     }  
745    
746    closedirectory(dir);  #define FILESEP 0
747    return rc;  typedef void directory_type;
   }  
748    
749  /* If the file is not a directory, or we are not recursing, scan it. If this is  int isdirectory(char *filename) { return 0; }
750  the first and only argument at top level, we don't show the file name.  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
751  Otherwise, control is via the show_filenames variable. */  char *readdirectory(directory_type *dir) { return (char*)0;}
752    void closedirectory(directory_type *dir) {}
753    
 in = fopen(filename, "r");  
 if (in == NULL)  
   {  
   fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));  
   return 2;  
   }  
754    
755  rc = pcregrep(in, (show_filenames && !only_one_at_top)? filename : NULL);  /************* Test for regular when we can't do it **********/
 fclose(in);  
 return rc;  
 }  
756    
757    /* Assume all files are regular. */
758    
759    int isregfile(char *filename) { return 1; }
760    
761    
762  /*************************************************  /************* Test for a terminal when we can't do it **********/
 *                Usage function                  *  
 *************************************************/  
763    
764  static int  static BOOL
765  usage(int rc)  is_stdout_tty(void)
766  {  {
767  fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n");  return FALSE;
768  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  }
769  return rc;  
770    static BOOL
771    is_file_tty(FILE *f)
772    {
773    return FALSE;
774  }  }
775    
776    #endif
777    
778    
779    
780    #ifndef HAVE_STRERROR
781  /*************************************************  /*************************************************
782  *                Help function                   *  *     Provide strerror() for non-ANSI libraries  *
783  *************************************************/  *************************************************/
784    
785  static void  /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
786  help(void)  in their libraries, but can provide the same facility by this simple
787  {  alternative function. */
 option_item *op;  
788    
789  printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n");  extern int   sys_nerr;
790  printf("Search for PATTERN in each FILE or standard input.\n");  extern char *sys_errlist[];
791  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
792    char *
793    strerror(int n)
794    {
795    if (n < 0 || n >= sys_nerr) return "unknown error number";
796    return sys_errlist[n];
797    }
798    #endif /* HAVE_STRERROR */
799    
800    
801    
802    /*************************************************
803    *                Usage function                  *
804    *************************************************/
805    
806    static int
807    usage(int rc)
808    {
809    option_item *op;
810    fprintf(stderr, "Usage: pcregrep [-");
811    for (op = optionlist; op->one_char != 0; op++)
812      {
813      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
814      }
815    fprintf(stderr, "] [long options] [pattern] [files]\n");
816    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
817      "options.\n");
818    return rc;
819    }
820    
821    
822    
823    /*************************************************
824    *                Help function                   *
825    *************************************************/
826    
827    static void
828    help(void)
829    {
830    option_item *op;
831    
832    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
833    printf("Search for PATTERN in each FILE or standard input.\n");
834    printf("PATTERN must be present if neither -e nor -f is used.\n");
835    printf("\"-\" can be used as a file name to mean STDIN.\n");
836    
837    #ifdef SUPPORT_LIBZ
838    printf("Files whose names end in .gz are read using zlib.\n");
839    #endif
840    
841    #ifdef SUPPORT_LIBBZ2
842    printf("Files whose names end in .bz2 are read using bzlib2.\n");
843    #endif
844    
845    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
846    printf("Other files and the standard input are read as plain files.\n\n");
847    #else
848    printf("All files are read as plain files, without any interpretation.\n\n");
849    #endif
850    
851    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
852  printf("Options:\n");  printf("Options:\n");
853    
854  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
855    {    {
856    int n;    int n;
857    char s[4];    char s[4];
858    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");  
859    printf("  %s --%s%n", s, op->long_name, &n);    /* Two options were accidentally implemented and documented with underscores
860    n = 30 - n;    instead of hyphens in their names, something that was not noticed for quite a
861      few releases. When fixing this, I left the underscored versions in the list
862      in case people were using them. However, we don't want to display them in the
863      help data. There are no other options that contain underscores, and we do not
864      expect ever to implement such options. Therefore, just omit any option that
865      contains an underscore. */
866    
867      if (strchr(op->long_name, '_') != NULL) continue;
868    
869      if (op->one_char > 0 && (op->long_name)[0] == 0)
870        n = 31 - printf("  -%c", op->one_char);
871      else
872        {
873        if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
874          else strcpy(s, "   ");
875        n = 31 - printf("  %s --%s", s, op->long_name);
876        }
877    
878    if (n < 1) n = 1;    if (n < 1) n = 1;
879    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                           ", op->help_text);
880    }    }
881    
882  printf("\n  -f<filename>  or  --file=<filename>\n");  printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
883  printf("    Read patterns from <filename> instead of using a command line option.\n");  printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
884  printf("    Trailing white space is removed; blanks lines are ignored.\n");  printf("When reading patterns or file names from a file, trailing white\n");
885  printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  printf("space is removed and blank lines are ignored.\n");
886    printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
887    
888  printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
889  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
890  }  }
891    
892    
893    
894    /*************************************************
895    *            Test exclude/includes               *
896    *************************************************/
897    
898    /* If any exclude pattern matches, the path is excluded. Otherwise, unless
899    there are no includes, the path must match an include pattern.
900    
901    Arguments:
902      path      the path to be matched
903      ip        the chain of include patterns
904      ep        the chain of exclude patterns
905    
906    Returns:    TRUE if the path is not excluded
907    */
908    
909    static BOOL
910    test_incexc(char *path, patstr *ip, patstr *ep)
911    {
912    int plen = strlen(path);
913    
914    for (; ep != NULL; ep = ep->next)
915      {
916      if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
917        return FALSE;
918      }
919    
920    if (ip == NULL) return TRUE;
921    
922    for (; ip != NULL; ip = ip->next)
923      {
924      if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
925        return TRUE;
926      }
927    
928    return FALSE;
929    }
930    
931    
932    
933    /*************************************************
934    *         Decode integer argument value          *
935    *************************************************/
936    
937    /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
938    because SunOS4 doesn't have it. This is used only for unpicking arguments, so
939    just keep it simple.
940    
941    Arguments:
942      option_data   the option data string
943      op            the option item (for error messages)
944      longop        TRUE if option given in long form
945    
946    Returns:        a long integer
947    */
948    
949    static long int
950    decode_number(char *option_data, option_item *op, BOOL longop)
951    {
952    unsigned long int n = 0;
953    char *endptr = option_data;
954    while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
955    while (isdigit((unsigned char)(*endptr)))
956      n = n * 10 + (int)(*endptr++ - '0');
957    if (toupper(*endptr) == 'K')
958      {
959      n *= 1024;
960      endptr++;
961      }
962    else if (toupper(*endptr) == 'M')
963      {
964      n *= 1024*1024;
965      endptr++;
966      }
967    
968    if (*endptr != 0)   /* Error */
969      {
970      if (longop)
971        {
972        char *equals = strchr(op->long_name, '=');
973        int nlen = (equals == NULL)? (int)strlen(op->long_name) :
974          (int)(equals - op->long_name);
975        fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
976          option_data, nlen, op->long_name);
977        }
978      else
979        fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
980          option_data, op->one_char);
981      pcregrep_exit(usage(2));
982      }
983    
984    return n;
985    }
986    
987    
988    
989    /*************************************************
990    *       Add item to a chain of numbers           *
991    *************************************************/
992    
993    /* Used to add an item onto a chain, or just return an unconnected item if the
994    "after" argument is NULL.
995    
996    Arguments:
997      n          the number to add
998      after      if not NULL points to item to insert after
999    
1000    Returns:     new number block
1001    */
1002    
1003    static omstr *
1004    add_number(int n, omstr *after)
1005    {
1006    omstr *om = (omstr *)malloc(sizeof(omstr));
1007    
1008    if (om == NULL)
1009      {
1010      fprintf(stderr, "pcregrep: malloc failed\n");
1011      pcregrep_exit(2);
1012      }
1013    om->next = NULL;
1014    om->groupnum = n;
1015    
1016    if (after != NULL)
1017      {
1018      om->next = after->next;
1019      after->next = om;
1020      }
1021    return om;
1022    }
1023    
1024    
1025    
1026    /*************************************************
1027    *            Read one line of input              *
1028    *************************************************/
1029    
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a newline has been stored, when "length" characters have
been stored, or at end of file. No terminating zero is added. The available
space is checked before each character is fetched, so a length of zero (or
less) stores nothing and consumes no input.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;

/* Test for room first: only fetch a character when there is somewhere to put
it, so the buffer can never be overrun. */

while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = c;
  if (c == '\n') break;
  }
return yield;
}
1057    
1058    
1059    
1060    /*************************************************
1061    *             Find end of line                   *
1062    *************************************************/
1063    
1064    /* The length of the endline sequence that is found is set via lenptr. This may
1065    be zero at the very end of the file if there is no line-ending sequence there.
1066    
1067    Arguments:
1068      p         current position in line
1069      endptr    end of available data
1070      lenptr    where to put the length of the eol sequence
1071    
1072    Returns:    pointer after the last byte of the line,
1073                including the newline byte(s)
1074    */
1075    
1076    static char *
1077    end_of_line(char *p, char *endptr, int *lenptr)
1078    {
1079    switch(endlinetype)
1080      {
1081      default:      /* Just in case */
1082      case EL_LF:
1083      while (p < endptr && *p != '\n') p++;
1084      if (p < endptr)
1085        {
1086        *lenptr = 1;
1087        return p + 1;
1088        }
1089      *lenptr = 0;
1090      return endptr;
1091    
1092      case EL_CR:
1093      while (p < endptr && *p != '\r') p++;
1094      if (p < endptr)
1095        {
1096        *lenptr = 1;
1097        return p + 1;
1098        }
1099      *lenptr = 0;
1100      return endptr;
1101    
1102      case EL_CRLF:
1103      for (;;)
1104        {
1105        while (p < endptr && *p != '\r') p++;
1106        if (++p >= endptr)
1107          {
1108          *lenptr = 0;
1109          return endptr;
1110          }
1111        if (*p == '\n')
1112          {
1113          *lenptr = 2;
1114          return p + 1;
1115          }
1116        }
1117      break;
1118    
1119      case EL_ANYCRLF:
1120      while (p < endptr)
1121        {
1122        int extra = 0;
1123        register int c = *((unsigned char *)p);
1124    
1125        if (utf8 && c >= 0xc0)
1126          {
1127          int gcii, gcss;
1128          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1129          gcss = 6*extra;
1130          c = (c & utf8_table3[extra]) << gcss;
1131          for (gcii = 1; gcii <= extra; gcii++)
1132            {
1133            gcss -= 6;
1134            c |= (p[gcii] & 0x3f) << gcss;
1135            }
1136          }
1137    
1138        p += 1 + extra;
1139    
1140        switch (c)
1141          {
1142          case '\n':
1143          *lenptr = 1;
1144          return p;
1145    
1146          case '\r':
1147          if (p < endptr && *p == '\n')
1148            {
1149            *lenptr = 2;
1150            p++;
1151            }
1152          else *lenptr = 1;
1153          return p;
1154    
1155          default:
1156          break;
1157          }
1158        }   /* End of loop for ANYCRLF case */
1159    
1160      *lenptr = 0;  /* Must have hit the end */
1161      return endptr;
1162    
1163      case EL_ANY:
1164      while (p < endptr)
1165        {
1166        int extra = 0;
1167        register int c = *((unsigned char *)p);
1168    
1169        if (utf8 && c >= 0xc0)
1170          {
1171          int gcii, gcss;
1172          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1173          gcss = 6*extra;
1174          c = (c & utf8_table3[extra]) << gcss;
1175          for (gcii = 1; gcii <= extra; gcii++)
1176            {
1177            gcss -= 6;
1178            c |= (p[gcii] & 0x3f) << gcss;
1179            }
1180          }
1181    
1182        p += 1 + extra;
1183    
1184        switch (c)
1185          {
1186          case '\n':    /* LF */
1187          case '\v':    /* VT */
1188          case '\f':    /* FF */
1189          *lenptr = 1;
1190          return p;
1191    
1192          case '\r':    /* CR */
1193          if (p < endptr && *p == '\n')
1194            {
1195            *lenptr = 2;
1196            p++;
1197            }
1198          else *lenptr = 1;
1199          return p;
1200    
1201    #ifndef EBCDIC
1202          case 0x85:    /* Unicode NEL */
1203          *lenptr = utf8? 2 : 1;
1204          return p;
1205    
1206          case 0x2028:  /* Unicode LS */
1207          case 0x2029:  /* Unicode PS */
1208          *lenptr = 3;
1209          return p;
1210    #endif  /* Not EBCDIC */
1211    
1212          default:
1213          break;
1214          }
1215        }   /* End of loop for ANY case */
1216    
1217      *lenptr = 0;  /* Must have hit the end */
1218      return endptr;
1219      }     /* End of overall switch */
1220    }
1221    
1222    
1223    
1224    /*************************************************
1225    *         Find start of previous line            *
1226    *************************************************/
1227    
1228    /* This is called when looking back for before lines to print.
1229    
1230    Arguments:
1231      p         start of the subsequent line
1232      startptr  start of available data
1233    
1234    Returns:    pointer to the start of the previous line
1235    */
1236    
1237    static char *
1238    previous_line(char *p, char *startptr)
1239    {
1240    switch(endlinetype)
1241      {
1242      default:      /* Just in case */
1243      case EL_LF:
1244      p--;
1245      while (p > startptr && p[-1] != '\n') p--;
1246      return p;
1247    
1248      case EL_CR:
1249      p--;
1250      while (p > startptr && p[-1] != '\n') p--;
1251      return p;
1252    
1253      case EL_CRLF:
1254      for (;;)
1255        {
1256        p -= 2;
1257        while (p > startptr && p[-1] != '\n') p--;
1258        if (p <= startptr + 1 || p[-2] == '\r') return p;
1259        }
1260      return p;   /* But control should never get here */
1261    
1262      case EL_ANY:
1263      case EL_ANYCRLF:
1264      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1265      if (utf8) while ((*p & 0xc0) == 0x80) p--;
1266    
1267      while (p > startptr)
1268        {
1269        register int c;
1270        char *pp = p - 1;
1271    
1272        if (utf8)
1273          {
1274          int extra = 0;
1275          while ((*pp & 0xc0) == 0x80) pp--;
1276          c = *((unsigned char *)pp);
1277          if (c >= 0xc0)
1278            {
1279            int gcii, gcss;
1280            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1281            gcss = 6*extra;
1282            c = (c & utf8_table3[extra]) << gcss;
1283            for (gcii = 1; gcii <= extra; gcii++)
1284              {
1285              gcss -= 6;
1286              c |= (pp[gcii] & 0x3f) << gcss;
1287              }
1288            }
1289          }
1290        else c = *((unsigned char *)pp);
1291    
1292        if (endlinetype == EL_ANYCRLF) switch (c)
1293          {
1294          case '\n':    /* LF */
1295          case '\r':    /* CR */
1296          return p;
1297    
1298          default:
1299          break;
1300          }
1301    
1302        else switch (c)
1303          {
1304          case '\n':    /* LF */
1305          case '\v':    /* VT */
1306          case '\f':    /* FF */
1307          case '\r':    /* CR */
1308    #ifndef EBCDIE
1309          case 0x85:    /* Unicode NEL */
1310          case 0x2028:  /* Unicode LS */
1311          case 0x2029:  /* Unicode PS */
1312    #endif  /* Not EBCDIC */
1313          return p;
1314    
1315          default:
1316          break;
1317          }
1318    
1319        p = pp;  /* Back one character */
1320        }        /* End of loop for ANY case */
1321    
1322      return startptr;  /* Hit start of data */
1323      }     /* End of overall switch */
1324    }
1325    
1326    
1327    
1328    
1329    
1330    /*************************************************
1331    *       Print the previous "after" lines         *
1332    *************************************************/
1333    
1334    /* This is called if we are about to lose said lines because of buffer filling,
1335    and at the end of the file. The data in the line is written using fwrite() so
1336    that a binary zero does not terminate it.
1337    
1338    Arguments:
1339      lastmatchnumber   the number of the last matching line, plus one
1340      lastmatchrestart  where we restarted after the last match
1341      endptr            end of available data
1342      printname         filename for printing
1343    
1344    Returns:            nothing
1345    */
1346    
1347    static void
1348    do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1349      char *printname)
1350    {
1351    if (after_context > 0 && lastmatchnumber > 0)
1352      {
1353      int count = 0;
1354      while (lastmatchrestart < endptr && count++ < after_context)
1355        {
1356        int ellength;
1357        char *pp = lastmatchrestart;
1358        if (printname != NULL) fprintf(stdout, "%s-", printname);
1359        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1360        pp = end_of_line(pp, endptr, &ellength);
1361        FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1362        lastmatchrestart = pp;
1363        }
1364      hyphenpending = TRUE;
1365      }
1366    }
1367    
1368    
1369    
1370    /*************************************************
1371    *   Apply patterns to subject till one matches   *
1372    *************************************************/
1373    
1374    /* This function is called to run through all patterns, looking for a match. It
1375    is used multiple times for the same subject when colouring is enabled, in order
1376    to find all possible matches.
1377    
1378    Arguments:
1379      matchptr     the start of the subject
1380      length       the length of the subject to match
1381      startoffset  where to start matching
1382      offsets      the offets vector to fill in
1383      mrc          address of where to put the result of pcre_exec()
1384    
1385    Returns:      TRUE if there was a match
1386                  FALSE if there was no match
1387                  invert if there was a non-fatal error
1388    */
1389    
1390    static BOOL
1391    match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
1392      int *mrc)
1393    {
1394    int i;
1395    size_t slen = length;
1396    patstr *p = patterns;
1397    const char *msg = "this text:\n\n";
1398    
1399    if (slen > 200)
1400      {
1401      slen = 200;
1402      msg = "text that starts:\n\n";
1403      }
1404    for (i = 1; p != NULL; p = p->next, i++)
1405      {
1406      *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1407        startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
1408      if (*mrc >= 0) return TRUE;
1409      if (*mrc == PCRE_ERROR_NOMATCH) continue;
1410      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1411      if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1412      fprintf(stderr, "%s", msg);
1413      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1414      fprintf(stderr, "\n\n");
1415      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1416          *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1417        resource_error = TRUE;
1418      if (error_count++ > 20)
1419        {
1420        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1421        pcregrep_exit(2);
1422        }
1423      return invert;    /* No more matching; don't show the line again */
1424      }
1425    
1426    return FALSE;  /* No match, no errors */
1427    }
1428    
1429    
1430    
1431    /*************************************************
1432    *            Grep an individual file             *
1433    *************************************************/
1434    
1435    /* This is called from grep_or_recurse() below. It uses a buffer that is three
1436    times the value of bufthird. The matching point is never allowed to stray into
1437    the top third of the buffer, thus keeping more of the file available for
1438    context printing or for multiline scanning. For large files, the pointer will
1439    be in the middle third most of the time, so the bottom third is available for
1440    "before" context printing.
1441    
1442    Arguments:
1443      handle       the fopened FILE stream for a normal file
1444                   the gzFile pointer when reading is via libz
1445                   the BZFILE pointer when reading is via libbz2
1446      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1447      filename     the file name or NULL (for errors)
1448      printname    the file name if it is to be printed for each match
1449                   or NULL if the file name is not to be printed
1450                   it cannot be NULL if filenames[_nomatch]_only is set
1451    
1452    Returns:       0 if there was at least one match
1453                   1 otherwise (no matches)
1454                   2 if an overlong line is encountered
1455                   3 if there is a read error on a .bz2 file
1456    */
1457    
1458    static int
1459    pcregrep(void *handle, int frtype, char *filename, char *printname)
1460    {
1461    int rc = 1;
1462    int linenumber = 1;
1463    int lastmatchnumber = 0;
1464    int count = 0;
1465    int filepos = 0;
1466    int offsets[OFFSET_SIZE];
1467    char *lastmatchrestart = NULL;
1468    char *ptr = main_buffer;
1469    char *endptr;
1470    size_t bufflength;
1471    BOOL binary = FALSE;
1472    BOOL endhyphenpending = FALSE;
1473    BOOL input_line_buffered = line_buffered;
1474    FILE *in = NULL;                    /* Ensure initialized */
1475    
1476    #ifdef SUPPORT_LIBZ
1477    gzFile ingz = NULL;
1478    #endif
1479    
1480    #ifdef SUPPORT_LIBBZ2
1481    BZFILE *inbz2 = NULL;
1482    #endif
1483    
1484    
1485    /* Do the first read into the start of the buffer and set up the pointer to end
1486    of what we have. In the case of libz, a non-zipped .gz file will be read as a
1487    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1488    fail. */
1489    
1490    #ifdef SUPPORT_LIBZ
1491    if (frtype == FR_LIBZ)
1492      {
1493      ingz = (gzFile)handle;
1494      bufflength = gzread (ingz, main_buffer, bufsize);
1495      }
1496    else
1497    #endif
1498    
1499    #ifdef SUPPORT_LIBBZ2
1500    if (frtype == FR_LIBBZ2)
1501      {
1502      inbz2 = (BZFILE *)handle;
1503      bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1504      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1505      }                                    /* without the cast it is unsigned. */
1506    else
1507    #endif
1508    
1509      {
1510      in = (FILE *)handle;
1511      if (is_file_tty(in)) input_line_buffered = TRUE;
1512      bufflength = input_line_buffered?
1513        read_one_line(main_buffer, bufsize, in) :
1514        fread(main_buffer, 1, bufsize, in);
1515      }
1516    
1517    endptr = main_buffer + bufflength;
1518    
1519    /* Unless binary-files=text, see if we have a binary file. This uses the same
1520    rule as GNU grep, namely, a search for a binary zero byte near the start of the
1521    file. */
1522    
1523    if (binary_files != BIN_TEXT)
1524      {
1525      binary =
1526        memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1527      if (binary && binary_files == BIN_NOMATCH) return 1;
1528      }
1529    
1530    /* Loop while the current pointer is not at the end of the file. For large
1531    files, endptr will be at the end of the buffer when we are in the middle of the
1532    file, but ptr will never get there, because as soon as it gets over 2/3 of the
1533    way, the buffer is shifted left and re-filled. */
1534    
1535    while (ptr < endptr)
1536      {
1537      int endlinelength;
1538      int mrc = 0;
1539      int startoffset = 0;
1540      BOOL match;
1541      char *matchptr = ptr;
1542      char *t = ptr;
1543      size_t length, linelength;
1544    
1545      /* At this point, ptr is at the start of a line. We need to find the length
1546      of the subject string to pass to pcre_exec(). In multiline mode, it is the
1547      length remainder of the data in the buffer. Otherwise, it is the length of
1548      the next line, excluding the terminating newline. After matching, we always
1549      advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1550      option is used for compiling, so that any match is constrained to be in the
1551      first line. */
1552    
1553      t = end_of_line(t, endptr, &endlinelength);
1554      linelength = t - ptr - endlinelength;
1555      length = multiline? (size_t)(endptr - ptr) : linelength;
1556    
1557      /* Check to see if the line we are looking at extends right to the very end
1558      of the buffer without a line terminator. This means the line is too long to
1559      handle. */
1560    
1561      if (endlinelength == 0 && t == main_buffer + bufsize)
1562        {
1563        fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1564                        "pcregrep: check the --buffer-size option\n",
1565                        linenumber,
1566                        (filename == NULL)? "" : " of file ",
1567                        (filename == NULL)? "" : filename);
1568        return 2;
1569        }
1570    
1571      /* Extra processing for Jeffrey Friedl's debugging. */
1572    
1573    #ifdef JFRIEDL_DEBUG
1574      if (jfriedl_XT || jfriedl_XR)
1575      {
1576          #include <sys/time.h>
1577          #include <time.h>
1578          struct timeval start_time, end_time;
1579          struct timezone dummy;
1580          int i;
1581    
1582          if (jfriedl_XT)
1583          {
1584              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1585              const char *orig = ptr;
1586              ptr = malloc(newlen + 1);
1587              if (!ptr) {
1588                      printf("out of memory");
1589                      pcregrep_exit(2);
1590              }
1591              endptr = ptr;
1592              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1593              for (i = 0; i < jfriedl_XT; i++) {
1594                      strncpy(endptr, orig,  length);
1595                      endptr += length;
1596              }
1597              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1598              length = newlen;
1599          }
1600    
1601          if (gettimeofday(&start_time, &dummy) != 0)
1602                  perror("bad gettimeofday");
1603    
1604    
1605          for (i = 0; i < jfriedl_XR; i++)
1606              match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1607                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1608    
1609          if (gettimeofday(&end_time, &dummy) != 0)
1610                  perror("bad gettimeofday");
1611    
1612          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1613                          -
1614                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1615    
1616          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1617          return 0;
1618      }
1619    #endif
1620    
1621      /* We come back here after a match when show_only_matching is set, in order
1622      to find any further matches in the same line. This applies to
1623      --only-matching, --file-offsets, and --line-offsets. */
1624    
1625      ONLY_MATCHING_RESTART:
1626    
1627      /* Run through all the patterns until one matches or there is an error other
1628      than NOMATCH. This code is in a subroutine so that it can be re-used for
1629      finding subsequent matches when colouring matched lines. */
1630    
1631      match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1632    
1633      /* If it's a match or a not-match (as required), do what's wanted. */
1634    
1635      if (match != invert)
1636        {
1637        BOOL hyphenprinted = FALSE;
1638    
1639        /* We've failed if we want a file that doesn't have any matches. */
1640    
1641        if (filenames == FN_NOMATCH_ONLY) return 1;
1642    
1643        /* Just count if just counting is wanted. */
1644    
1645        if (count_only) count++;
1646    
1647        /* When handling a binary file and binary-files==binary, the "binary"
1648        variable will be set true (it's false in all other cases). In this
1649        situation we just want to output the file name. No need to scan further. */
1650    
1651        else if (binary)
1652          {
1653          fprintf(stdout, "Binary file %s matches\n", filename);
1654          return 0;
1655          }
1656    
1657        /* If all we want is a file name, there is no need to scan any more lines
1658        in the file. */
1659    
1660        else if (filenames == FN_MATCH_ONLY)
1661          {
1662          fprintf(stdout, "%s\n", printname);
1663          return 0;
1664          }
1665    
1666        /* Likewise, if all we want is a yes/no answer. */
1667    
1668        else if (quiet) return 0;
1669    
1670        /* The --only-matching option prints just the substring that matched,
1671        and/or one or more captured portions of it, as long as these strings are
1672        not empty. The --file-offsets and --line-offsets options output offsets for
1673        the matching substring (all three set show_only_matching). None of these
1674        mutually exclusive options prints any context. Afterwards, adjust the start
1675        and then jump back to look for further matches in the same line. If we are
1676        in invert mode, however, nothing is printed and we do not restart - this
1677        could still be useful because the return code is set. */
1678    
1679        else if (show_only_matching)
1680          {
1681          if (!invert)
1682            {
1683            if (printname != NULL) fprintf(stdout, "%s:", printname);
1684            if (number) fprintf(stdout, "%d:", linenumber);
1685    
1686            /* Handle --line-offsets */
1687    
1688            if (line_offsets)
1689              fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1690                offsets[1] - offsets[0]);
1691    
1692            /* Handle --file-offsets */
1693    
1694            else if (file_offsets)
1695              fprintf(stdout, "%d,%d\n",
1696                (int)(filepos + matchptr + offsets[0] - ptr),
1697                offsets[1] - offsets[0]);
1698    
1699            /* Handle --only-matching, which may occur many times */
1700    
1701            else
1702              {
1703              BOOL printed = FALSE;
1704              omstr *om;
1705    
1706              for (om = only_matching; om != NULL; om = om->next)
1707                {
1708                int n = om->groupnum;
1709                if (n < mrc)
1710                  {
1711                  int plen = offsets[2*n + 1] - offsets[2*n];
1712                  if (plen > 0)
1713                    {
1714                    if (printed) fprintf(stdout, "%s", om_separator);
1715                    if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1716                    FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
1717                    if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1718                    printed = TRUE;
1719                    }
1720                  }
1721                }
1722    
1723              if (printed || printname != NULL || number) fprintf(stdout, "\n");
1724              }
1725    
1726            /* Prepare to repeat to find the next match */
1727    
1728            match = FALSE;
1729            if (line_buffered) fflush(stdout);
1730            rc = 0;                      /* Had some success */
1731            startoffset = offsets[1];    /* Restart after the match */
1732            goto ONLY_MATCHING_RESTART;
1733            }
1734          }
1735    
1736        /* This is the default case when none of the above options is set. We print
1737        the matching line(s), possibly preceded and/or followed by other lines of
1738        context. */
1739    
1740        else
1741          {
1742          /* See if there is a requirement to print some "after" lines from a
1743          previous match. We never print any overlaps. */
1744    
1745          if (after_context > 0 && lastmatchnumber > 0)
1746            {
1747            int ellength;
1748            int linecount = 0;
1749            char *p = lastmatchrestart;
1750    
1751            while (p < ptr && linecount < after_context)
1752              {
1753              p = end_of_line(p, ptr, &ellength);
1754              linecount++;
1755              }
1756    
1757            /* It is important to advance lastmatchrestart during this printing so
1758            that it interacts correctly with any "before" printing below. Print
1759            each line's data using fwrite() in case there are binary zeroes. */
1760    
1761            while (lastmatchrestart < p)
1762              {
1763              char *pp = lastmatchrestart;
1764              if (printname != NULL) fprintf(stdout, "%s-", printname);
1765              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1766              pp = end_of_line(pp, endptr, &ellength);
1767              FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1768              lastmatchrestart = pp;
1769              }
1770            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1771            }
1772    
1773          /* If there were non-contiguous lines printed above, insert hyphens. */
1774    
1775          if (hyphenpending)
1776            {
1777            fprintf(stdout, "--\n");
1778            hyphenpending = FALSE;
1779            hyphenprinted = TRUE;
1780            }
1781    
1782          /* See if there is a requirement to print some "before" lines for this
1783          match. Again, don't print overlaps. */
1784    
1785          if (before_context > 0)
1786            {
1787            int linecount = 0;
1788            char *p = ptr;
1789    
1790            while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1791                   linecount < before_context)
1792              {
1793              linecount++;
1794              p = previous_line(p, main_buffer);
1795              }
1796    
1797            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1798              fprintf(stdout, "--\n");
1799    
1800            while (p < ptr)
1801              {
1802              int ellength;
1803              char *pp = p;
1804              if (printname != NULL) fprintf(stdout, "%s-", printname);
1805              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1806              pp = end_of_line(pp, endptr, &ellength);
1807              FWRITE(p, 1, pp - p, stdout);
1808              p = pp;
1809              }
1810            }
1811    
1812          /* Now print the matching line(s); ensure we set hyphenpending at the end
1813          of the file if any context lines are being output. */
1814    
1815          if (after_context > 0 || before_context > 0)
1816            endhyphenpending = TRUE;
1817    
1818          if (printname != NULL) fprintf(stdout, "%s:", printname);
1819          if (number) fprintf(stdout, "%d:", linenumber);
1820    
1821          /* In multiline mode, we want to print to the end of the line in which
1822          the end of the matched string is found, so we adjust linelength and the
1823          line number appropriately, but only when there actually was a match
1824          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1825          the match will always be before the first newline sequence. */
1826    
1827          if (multiline & !invert)
1828            {
1829            char *endmatch = ptr + offsets[1];
1830            t = ptr;
1831            while (t < endmatch)
1832              {
1833              t = end_of_line(t, endptr, &endlinelength);
1834              if (t < endmatch) linenumber++; else break;
1835              }
1836            linelength = t - ptr - endlinelength;
1837            }
1838    
1839          /*** NOTE: Use only fwrite() to output the data line, so that binary
1840          zeroes are treated as just another data character. */
1841    
1842          /* This extra option, for Jeffrey Friedl's debugging requirements,
1843          replaces the matched string, or a specific captured string if it exists,
1844          with X. When this happens, colouring is ignored. */
1845    
1846    #ifdef JFRIEDL_DEBUG
1847          if (S_arg >= 0 && S_arg < mrc)
1848            {
1849            int first = S_arg * 2;
1850            int last  = first + 1;
1851            FWRITE(ptr, 1, offsets[first], stdout);
1852            fprintf(stdout, "X");
1853            FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1854            }
1855          else
1856    #endif
1857    
1858          /* We have to split the line(s) up if colouring, and search for further
1859          matches, but not of course if the line is a non-match. */
1860    
1861          if (do_colour && !invert)
1862            {
1863            int plength;
1864            FWRITE(ptr, 1, offsets[0], stdout);
1865            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1866            FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1867            fprintf(stdout, "%c[00m", 0x1b);
1868            for (;;)
1869              {
1870              startoffset = offsets[1];
1871              if (startoffset >= (int)linelength + endlinelength ||
1872                  !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1873                break;
1874              FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1875              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1876              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1877              fprintf(stdout, "%c[00m", 0x1b);
1878              }
1879    
1880            /* In multiline mode, we may have already printed the complete line
1881            and its line-ending characters (if they matched the pattern), so there
1882            may be no more to print. */
1883    
1884            plength = (int)((linelength + endlinelength) - startoffset);
1885            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1886            }
1887    
1888          /* Not colouring; no need to search for further matches */
1889    
1890          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1891          }
1892    
1893        /* End of doing what has to be done for a match. If --line-buffered was
1894        given, flush the output. */
1895    
1896        if (line_buffered) fflush(stdout);
1897        rc = 0;    /* Had some success */
1898    
1899        /* Remember where the last match happened for after_context. We remember
1900        where we are about to restart, and that line's number. */
1901    
1902        lastmatchrestart = ptr + linelength + endlinelength;
1903        lastmatchnumber = linenumber + 1;
1904        }
1905    
1906      /* For a match in multiline inverted mode (which of course did not cause
1907      anything to be printed), we have to move on to the end of the match before
1908      proceeding. */
1909    
1910      if (multiline && invert && match)
1911        {
1912        int ellength;
1913        char *endmatch = ptr + offsets[1];
1914        t = ptr;
1915        while (t < endmatch)
1916          {
1917          t = end_of_line(t, endptr, &ellength);
1918          if (t <= endmatch) linenumber++; else break;
1919          }
1920        endmatch = end_of_line(endmatch, endptr, &ellength);
1921        linelength = endmatch - ptr - ellength;
1922        }
1923    
1924      /* Advance to after the newline and increment the line number. The file
1925      offset to the current line is maintained in filepos. */
1926    
1927      ptr += linelength + endlinelength;
1928      filepos += (int)(linelength + endlinelength);
1929      linenumber++;
1930    
1931      /* If input is line buffered, and the buffer is not yet full, read another
1932      line and add it into the buffer. */
1933    
1934      if (input_line_buffered && bufflength < (size_t)bufsize)
1935        {
1936        int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1937        bufflength += add;
1938        endptr += add;
1939        }
1940    
1941      /* If we haven't yet reached the end of the file (the buffer is full), and
1942      the current point is in the top 1/3 of the buffer, slide the buffer down by
1943      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1944      about to be lost, print them. */
1945    
1946      if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1947        {
1948        if (after_context > 0 &&
1949            lastmatchnumber > 0 &&
1950            lastmatchrestart < main_buffer + bufthird)
1951          {
1952          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1953          lastmatchnumber = 0;
1954          }
1955    
1956        /* Now do the shuffle */
1957    
1958        memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1959        ptr -= bufthird;
1960    
1961    #ifdef SUPPORT_LIBZ
1962        if (frtype == FR_LIBZ)
1963          bufflength = 2*bufthird +
1964            gzread (ingz, main_buffer + 2*bufthird, bufthird);
1965        else
1966    #endif
1967    
1968    #ifdef SUPPORT_LIBBZ2
1969        if (frtype == FR_LIBBZ2)
1970          bufflength = 2*bufthird +
1971            BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1972        else
1973    #endif
1974    
1975        bufflength = 2*bufthird +
1976          (input_line_buffered?
1977           read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1978           fread(main_buffer + 2*bufthird, 1, bufthird, in));
1979        endptr = main_buffer + bufflength;
1980    
1981        /* Adjust any last match point */
1982    
1983        if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1984        }
1985      }     /* Loop through the whole file */
1986    
1987    /* End of file; print final "after" lines if wanted; do_after_lines sets
1988    hyphenpending if it prints something. */
1989    
1990    if (!show_only_matching && !count_only)
1991      {
1992      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1993      hyphenpending |= endhyphenpending;
1994      }
1995    
1996    /* Print the file name if we are looking for those without matches and there
1997    were none. If we found a match, we won't have got this far. */
1998    
1999    if (filenames == FN_NOMATCH_ONLY)
2000      {
2001      fprintf(stdout, "%s\n", printname);
2002      return 0;
2003      }
2004    
2005    /* Print the match count if wanted */
2006    
2007    if (count_only)
2008      {
2009      if (count > 0 || !omit_zero_count)
2010        {
2011        if (printname != NULL && filenames != FN_NONE)
2012          fprintf(stdout, "%s:", printname);
2013        fprintf(stdout, "%d\n", count);
2014        }
2015      }
2016    
2017    return rc;
2018    }
2019    
2020    
2021    
2022    /*************************************************
2023    *     Grep a file or recurse into a directory    *
2024    *************************************************/
2025    
2026    /* Given a path name, if it's a directory, scan all the files if we are
2027    recursing; if it's a file, grep it.
2028    
2029    Arguments:
2030      pathname          the path to investigate
2031      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
2032      only_one_at_top   TRUE if the path is the only one at toplevel
2033    
2034    Returns:  -1 the file/directory was skipped
2035               0 if there was at least one match
2036               1 if there were no matches
2037               2 there was some kind of error
2038    
2039    However, file opening failures are suppressed if "silent" is set.
2040    */
2041    
2042    static int
2043    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2044    {
2045    int rc = 1;
2046    int frtype;
2047    void *handle;
2048    char *lastcomp;
2049    FILE *in = NULL;           /* Ensure initialized */
2050    
2051    #ifdef SUPPORT_LIBZ
2052    gzFile ingz = NULL;
2053    #endif
2054    
2055    #ifdef SUPPORT_LIBBZ2
2056    BZFILE *inbz2 = NULL;
2057    #endif
2058    
2059    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2060    int pathlen;
2061    #endif
2062    
2063    /* If the file name is "-" we scan stdin */
2064    
2065    if (strcmp(pathname, "-") == 0)
2066      {
2067      return pcregrep(stdin, FR_PLAIN, stdin_name,
2068        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
2069          stdin_name : NULL);
2070      }
2071    
2072    /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2073    directories, whereas --include and --exclude apply to everything else. The test
2074    is against the final component of the path. */
2075    
2076    lastcomp = strrchr(pathname, FILESEP);
2077    lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2078    
2079    /* If the file is a directory, skip if not recursing or if explicitly excluded.
2080    Otherwise, scan the directory and recurse for each path within it. The scanning
2081    code is localized so it can be made system-specific. */
2082    
2083    if (isdirectory(pathname))
2084      {
2085      if (dee_action == dee_SKIP ||
2086          !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2087        return -1;
2088    
2089      if (dee_action == dee_RECURSE)
2090        {
2091        char buffer[1024];
2092        char *nextfile;
2093        directory_type *dir = opendirectory(pathname);
2094    
2095        if (dir == NULL)
2096          {
2097          if (!silent)
2098            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
2099              strerror(errno));
2100          return 2;
2101          }
2102    
2103        while ((nextfile = readdirectory(dir)) != NULL)
2104          {
2105          int frc;
2106          sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
2107          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2108          if (frc > 1) rc = frc;
2109           else if (frc == 0 && rc == 1) rc = 0;
2110          }
2111    
2112        closedirectory(dir);
2113        return rc;
2114        }
2115      }
2116    
2117    /* If the file is not a directory and not a regular file, skip it if that's
2118    been requested. Otherwise, check for explicit include/exclude. */
2119    
2120    else if ((!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2121              !test_incexc(lastcomp, include_patterns, exclude_patterns))
2122            return -1;
2123    
2124    /* Control reaches here if we have a regular file, or if we have a directory
2125    and recursion or skipping was not requested, or if we have anything else and
2126    skipping was not requested. The scan proceeds. If this is the first and only
2127    argument at top level, we don't show the file name, unless we are only showing
2128    the file name, or the filename was forced (-H). */
2129    
2130    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2131    pathlen = (int)(strlen(pathname));
2132    #endif
2133    
2134    /* Open using zlib if it is supported and the file name ends with .gz. */
2135    
2136    #ifdef SUPPORT_LIBZ
2137    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
2138      {
2139      ingz = gzopen(pathname, "rb");
2140      if (ingz == NULL)
2141        {
2142        if (!silent)
2143          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2144            strerror(errno));
2145        return 2;
2146        }
2147      handle = (void *)ingz;
2148      frtype = FR_LIBZ;
2149      }
2150    else
2151    #endif
2152    
2153    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
2154    
2155    #ifdef SUPPORT_LIBBZ2
2156    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
2157      {
2158      inbz2 = BZ2_bzopen(pathname, "rb");
2159      handle = (void *)inbz2;
2160      frtype = FR_LIBBZ2;
2161      }
2162    else
2163    #endif
2164    
2165    /* Otherwise use plain fopen(). The label is so that we can come back here if
2166    an attempt to read a .bz2 file indicates that it really is a plain file. */
2167    
2168    #ifdef SUPPORT_LIBBZ2
2169    PLAIN_FILE:
2170    #endif
2171      {
2172      in = fopen(pathname, "rb");
2173      handle = (void *)in;
2174      frtype = FR_PLAIN;
2175      }
2176    
2177    /* All the opening methods return errno when they fail. */
2178    
2179    if (handle == NULL)
2180      {
2181      if (!silent)
2182        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2183          strerror(errno));
2184      return 2;
2185      }
2186    
2187    /* Now grep the file */
2188    
2189    rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2190      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2191    
2192    /* Close in an appropriate manner. */
2193    
2194    #ifdef SUPPORT_LIBZ
2195    if (frtype == FR_LIBZ)
2196      gzclose(ingz);
2197    else
2198    #endif
2199    
2200    /* If it is a .bz2 file and the result is 3, it means that the first attempt to
2201    read failed. If the error indicates that the file isn't in fact bzipped, try
2202    again as a normal file. */
2203    
2204    #ifdef SUPPORT_LIBBZ2
2205    if (frtype == FR_LIBBZ2)
2206      {
2207      if (rc == 3)
2208        {
2209        int errnum;
2210        const char *err = BZ2_bzerror(inbz2, &errnum);
2211        if (errnum == BZ_DATA_ERROR_MAGIC)
2212          {
2213          BZ2_bzclose(inbz2);
2214          goto PLAIN_FILE;
2215          }
2216        else if (!silent)
2217          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
2218            pathname, err);
2219        rc = 2;    /* The normal "something went wrong" code */
2220        }
2221      BZ2_bzclose(inbz2);
2222      }
2223    else
2224    #endif
2225    
2226    /* Normal file close */
2227    
2228    fclose(in);
2229    
2230    /* Pass back the yield from pcregrep(). */
2231    
2232    return rc;
2233    }
2234    
2235    
2236    
2237    /*************************************************
2238    *    Handle a single-letter, no data option      *
2239    *************************************************/
2240    
2241    static int
2242    handle_option(int letter, int options)
2243    {
2244    switch(letter)
2245      {
2246      case N_FOFFSETS: file_offsets = TRUE; break;
2247      case N_HELP: help(); pcregrep_exit(0);
2248      case N_LBUFFER: line_buffered = TRUE; break;
2249      case N_LOFFSETS: line_offsets = number = TRUE; break;
2250      case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2251      case 'a': binary_files = BIN_TEXT; break;
2252      case 'c': count_only = TRUE; break;
2253      case 'F': process_options |= PO_FIXED_STRINGS; break;
2254      case 'H': filenames = FN_FORCE; break;
2255      case 'I': binary_files = BIN_NOMATCH; break;
2256      case 'h': filenames = FN_NONE; break;
2257      case 'i': options |= PCRE_CASELESS; break;
2258      case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2259      case 'L': filenames = FN_NOMATCH_ONLY; break;
2260      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2261      case 'n': number = TRUE; break;
2262    
2263      case 'o':
2264      only_matching_last = add_number(0, only_matching_last);
2265      if (only_matching == NULL) only_matching = only_matching_last;
2266      break;
2267    
2268      case 'q': quiet = TRUE; break;
2269      case 'r': dee_action = dee_RECURSE; break;
2270      case 's': silent = TRUE; break;
2271      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
2272      case 'v': invert = TRUE; break;
2273      case 'w': process_options |= PO_WORD_MATCH; break;
2274      case 'x': process_options |= PO_LINE_MATCH; break;
2275    
2276      case 'V':
2277      fprintf(stdout, "pcregrep version %s\n", pcre_version());
2278      pcregrep_exit(0);
2279      break;
2280    
2281      default:
2282      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2283      pcregrep_exit(usage(2));
2284      }
2285    
2286    return options;
2287    }
2288    
2289    
2290    
2291    
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", "11th", etc. The text is built in a
static buffer, so it is overwritten by the next call.

Argument:   n        the number
Returns:             pointer to the static buffer holding the ordinal text
*/

static char *
ordin(int n)
{
static char buffer[32];      /* Ample room for any int, two letters, and zero */
const char *ending = "th";
int lasttwo = n % 100;

/* Numbers ending in 11, 12, or 13 take "th", unlike all other numbers that end
in 1, 2, or 3. A negative number gives a negative remainder and so always gets
"th". */

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (n%10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
2314    
2315    
2316    
2317    /*************************************************
2318    *          Compile a single pattern              *
2319    *************************************************/
2320    
2321    /* Do nothing if the pattern has already been compiled. This is the case for
2322    include/exclude patterns read from a file.
2323    
2324    When the -F option has been used, each "pattern" may be a list of strings,
2325    separated by line breaks. They will be matched literally. We split such a
2326    string and compile the first substring, inserting an additional block into the
2327    pattern chain.
2328    
2329    Arguments:
2330      p              points to the pattern block
2331      options        the PCRE options
2332      popts          the processing options
2333      fromfile       TRUE if the pattern was read from a file
2334      fromtext       file name or identifying text (e.g. "include")
2335      count          0 if this is the only command line pattern, or
2336                     number of the command line pattern, or
2337                     linenumber for a pattern from a file
2338    
2339    Returns:         TRUE on success, FALSE after an error
2340    */
2341    
2342    static BOOL
2343    compile_pattern(patstr *p, int options, int popts, int fromfile,
2344      const char *fromtext, int count)
2345    {
2346    char buffer[PATBUFSIZE];
2347    const char *error;
2348    char *ps = p->string;
2349    int patlen = strlen(ps);
2350    int errptr;
2351    
2352    if (p->compiled != NULL) return TRUE;
2353    
2354    if ((popts & PO_FIXED_STRINGS) != 0)
2355      {
2356      int ellength;
2357      char *eop = ps + patlen;
2358      char *pe = end_of_line(ps, eop, &ellength);
2359    
2360      if (ellength != 0)
2361        {
2362        if (add_pattern(pe, p) == NULL) return FALSE;
2363        patlen = (int)(pe - ps - ellength);
2364        }
2365      }
2366    
2367    sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2368    p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2369    if (p->compiled != NULL) return TRUE;
2370    
2371    /* Handle compile errors */
2372    
2373    errptr -= (int)strlen(prefix[popts]);
2374    if (errptr > patlen) errptr = patlen;
2375    
2376    if (fromfile)
2377      {
2378      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2379        "at offset %d: %s\n", count, fromtext, errptr, error);
2380      }
2381    else
2382      {
2383      if (count == 0)
2384        fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2385          fromtext, errptr, error);
2386      else
2387        fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2388          ordin(count), fromtext, errptr, error);
2389      }
2390    
2391    return FALSE;
2392    }
2393    
2394    
2395    
2396  /*************************************************  /*************************************************
2397  *                Handle an option                *  *     Read and compile a file of patterns        *
2398  *************************************************/  *************************************************/
2399    
2400  static int  /* This is used for -f (pattern files), --include-from, and --exclude-from.
2401  handle_option(int letter, int options)  
2402    Arguments:
2403      name         the name of the file; "-" is stdin
2404      patptr       pointer to the pattern chain anchor
2405      patlastptr   pointer to the last pattern pointer
2406      popts        the process options to pass to compile_pattern()
2407    
2408    Returns:       TRUE if all went well
2409    */
2410    
2411    static BOOL
2412    read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2413  {  {
2414  switch(letter)  int linenumber = 0;
2415    FILE *f;
2416    char *filename;
2417    char buffer[PATBUFSIZE];
2418    
2419    if (strcmp(name, "-") == 0)
2420    {    {
2421    case -1:  help(); exit(0);    f = stdin;
2422    case 'c': count_only = TRUE; break;    filename = stdin_name;
2423    case 'h': filenames = FALSE; break;    }
2424    case 'i': options |= PCRE_CASELESS; break;  else
2425    case 'l': filenames_only = TRUE;    {
2426    case 'n': number = TRUE; break;    f = fopen(name, "r");
2427    case 'r': recurse = TRUE; break;    if (f == NULL)
2428    case 's': silent = TRUE; break;      {
2429    case 'v': invert = TRUE; break;      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2430    case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;      return FALSE;
2431        }
2432      filename = name;
2433      }
2434    
2435    case 'V':  while (fgets(buffer, PATBUFSIZE, f) != NULL)
2436    fprintf(stderr, "pcregrep version %s using ", VERSION);    {
2437    fprintf(stderr, "PCRE version %s\n", pcre_version());    char *s = buffer + (int)strlen(buffer);
2438    exit(0);    while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2439    break;    *s = 0;
2440      linenumber++;
2441      if (buffer[0] == 0) continue;   /* Skip blank lines */
2442    
2443    default:    /* Note: this call to add_pattern() puts a pointer to the local variable
2444    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    "buffer" into the pattern chain. However, that pointer is used only when
2445    exit(usage(2));    compiling the pattern, which happens immediately below, so we flatten it
2446      afterwards, as a precaution against any later code trying to use it. */
2447    
2448      *patlastptr = add_pattern(buffer, *patlastptr);
2449      if (*patlastptr == NULL) return FALSE;
2450      if (*patptr == NULL) *patptr = *patlastptr;
2451    
2452      /* This loop is needed because compiling a "pattern" when -F is set may add
2453      on additional literal patterns if the original contains a newline. In the
2454      common case, it never will, because fgets() stops at a newline. However,
2455      the -N option can be used to give pcregrep a different newline setting. */
2456    
2457      for(;;)
2458        {
2459        if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2460            linenumber))
2461          return FALSE;
2462        (*patlastptr)->string = NULL;            /* Insurance */
2463        if ((*patlastptr)->next == NULL) break;
2464        *patlastptr = (*patlastptr)->next;
2465        }
2466    }    }
2467    
2468  return options;  if (f != stdin) fclose(f);
2469    return TRUE;
2470  }  }
2471    
2472    
2473    
   
2474  /*************************************************  /*************************************************
2475  *                Main program                    *  *                Main program                    *
2476  *************************************************/  *************************************************/
2477    
2478    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2479    
2480  int  int
2481  main(int argc, char **argv)  main(int argc, char **argv)
2482  {  {
2483  int i, j;  int i, j;
2484  int rc = 1;  int rc = 1;
 int options = 0;  
 int errptr;  
 const char *error;  
2485  BOOL only_one_at_top;  BOOL only_one_at_top;
2486    patstr *cp;
2487    fnstr *fn;
2488    const char *locale_from = "--locale";
2489    const char *error;
2490    
2491    #ifdef SUPPORT_PCREGREP_JIT
2492    pcre_jit_stack *jit_stack = NULL;
2493    #endif
2494    
2495    /* Set the default line ending value from the default in the PCRE library;
2496    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2497    Note that the return values from pcre_config(), though derived from the ASCII
2498    codes, are the same in EBCDIC environments, so we must use the actual values
2499    rather than escapes such as '\r'. */
2500    
2501    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2502    switch(i)
2503      {
2504      default:               newline = (char *)"lf"; break;
2505      case 13:               newline = (char *)"cr"; break;
2506      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2507      case -1:               newline = (char *)"any"; break;
2508      case -2:               newline = (char *)"anycrlf"; break;
2509      }
2510    
2511  /* Process the options */  /* Process the options */
2512    
2513  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
2514    {    {
2515      option_item *op = NULL;
2516      char *option_data = (char *)"";    /* default to keep compiler happy */
2517      BOOL longop;
2518      BOOL longopwasequals = FALSE;
2519    
2520    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
2521    
2522    /* Long name options */    /* If we hit an argument that is just "-", it may be a reference to STDIN,
2523      but only if we have previously had -e or -f to define the patterns. */
2524    
2525      if (argv[i][1] == 0)
2526        {
2527        if (pattern_files != NULL || patterns != NULL) break;
2528          else pcregrep_exit(usage(2));
2529        }
2530    
2531      /* Handle a long name option, or -- to terminate the options */
2532    
2533    if (argv[i][1] == '-')    if (argv[i][1] == '-')
2534      {      {
2535      option_item *op;      char *arg = argv[i] + 2;
2536        char *argequals = strchr(arg, '=');
2537    
2538      if (strncmp(argv[i]+2, "file=", 5) == 0)      if (*arg == 0)    /* -- terminates options */
2539        {        {
2540        pattern_filename = argv[i] + 7;        i++;
2541        continue;        break;                /* out of the options-handling loop */
2542        }        }
2543    
2544        longop = TRUE;
2545    
2546        /* Some long options have data that follows after =, for example file=name.
2547        Some options have variations in the long name spelling: specifically, we
2548        allow "regexp" because GNU grep allows it, though I personally go along
2549        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2550        These options are entered in the table as "regex(p)". Options can be in
2551        both these categories. */
2552    
2553      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2554        {        {
2555        if (strcmp(argv[i]+2, op->long_name) == 0)        char *opbra = strchr(op->long_name, '(');
2556          char *equals = strchr(op->long_name, '=');
2557    
2558          /* Handle options with only one spelling of the name */
2559    
2560          if (opbra == NULL)     /* Does not contain '(' */
2561            {
2562            if (equals == NULL)  /* Not thing=data case */
2563              {
2564              if (strcmp(arg, op->long_name) == 0) break;
2565              }
2566            else                 /* Special case xxx=data */
2567              {
2568              int oplen = (int)(equals - op->long_name);
2569              int arglen = (argequals == NULL)?
2570                (int)strlen(arg) : (int)(argequals - arg);
2571              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2572                {
2573                option_data = arg + arglen;
2574                if (*option_data == '=')
2575                  {
2576                  option_data++;
2577                  longopwasequals = TRUE;
2578                  }
2579                break;
2580                }
2581              }
2582            }
2583    
2584          /* Handle options with an alternate spelling of the name */
2585    
2586          else
2587          {          {
2588          options = handle_option(op->one_char, options);          char buff1[24];
2589          break;          char buff2[24];
2590    
2591            int baselen = (int)(opbra - op->long_name);
2592            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2593            int arglen = (argequals == NULL || equals == NULL)?
2594              (int)strlen(arg) : (int)(argequals - arg);
2595    
2596            sprintf(buff1, "%.*s", baselen, op->long_name);
2597            sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2598    
2599            if (strncmp(arg, buff1, arglen) == 0 ||
2600               strncmp(arg, buff2, arglen) == 0)
2601              {
2602              if (equals != NULL && argequals != NULL)
2603                {
2604                option_data = argequals;
2605                if (*option_data == '=')
2606                  {
2607                  option_data++;
2608                  longopwasequals = TRUE;
2609                  }
2610                }
2611              break;
2612              }
2613          }          }
2614        }        }
2615    
2616      if (op->one_char == 0)      if (op->one_char == 0)
2617        {        {
2618        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2619        exit(usage(2));        pcregrep_exit(usage(2));
2620        }        }
2621      }      }
2622    
2623    /* One-char options */    /* Jeffrey Friedl's debugging harness uses these additional options which
2624      are not in the right form for putting in the option table because they use
2625      only one hyphen, yet are more than one character long. By putting them
2626      separately here, they will not get displayed as part of the help() output,
2627      but I don't think Jeffrey will care about that. */
2628    
2629    #ifdef JFRIEDL_DEBUG
2630      else if (strcmp(argv[i], "-pre") == 0) {
2631              jfriedl_prefix = argv[++i];
2632              continue;
2633      } else if (strcmp(argv[i], "-post") == 0) {
2634              jfriedl_postfix = argv[++i];
2635              continue;
2636      } else if (strcmp(argv[i], "-XT") == 0) {
2637              sscanf(argv[++i], "%d", &jfriedl_XT);
2638              continue;
2639      } else if (strcmp(argv[i], "-XR") == 0) {
2640              sscanf(argv[++i], "%d", &jfriedl_XR);
2641              continue;
2642      }
2643    #endif
2644    
2645    
2646      /* One-char options; many that have no data may be in a single argument; we
2647      continue till we hit the last one or one that needs data. */
2648    
2649    else    else
2650      {      {
2651      char *s = argv[i] + 1;      char *s = argv[i] + 1;
2652        longop = FALSE;
2653    
2654      while (*s != 0)      while (*s != 0)
2655        {        {
2656        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
2657          {          {
2658          pattern_filename = s + 1;          if (*s == op->one_char) break;
2659          if (pattern_filename[0] == 0)          }
2660            {        if (op->one_char == 0)
2661            if (i >= argc - 1)          {
2662              {          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2663              fprintf(stderr, "pcregrep: File name missing after -f\n");            *s, argv[i]);
2664              exit(usage(2));          pcregrep_exit(usage(2));
2665              }          }
2666            pattern_filename = argv[++i];  
2667            }        option_data = s+1;
2668          break;  
2669          /* Break out if this is the last character in the string; it's handled
2670          below like a single multi-char option. */
2671    
2672          if (*option_data == 0) break;
2673    
2674          /* Check for a single-character option that has data: OP_OP_NUMBER(S)
2675          are used for ones that either have a numerical value or defaults, i.e.
2676          the data is optional. If a digit follows, there is data; if not, carry on
2677          with other single-character options in the same string. */
2678    
2679          if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
2680            {
2681            if (isdigit((unsigned char)s[1])) break;
2682            }
2683          else   /* Check for an option with data */
2684            {
2685            if (op->type != OP_NODATA) break;
2686          }          }
2687        else options = handle_option(*s++, options);  
2688          /* Handle a single-character option with no data, then loop for the
2689          next character in the string. */
2690    
2691          pcre_options = handle_option(*s++, pcre_options);
2692          }
2693        }
2694    
2695      /* At this point we should have op pointing to a matched option. If the type
2696      is OP_NODATA, it means that there is no data, and the option might set
2697      something in the PCRE options. */
2698    
2699      if (op->type == OP_NODATA)
2700        {
2701        pcre_options = handle_option(op->one_char, pcre_options);
2702        continue;
2703        }
2704    
2705      /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
2706      either has a value or defaults to something. It cannot have data in a
2707      separate item. At the moment, the only such options are "colo(u)r",
2708      "only-matching", and Jeffrey Friedl's special -S debugging option. */
2709    
2710      if (*option_data == 0 &&
2711          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
2712           op->type == OP_OP_NUMBERS))
2713        {
2714        switch (op->one_char)
2715          {
2716          case N_COLOUR:
2717          colour_option = (char *)"auto";
2718          break;
2719    
2720          case 'o':
2721          only_matching_last = add_number(0, only_matching_last);
2722          if (only_matching == NULL) only_matching = only_matching_last;
2723          break;
2724    
2725    #ifdef JFRIEDL_DEBUG
2726          case 'S':
2727          S_arg = 0;
2728          break;
2729    #endif
2730          }
2731        continue;
2732        }
2733    
2734      /* Otherwise, find the data string for the option. */
2735    
2736      if (*option_data == 0)
2737        {
2738        if (i >= argc - 1 || longopwasequals)
2739          {
2740          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2741          pcregrep_exit(usage(2));
2742          }
2743        option_data = argv[++i];
2744        }
2745    
2746      /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
2747      added to a chain of numbers. */
2748    
2749      if (op->type == OP_OP_NUMBERS)
2750        {
2751        unsigned long int n = decode_number(option_data, op, longop);
2752        omdatastr *omd = (omdatastr *)op->dataptr;
2753        *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
2754        if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
2755        }
2756    
2757      /* If the option type is OP_PATLIST, it's the -e option, or one of the
2758      include/exclude options, which can be called multiple times to create lists
2759      of patterns. */
2760    
2761      else if (op->type == OP_PATLIST)
2762        {
2763        patdatastr *pd = (patdatastr *)op->dataptr;
2764        *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2765        if (*(pd->lastptr) == NULL) goto EXIT2;
2766        if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2767        }
2768    
2769      /* If the option type is OP_FILELIST, it's one of the options that names a
2770      file. */
2771    
2772      else if (op->type == OP_FILELIST)
2773        {
2774        fndatastr *fd = (fndatastr *)op->dataptr;
2775        fn = (fnstr *)malloc(sizeof(fnstr));
2776        if (fn == NULL)
2777          {
2778          fprintf(stderr, "pcregrep: malloc failed\n");
2779          goto EXIT2;
2780          }
2781        fn->next = NULL;
2782        fn->name = option_data;
2783        if (*(fd->anchor) == NULL)
2784          *(fd->anchor) = fn;
2785        else
2786          (*(fd->lastptr))->next = fn;
2787        *(fd->lastptr) = fn;
2788        }
2789    
2790      /* Handle OP_BINFILES */
2791    
2792      else if (op->type == OP_BINFILES)
2793        {
2794        if (strcmp(option_data, "binary") == 0)
2795          binary_files = BIN_BINARY;
2796        else if (strcmp(option_data, "without-match") == 0)
2797          binary_files = BIN_NOMATCH;
2798        else if (strcmp(option_data, "text") == 0)
2799          binary_files = BIN_TEXT;
2800        else
2801          {
2802          fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2803            option_data);
2804          pcregrep_exit(usage(2));
2805        }        }
2806      }      }
2807    
2808      /* Otherwise, deal with a single string or numeric data value. */
2809    
2810      else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2811               op->type != OP_OP_NUMBER)
2812        {
2813        *((char **)op->dataptr) = option_data;
2814        }
2815      else
2816        {
2817        unsigned long int n = decode_number(option_data, op, longop);
2818        if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
2819          else *((int *)op->dataptr) = n;
2820        }
2821    }    }
2822    
2823  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  /* Options have been decoded. If -C was used, its value is used as a default
2824  hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  for -A and -B. */
2825    
2826  if (pattern_list == NULL || hints_list == NULL)  if (both_context > 0)
2827    {    {
2828    fprintf(stderr, "pcregrep: malloc failed\n");    if (after_context == 0) after_context = both_context;
2829    return 2;    if (before_context == 0) before_context = both_context;
2830    }    }
2831    
2832  /* Compile the regular expression(s). */  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2833    However, all three set show_only_matching because they display, each in their
2834    own way, only the data that has matched. */
2835    
2836  if (pattern_filename != NULL)  if ((only_matching != NULL && (file_offsets || line_offsets)) ||
2837        (file_offsets && line_offsets))
2838    {    {
2839    FILE *f = fopen(pattern_filename, "r");    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2840    char buffer[BUFSIZ];      "and/or --line-offsets\n");
2841    if (f == NULL)    pcregrep_exit(usage(2));
2842      }
2843    
2844    if (only_matching != NULL || file_offsets || line_offsets)
2845      show_only_matching = TRUE;
2846    
2847    /* If a locale has not been provided as an option, see if the LC_CTYPE or
2848    LC_ALL environment variable is set, and if so, use it. */
2849    
2850    if (locale == NULL)
2851      {
2852      locale = getenv("LC_ALL");
2853      locale_from = "LCC_ALL";
2854      }
2855    
2856    if (locale == NULL)
2857      {
2858      locale = getenv("LC_CTYPE");
2859      locale_from = "LC_CTYPE";
2860      }
2861    
2862    /* If a locale has been provided, set it, and generate the tables the PCRE
2863    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2864    
2865    if (locale != NULL)
2866      {
2867      if (setlocale(LC_CTYPE, locale) == NULL)
2868      {      {
2869      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2870        strerror(errno));        locale, locale_from);
2871      return 2;      return 2;
2872      }      }
2873    while (fgets(buffer, sizeof(buffer), f) != NULL)    pcretables = pcre_maketables();
2874      }
2875    
2876    /* Sort out colouring */
2877    
2878    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2879      {
2880      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2881      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2882      else
2883      {      {
2884      char *s = buffer + (int)strlen(buffer);      fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2885      if (pattern_count >= MAX_PATTERN_COUNT)        colour_option);
2886        {      return 2;
2887        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",      }
2888          MAX_PATTERN_COUNT);    if (do_colour)
2889        return 2;      {
2890        }      char *cs = getenv("PCREGREP_COLOUR");
2891      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2892      if (s == buffer) continue;      if (cs != NULL) colour_string = cs;
     *s = 0;  
     pattern_list[pattern_count] = pcre_compile(buffer, options, &error,  
       &errptr, NULL);  
     if (pattern_list[pattern_count++] == NULL)  
       {  
       fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",  
         pattern_count, errptr, error);  
       return 2;  
       }  
2893      }      }
   fclose(f);  
2894    }    }
2895    
2896  /* If no file name, a single regex must be given inline */  /* Interpret the newline type; the default settings are Unix-like. */
2897    
2898    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2899      {
2900      pcre_options |= PCRE_NEWLINE_CR;
2901      endlinetype = EL_CR;
2902      }
2903    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2904      {
2905      pcre_options |= PCRE_NEWLINE_LF;
2906      endlinetype = EL_LF;
2907      }
2908    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2909      {
2910      pcre_options |= PCRE_NEWLINE_CRLF;
2911      endlinetype = EL_CRLF;
2912      }
2913    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2914      {
2915      pcre_options |= PCRE_NEWLINE_ANY;
2916      endlinetype = EL_ANY;
2917      }
2918    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2919      {
2920      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2921      endlinetype = EL_ANYCRLF;
2922      }
2923  else  else
2924    {    {
2925    if (i >= argc) return usage(0);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2926    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);    return 2;
2927    if (pattern_list[0] == NULL)    }
2928    
2929    /* Interpret the text values for -d and -D */
2930    
2931    if (dee_option != NULL)
2932      {
2933      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2934      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2935      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2936      else
2937        {
2938        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2939        return 2;
2940        }
2941      }
2942    
2943    if (DEE_option != NULL)
2944      {
2945      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2946      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2947      else
2948      {      {
2949      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
       error);  
2950      return 2;      return 2;
2951      }      }
   pattern_count++;  
2952    }    }
2953    
2954  /* Study the regular expressions, as we will be running them may times */  /* Check the values for Jeffrey Friedl's debugging options. */
2955    
2956    #ifdef JFRIEDL_DEBUG
2957    if (S_arg > 9)
2958      {
2959      fprintf(stderr, "pcregrep: bad value for -S option\n");
2960      return 2;
2961      }
2962    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2963      {
2964      if (jfriedl_XT == 0) jfriedl_XT = 1;
2965      if (jfriedl_XR == 0) jfriedl_XR = 1;
2966      }
2967    #endif
2968    
2969    /* Get memory for the main buffer. */
2970    
2971    bufsize = 3*bufthird;
2972    main_buffer = (char *)malloc(bufsize);
2973    
2974    if (main_buffer == NULL)
2975      {
2976      fprintf(stderr, "pcregrep: malloc failed\n");
2977      goto EXIT2;
2978      }
2979    
2980    /* If no patterns were provided by -e, and there are no files provided by -f,
2981    the first argument is the one and only pattern, and it must exist. */
2982    
2983    if (patterns == NULL && pattern_files == NULL)
2984      {
2985      if (i >= argc) return usage(2);
2986      patterns = patterns_last = add_pattern(argv[i++], NULL);
2987      if (patterns == NULL) goto EXIT2;
2988      }
2989    
2990    /* Compile the patterns that were provided on the command line, either by
2991    multiple uses of -e or as a single unkeyed pattern. We cannot do this until
2992    after all the command-line options are read so that we know which PCRE options
2993    to use. When -F is used, compile_pattern() may add another block into the
2994    chain, so we must not access the next pointer till after the compile. */
2995    
2996    for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
2997      {
2998      if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
2999           (j == 1 && patterns->next == NULL)? 0 : j))
3000        goto EXIT2;
3001      }
3002    
3003    /* Read and compile the regular expressions that are provided in files. */
3004    
3005    for (fn = pattern_files; fn != NULL; fn = fn->next)
3006      {
3007      if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3008        goto EXIT2;
3009      }
3010    
3011    /* Study the regular expressions, as we will be running them many times. If an
3012    extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
3013    returned, even if studying produces no data. */
3014    
3015    if (match_limit > 0 || match_limit_recursion > 0)
3016      study_options |= PCRE_STUDY_EXTRA_NEEDED;
3017    
3018    /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
3019    
3020    #ifdef SUPPORT_PCREGREP_JIT
3021    if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3022      jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
3023    #endif
3024    
3025  for (j = 0; j < pattern_count; j++)  for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3026    {    {
3027    hints_list[j] = pcre_study(pattern_list[j], 0, &error);    cp->hint = pcre_study(cp->compiled, study_options, &error);
3028    if (error != NULL)    if (error != NULL)
3029      {      {
3030      char s[16];      char s[16];
3031      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
3032      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
3033      return 2;      goto EXIT2;
3034        }
3035    #ifdef SUPPORT_PCREGREP_JIT
3036      if (jit_stack != NULL && cp->hint != NULL)
3037        pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
3038    #endif
3039      }
3040    
3041    /* If --match-limit or --recursion-limit was set, put the value(s) into the
3042    pcre_extra block for each pattern. There will always be an extra block because
3043    of the use of PCRE_STUDY_EXTRA_NEEDED above. */
3044    
3045    for (cp = patterns; cp != NULL; cp = cp->next)
3046      {
3047      if (match_limit > 0)
3048        {
3049        cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
3050        cp->hint->match_limit = match_limit;
3051        }
3052    
3053      if (match_limit_recursion > 0)
3054        {
3055        cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3056        cp->hint->match_limit_recursion = match_limit_recursion;
3057        }
3058      }
3059    
3060    /* If there are include or exclude patterns read from the command line, compile
3061    them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3062    0. */
3063    
3064    for (j = 0; j < 4; j++)
3065      {
3066      int k;
3067      for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
3068        {
3069        if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
3070             (k == 1 && cp->next == NULL)? 0 : k))
3071          goto EXIT2;
3072      }      }
3073    }    }
3074    
3075  /* If there are no further arguments, do the business on stdin and exit */  /* Read and compile include/exclude patterns from files. */
3076    
3077    for (fn = include_from; fn != NULL; fn = fn->next)
3078      {
3079      if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3080        goto EXIT2;
3081      }
3082    
3083    for (fn = exclude_from; fn != NULL; fn = fn->next)
3084      {
3085      if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3086        goto EXIT2;
3087      }
3088    
3089    /* If there are no files that contain lists of files to search, and there are
3090    no file arguments, search stdin, and then exit. */
3091    
3092  if (i >= argc) return pcregrep(stdin, NULL);  if (file_lists == NULL && i >= argc)
3093      {
3094      rc = pcregrep(stdin, FR_PLAIN, stdin_name,
3095        (filenames > FN_DEFAULT)? stdin_name : NULL);
3096      goto EXIT;
3097      }
3098    
3099    /* If any files that contain a list of files to search have been specified,
3100    read them line by line and search the given files. */
3101    
3102    for (fn = file_lists; fn != NULL; fn = fn->next)
3103      {
3104      char buffer[PATBUFSIZE];
3105      FILE *fl;
3106      if (strcmp(fn->name, "-") == 0) fl = stdin; else
3107        {
3108        fl = fopen(fn->name, "rb");
3109        if (fl == NULL)
3110          {
3111          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
3112            strerror(errno));
3113          goto EXIT2;
3114          }
3115        }
3116      while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3117        {
3118        int frc;
3119        char *end = buffer + (int)strlen(buffer);
3120        while (end > buffer && isspace(end[-1])) end--;
3121        *end = 0;
3122        if (*buffer != 0)
3123          {
3124          frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3125          if (frc > 1) rc = frc;
3126            else if (frc == 0 && rc == 1) rc = 0;
3127          }
3128        }
3129      if (fl != stdin) fclose(fl);
3130      }
3131    
3132  /* Otherwise, work through the remaining arguments as files or directories.  /* After handling file-list, work through remaining arguments. Pass in the fact
3133  Pass in the fact that there is only one argument at top level - this suppresses  that there is only one argument at top level - this suppresses the file name if
3134  the file name if the argument is not a directory. */  the argument is not a directory and filenames are not otherwise forced. */
3135    
3136  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1 && file_lists == NULL;
 if (filenames_only) filenames = TRUE;  
3137    
3138  for (; i < argc; i++)  for (; i < argc; i++)
3139    {    {
3140    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
3141    if (frc == 0 && rc == 1) rc = 0;      only_one_at_top);
3142      if (frc > 1) rc = frc;
3143        else if (frc == 0 && rc == 1) rc = 0;
3144    }    }
3145    
3146  return rc;  EXIT:
3147    #ifdef SUPPORT_PCREGREP_JIT
3148    if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3149    #endif
3150    
3151    if (main_buffer != NULL) free(main_buffer);
3152    
3153    free_pattern_chain(patterns);
3154    free_pattern_chain(include_patterns);
3155    free_pattern_chain(include_dir_patterns);
3156    free_pattern_chain(exclude_patterns);
3157    free_pattern_chain(exclude_dir_patterns);
3158    
3159    free_file_chain(exclude_from);
3160    free_file_chain(include_from);
3161    free_file_chain(pattern_files);
3162    free_file_chain(file_lists);
3163    
3164    while (only_matching != NULL)
3165      {
3166      omstr *this = only_matching;
3167      only_matching = this->next;
3168      free(this);
3169      }
3170    
3171    pcregrep_exit(rc);
3172    
3173    EXIT2:
3174    rc = 2;
3175    goto EXIT;
3176  }  }
3177    
3178  /* End */  /* End of pcregrep */

Legend:
Removed from v.53  
changed lines
  Added in v.1039

  ViewVC Help
Powered by ViewVC 1.1.5