/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC revision 1005 by ph10, Fri Aug 17 16:20:31 2012 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix system it can recurse into directories. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2012 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55    #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 18  its pattern matching. On a Unix system i Line 70  its pattern matching. On a Unix system i
70    
71  typedef int BOOL;  typedef int BOOL;
72    
73  #define VERSION "2.0 01-Aug-2001"  #define OFFSET_SIZE 99
74  #define MAX_PATTERN_COUNT 100  
75    #if BUFSIZ > 8192
76    #define MAXPATLEN BUFSIZ
77    #else
78    #define MAXPATLEN 8192
79    #endif
80    
81    #define PATBUFSIZE (MAXPATLEN + 10)   /* Allows for prefix+suffix */
82    
83    /* Values for the "filenames" variable, which specifies options for file name
84    output. The order is important; it is assumed that a file name is wanted for
85    all values greater than FN_DEFAULT. */
86    
87    enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
88    
89    /* File reading styles */
90    
91    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
92    
93    /* Actions for the -d and -D options */
94    
95    enum { dee_READ, dee_SKIP, dee_RECURSE };
96    enum { DEE_READ, DEE_SKIP };
97    
98    /* Actions for special processing options (flag bits) */
99    
100    #define PO_WORD_MATCH     0x0001
101    #define PO_LINE_MATCH     0x0002
102    #define PO_FIXED_STRINGS  0x0004
103    
104    /* Line ending types */
105    
106    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
107    
108    /* Binary file options */
109    
110    enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
111    
112    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
113    environments), a warning is issued if the value of fwrite() is ignored.
114    Unfortunately, casting to (void) does not suppress the warning. To get round
115    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
116    apply to fprintf(). */
117    
118    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
119    
120    
121    
122  /*************************************************  /*************************************************
123  *               Global variables                 *  *               Global variables                 *
124  *************************************************/  *************************************************/
125    
126  static char *pattern_filename = NULL;  /* Jeffrey Friedl has some debugging requirements that are not part of the
127  static int  pattern_count = 0;  regular code. */
128  static pcre **pattern_list;  
129  static pcre_extra **hints_list;  #ifdef JFRIEDL_DEBUG
130    static int S_arg = -1;
131    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
132    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
133    static const char *jfriedl_prefix = "";
134    static const char *jfriedl_postfix = "";
135    #endif
136    
137    static int  endlinetype;
138    
139    static char *colour_string = (char *)"1;31";
140    static char *colour_option = NULL;
141    static char *dee_option = NULL;
142    static char *DEE_option = NULL;
143    static char *locale = NULL;
144    static char *main_buffer = NULL;
145    static char *newline = NULL;
146    static char *stdin_name = (char *)"(standard input)";
147    
148    static const unsigned char *pcretables = NULL;
149    
150    static int after_context = 0;
151    static int before_context = 0;
152    static int binary_files = BIN_BINARY;
153    static int both_context = 0;
154    static int bufthird = PCREGREP_BUFSIZE;
155    static int bufsize = 3*PCREGREP_BUFSIZE;
156    
157    #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
158    static int dee_action = dee_SKIP;
159    #else
160    static int dee_action = dee_READ;
161    #endif
162    
163    static int DEE_action = DEE_READ;
164    static int error_count = 0;
165    static int filenames = FN_DEFAULT;
166    static int only_matching = -1;
167    static int pcre_options = 0;
168    static int process_options = 0;
169    
170    #ifdef SUPPORT_PCREGREP_JIT
171    static int study_options = PCRE_STUDY_JIT_COMPILE;
172    #else
173    static int study_options = 0;
174    #endif
175    
176    static unsigned long int match_limit = 0;
177    static unsigned long int match_limit_recursion = 0;
178    
179  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
180  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
181  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
182    static BOOL hyphenpending = FALSE;
183  static BOOL invert = FALSE;  static BOOL invert = FALSE;
184    static BOOL line_buffered = FALSE;
185    static BOOL line_offsets = FALSE;
186    static BOOL multiline = FALSE;
187  static BOOL number = FALSE;  static BOOL number = FALSE;
188  static BOOL recurse = FALSE;  static BOOL omit_zero_count = FALSE;
189    static BOOL resource_error = FALSE;
190    static BOOL quiet = FALSE;
191  static BOOL silent = FALSE;  static BOOL silent = FALSE;
192  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
193    
194    /* Structure for list of file names (for -f and --{in,ex}clude-from) */
195    
196    typedef struct fnstr {
197      struct fnstr *next;
198      char *name;
199    } fnstr;
200    
201    static fnstr *exclude_from = NULL;
202    static fnstr *exclude_from_last = NULL;
203    static fnstr *include_from = NULL;
204    static fnstr *include_from_last = NULL;
205    
206    static fnstr *file_lists = NULL;
207    static fnstr *file_lists_last = NULL;
208    static fnstr *pattern_files = NULL;
209    static fnstr *pattern_files_last = NULL;
210    
211    /* Structure for holding the two variables that describe a file name chain. */
212    
213    typedef struct fndatastr {
214      fnstr **anchor;
215      fnstr **lastptr;
216    } fndatastr;
217    
218    static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
219    static fndatastr include_from_data = { &include_from, &include_from_last };
220    static fndatastr file_lists_data = { &file_lists, &file_lists_last };
221    static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
222    
223    /* Structure for pattern and its compiled form; used for matching patterns and
224    also for include/exclude patterns. */
225    
226    typedef struct patstr {
227      struct patstr *next;
228      char *string;
229      pcre *compiled;
230      pcre_extra *hint;
231    } patstr;
232    
233    static patstr *patterns = NULL;
234    static patstr *patterns_last = NULL;
235    static patstr *include_patterns = NULL;
236    static patstr *include_patterns_last = NULL;
237    static patstr *exclude_patterns = NULL;
238    static patstr *exclude_patterns_last = NULL;
239    static patstr *include_dir_patterns = NULL;
240    static patstr *include_dir_patterns_last = NULL;
241    static patstr *exclude_dir_patterns = NULL;
242    static patstr *exclude_dir_patterns_last = NULL;
243    
244    /* Structure holding the two variables that describe a pattern chain. A pointer
245    to such structures is used for each appropriate option. */
246    
247    typedef struct patdatastr {
248      patstr **anchor;
249      patstr **lastptr;
250    } patdatastr;
251    
252    static patdatastr match_patdata = { &patterns, &patterns_last };
253    static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
254    static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
255    static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
256    static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
257    
258    static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
259                                     &include_dir_patterns, &exclude_dir_patterns };
260    
261    static const char *incexname[4] = { "--include", "--exclude",
262                                        "--include-dir", "--exclude-dir" };
263    
264  /* Structure for options and list of them */  /* Structure for options and list of them */
265    
266    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
267           OP_OP_NUMBER, OP_PATLIST, OP_FILELIST, OP_BINFILES };
268    
269  typedef struct option_item {  typedef struct option_item {
270      int type;
271    int one_char;    int one_char;
272    char *long_name;    void *dataptr;
273    char *help_text;    const char *long_name;
274      const char *help_text;
275  } option_item;  } option_item;
276    
277    /* Options without a single-letter equivalent get a negative value. This can be
278    used to identify them. */
279    
280    #define N_COLOUR       (-1)
281    #define N_EXCLUDE      (-2)
282    #define N_EXCLUDE_DIR  (-3)
283    #define N_HELP         (-4)
284    #define N_INCLUDE      (-5)
285    #define N_INCLUDE_DIR  (-6)
286    #define N_LABEL        (-7)
287    #define N_LOCALE       (-8)
288    #define N_NULL         (-9)
289    #define N_LOFFSETS     (-10)
290    #define N_FOFFSETS     (-11)
291    #define N_LBUFFER      (-12)
292    #define N_M_LIMIT      (-13)
293    #define N_M_LIMIT_REC  (-14)
294    #define N_BUFSIZE      (-15)
295    #define N_NOJIT        (-16)
296    #define N_FILE_LIST    (-17)
297    #define N_BINARY_FILES (-18)
298    #define N_EXCLUDE_FROM (-19)
299    #define N_INCLUDE_FROM (-20)
300    
301  static option_item optionlist[] = {  static option_item optionlist[] = {
302    { -1,  "help",         "display this help and exit" },    { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
303    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
304    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
305    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
306    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
307    { 'n', "line-number",  "print line number with output lines" },    { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
308    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
309    { 's', "no-messages",  "suppress error messages" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
310    { 'V', "version",      "print version information and exit" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
311    { 'v', "invert-match", "select non-matching lines" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
312    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
313    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
314    { 0,    NULL,           NULL }    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
315      { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
316      { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
317      { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
318      { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
319      { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
320      { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
321      { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
322      { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
323      { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
324    #ifdef SUPPORT_PCREGREP_JIT
325      { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
326    #else
327      { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
328    #endif
329      { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
330      { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
331      { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
332      { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
333      { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
334      { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
335      { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
336      { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
337      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
338      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
339      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
340      { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
341      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
342      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
343      { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
344      { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
345      { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
346      { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
347      { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
348      { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
349    
350      /* These two were accidentally implemented with underscores instead of
351      hyphens in the option names. As this was not discovered for several releases,
352      the incorrect versions are left in the table for compatibility. However, the
353      --help function misses out any option that has an underscore in its name. */
354    
355      { OP_PATLIST,   N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
356      { OP_PATLIST,   N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
357    
358    #ifdef JFRIEDL_DEBUG
359      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
360    #endif
361      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
362      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
363      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
364      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
365      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
366      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
367      { OP_NODATA,    0,        NULL,               NULL,            NULL }
368  };  };
369    
370    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
371    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
372    that the combination of -w and -x has the same effect as -x on its own, so we
373    can treat them as the same. Note that the MAXPATLEN macro assumes the longest
374    prefix+suffix is 10 characters; if anything longer is added, it must be
375    adjusted. */
376    
377    static const char *prefix[] = {
378      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
379    
380    static const char *suffix[] = {
381      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
382    
383    /* UTF-8 tables - used only when the newline setting is "any". */
384    
385    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
386    
387    const char utf8_table4[] = {
388      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
389      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
390      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
391      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
392    
393    
394    
395    /*************************************************
396    *          Add item to chain of patterns         *
397    *************************************************/
398    
399    /* Used to add an item onto a chain, or just return an unconnected item if the
400    "after" argument is NULL.
401    
402    Arguments:
403      s          pattern string to add
404      after      if not NULL points to item to insert after
405    
406    Returns:     new pattern block, or NULL after malloc failure
407    */
408    
409    static patstr *
410    add_pattern(char *s, patstr *after)
411    {
412    patstr *p = (patstr *)malloc(sizeof(patstr));
413    if (p == NULL)
414      {
415      fprintf(stderr, "pcregrep: malloc failed\n");
416      return NULL;
417      }
418    if (strlen(s) > MAXPATLEN)
419      {
420      fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
421        MAXPATLEN);
422      return NULL;
423      }
424    p->next = NULL;
425    p->string = s;
426    p->compiled = NULL;
427    p->hint = NULL;
428    
429    if (after != NULL)
430      {
431      p->next = after->next;
432      after->next = p;
433      }
434    return p;
435    }
436    
437    
438    /*************************************************
439    *           Free chain of patterns               *
440    *************************************************/
441    
442    /* Used for several chains of patterns.
443    
444    Argument: pointer to start of chain
445    Returns:  nothing
446    */
447    
448    static void
449    free_pattern_chain(patstr *pc)
450    {
451    while (pc != NULL)
452      {
453      patstr *p = pc;
454      pc = p->next;
455      if (p->hint != NULL) pcre_free_study(p->hint);
456      if (p->compiled != NULL) pcre_free(p->compiled);
457      free(p);
458      }
459    }
460    
461    
462    /*************************************************
463    *           Free chain of file names             *
464    *************************************************/
465    
466    /*
467    Argument: pointer to start of chain
468    Returns:  nothing
469    */
470    
471    static void
472    free_file_chain(fnstr *fn)
473    {
474    while (fn != NULL)
475      {
476      fnstr *f = fn;
477      fn = f->next;
478      free(f);
479      }
480    }
481    
482    
483    /*************************************************
484    *         Exit from the program                  *
485    *************************************************/
486    
487    /* If there has been a resource error, give a suitable message.
488    
489    Argument:  the return code
490    Returns:   does not return
491    */
492    
493    static void
494    pcregrep_exit(int rc)
495    {
496    if (resource_error)
497      {
498      fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
499        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
500        PCRE_ERROR_JIT_STACKLIMIT);
501      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
502      }
503    
504    exit(rc);
505    }
506    
507    
508  /*************************************************  /*************************************************
509  *       Functions for directory scanning         *  *            OS-specific functions               *
510  *************************************************/  *************************************************/
511    
512  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
513  although at present the only ones are for Unix, and for "no directory recursion  although at present the only ones are for Unix, Win32, and for "no support". */
 support". */  
514    
515    
516  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
517    
518  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
519  #include <sys/types.h>  #include <sys/types.h>
520  #include <sys/stat.h>  #include <sys/stat.h>
521  #include <dirent.h>  #include <dirent.h>
522    
523  typedef DIR directory_type;  typedef DIR directory_type;
524    #define FILESEP '/'
525    
526  int  static int
527  isdirectory(char *filename)  isdirectory(char *filename)
528  {  {
529  struct stat statbuf;  struct stat statbuf;
530  if (stat(filename, &statbuf) < 0)  if (stat(filename, &statbuf) < 0)
531    return 0;        /* In the expectation that opening as a file will fail */    return 0;        /* In the expectation that opening as a file will fail */
532  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return (statbuf.st_mode & S_IFMT) == S_IFDIR;
533  }  }
534    
535  directory_type *  static directory_type *
536  opendirectory(char *filename)  opendirectory(char *filename)
537  {  {
538  return opendir(filename);  return opendir(filename);
539  }  }
540    
541  char *  static char *
542  readdirectory(directory_type *dir)  readdirectory(directory_type *dir)
543  {  {
544  for (;;)  for (;;)
# Line 108  for (;;) Line 548  for (;;)
548    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
549      return dent->d_name;      return dent->d_name;
550    }    }
551    /* Control never reaches here */
552    }
553    
554    static void
555    closedirectory(directory_type *dir)
556    {
557    closedir(dir);
558    }
559    
560    
561    /************* Test for regular file in Unix **********/
562    
/* Tests whether a path names a regular file.

Argument:  the path to test
Returns:   1 if stat() reports a regular file, 0 if it reports anything else;
           also 1 if stat() itself fails
*/

static int
isregfile(char *filename)
{
struct stat info;
if (stat(filename, &info) >= 0)
  return S_ISREG(info.st_mode);
return 1;          /* In the expectation that opening as a file will fail */
}
571    
572    
573    /************* Test for a terminal in Unix **********/
574    
575    static BOOL
576    is_stdout_tty(void)
577    {
578    return isatty(fileno(stdout));
579    }
580    
581    static BOOL
582    is_file_tty(FILE *f)
583    {
584    return isatty(fileno(f));
585    }
586    
587    
588    /************* Directory scanning in Win32 ***********/
589    
590    /* I (Philip Hazel) have no means of testing this code. It was contributed by
591    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
592    when it did not exist. David Byron added a patch that moved the #include of
593    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
594    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
595    undefined when it is indeed undefined. */
596    
597    #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
598    
599    #ifndef STRICT
600    # define STRICT
601    #endif
602    #ifndef WIN32_LEAN_AND_MEAN
603    # define WIN32_LEAN_AND_MEAN
604    #endif
605    
606    #include <windows.h>
607    
608    #ifndef INVALID_FILE_ATTRIBUTES
609    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
610    #endif
611    
612    typedef struct directory_type
613    {
614    HANDLE handle;
615    BOOL first;
616    WIN32_FIND_DATA data;
617    } directory_type;
618    
619    #define FILESEP '/'
620    
621    int
622    isdirectory(char *filename)
623    {
624    DWORD attr = GetFileAttributes(filename);
625    if (attr == INVALID_FILE_ATTRIBUTES)
626      return 0;
627    return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
628    }
629    
630    directory_type *
631    opendirectory(char *filename)
632    {
633    size_t len;
634    char *pattern;
635    directory_type *dir;
636    DWORD err;
637    len = strlen(filename);
638    pattern = (char *)malloc(len + 3);
639    dir = (directory_type *)malloc(sizeof(*dir));
640    if ((pattern == NULL) || (dir == NULL))
641      {
642      fprintf(stderr, "pcregrep: malloc failed\n");
643      pcregrep_exit(2);
644      }
645    memcpy(pattern, filename, len);
646    memcpy(&(pattern[len]), "\\*", 3);
647    dir->handle = FindFirstFile(pattern, &(dir->data));
648    if (dir->handle != INVALID_HANDLE_VALUE)
649      {
650      free(pattern);
651      dir->first = TRUE;
652      return dir;
653      }
654    err = GetLastError();
655    free(pattern);
656    free(dir);
657    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
658    return NULL;
659    }
660    
661    char *
662    readdirectory(directory_type *dir)
663    {
664    for (;;)
665      {
666      if (!dir->first)
667        {
668        if (!FindNextFile(dir->handle, &(dir->data)))
669          return NULL;
670        }
671      else
672        {
673        dir->first = FALSE;
674        }
675      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
676        return dir->data.cFileName;
677      }
678    #ifndef _MSC_VER
679  return NULL;   /* Keep compiler happy; never executed */  return NULL;   /* Keep compiler happy; never executed */
680    #endif
681  }  }
682    
683  void  void
684  closedirectory(directory_type *dir)  closedirectory(directory_type *dir)
685  {  {
686  closedir(dir);  FindClose(dir->handle);
687    free(dir);
688  }  }
689    
690    
691  #else  /************* Test for regular file in Win32 **********/
692    
693    /* I don't know how to do this, or if it can be done; assume all paths are
694    regular if they are not directories. */
695    
/* Returns 1 for any path that isdirectory() does not report as a directory,
and 0 otherwise. */

int
isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
700    
701    
702    /************* Test for a terminal in Win32 **********/
703    
704    /* I don't know how to do this; assume never */
705    
706    static BOOL
707    is_stdout_tty(void)
708    {
709    return FALSE;
710    }
711    
712    static BOOL
713    is_file_tty(FILE *f)
714    {
715    return FALSE;
716    }
717    
718    
719  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
720    
721  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
722    
723    #else
724    
725    #define FILESEP 0
726  typedef void directory_type;  typedef void directory_type;
727    
728  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
729  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
730  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
731  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
732    
733    
734    /************* Test for regular when we can't do it **********/
735    
736    /* Assume all files are regular. */
737    
/* Stub version: with no way of testing, every path is reported as a regular
file. The argument is ignored. */

int
isregfile(char *filename)
{
return 1;
}
739    
740    
741    /************* Test for a terminal when we can't do it **********/
742    
743    static BOOL
744    is_stdout_tty(void)
745    {
746    return FALSE;
747    }
748    
749    static BOOL
750    is_file_tty(FILE *f)
751    {
752    return FALSE;
753    }
754    
755  #endif  #endif
756    
757    
758    
759  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
760  /*************************************************  /*************************************************
761  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
762  *************************************************/  *************************************************/
# Line 159  return sys_errlist[n]; Line 779  return sys_errlist[n];
779    
780    
781  /*************************************************  /*************************************************
782  *              Grep an individual file           *  *            Test exclude/includes               *
783  *************************************************/  *************************************************/
784    
785  static int  /* If any exclude pattern matches, the path is excluded. Otherwise, unless
786  pcregrep(FILE *in, char *name)  there are no includes, the path must match an include pattern.
787    
788    Arguments:
789      path      the path to be matched
790      ip        the chain of include patterns
791      ep        the chain of exclude patterns
792    
793    Returns:    TRUE if the path is not excluded
794    */
795    
796    static BOOL
797    test_incexc(char *path, patstr *ip, patstr *ep)
798  {  {
799  int rc = 1;  int plen = strlen(path);
 int linenumber = 0;  
 int count = 0;  
 int offsets[99];  
 char buffer[BUFSIZ];  
800    
801  while (fgets(buffer, sizeof(buffer), in) != NULL)  for (; ep != NULL; ep = ep->next)
802    {    {
803    BOOL match = FALSE;    if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
804    int i;      return FALSE;
805    int length = (int)strlen(buffer);    }
   if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;  
   linenumber++;  
806    
807    for (i = 0; !match && i < pattern_count; i++)  if (ip == NULL) return TRUE;
     {  
     match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,  
       offsets, 99) >= 0;  
     if (match && whole_lines && offsets[1] != length) match = FALSE;  
     }  
808    
809    if (match != invert)  for (; ip != NULL; ip = ip->next)
810      {    {
811      if (count_only) count++;    if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
812        return TRUE;
813      }
814    
815      else if (filenames_only)  return FALSE;
816        {  }
       fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);  
       return 0;  
       }  
817    
     else if (silent) return 0;  
818    
     else  
       {  
       if (name != NULL) fprintf(stdout, "%s:", name);  
       if (number) fprintf(stdout, "%d:", linenumber);  
       fprintf(stdout, "%s\n", buffer);  
       }  
819    
820      rc = 0;  /*************************************************
821      }  *            Read one line of input              *
822    }  *************************************************/
823    
824  if (count_only)  /* Normally, input is read using fread() into a large buffer, so many lines may
825    be read at once. However, doing this for tty input means that no output appears
826    until a lot of input has been typed. Instead, tty input is handled line by
827    line. We cannot use fgets() for this, because it does not stop at a binary
828    zero, and therefore there is no way of telling how many characters it has read,
829    because there may be binary zeros embedded in the data.
830    
831    Arguments:
832      buffer     the buffer to read into
833      length     the maximum number of characters to read
834      f          the file
835    
836    Returns:     the number of characters read, zero at end of file
837    */
838    
/* Read bytes from f into buffer one at a time with fgetc(), stopping after a
'\n' has been stored, when length bytes have been stored, or at end of file.
Binary zeros are stored like any other byte, and no terminating zero is
added, so the returned count is the only indication of how much was read.

The space check is made before each read. This means that a length of zero
(or less) stores nothing and consumes nothing from the file, instead of
writing one byte beyond the buffer. For any positive length the result is the
same as storing first and checking afterwards.

Returns the number of bytes stored; zero at end of file. */

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;
while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = c;
  if (c == '\n') break;    /* A newline ends the line, and is included */
  }
return yield;
}
851    
852    
853    
   
854  /*************************************************  /*************************************************
855  *     Grep a file or recurse into a directory    *  *             Find end of line                   *
856  *************************************************/  *************************************************/
857    
858  static int  /* The length of the endline sequence that is found is set via lenptr. This may
859  grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,  be zero at the very end of the file if there is no line-ending sequence there.
   BOOL only_one_at_top)  
 {  
 int rc = 1;  
 int sep;  
 FILE *in;  
860    
861  /* If the file is a directory and we are recursing, scan each file within it.  Arguments:
862  The scanning code is localized so it can be made system-specific. */    p         current position in line
863      endptr    end of available data
864      lenptr    where to put the length of the eol sequence
865    
866    Returns:    pointer after the last byte of the line,
867                including the newline byte(s)
868    */
869    
870  if ((sep = isdirectory(filename)) != 0 && recurse)  static char *
871    end_of_line(char *p, char *endptr, int *lenptr)
872    {
873    switch(endlinetype)
874    {    {
875    char buffer[1024];    default:      /* Just in case */
876    char *nextfile;    case EL_LF:
877    directory_type *dir = opendirectory(filename);    while (p < endptr && *p != '\n') p++;
878      if (p < endptr)
   if (dir == NULL)  
879      {      {
880      fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,      *lenptr = 1;
881        strerror(errno));      return p + 1;
     return 2;  
882      }      }
883      *lenptr = 0;
884      return endptr;
885    
886    while ((nextfile = readdirectory(dir)) != NULL)    case EL_CR:
887      while (p < endptr && *p != '\r') p++;
888      if (p < endptr)
889      {      {
890      int frc;      *lenptr = 1;
891      sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);      return p + 1;
     frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);  
     if (frc == 0 && rc == 1) rc = 0;  
892      }      }
893      *lenptr = 0;
894      return endptr;
895    
896    closedirectory(dir);    case EL_CRLF:
897    return rc;    for (;;)
898        {
899        while (p < endptr && *p != '\r') p++;
900        if (++p >= endptr)
901          {
902          *lenptr = 0;
903          return endptr;
904          }
905        if (*p == '\n')
906          {
907          *lenptr = 2;
908          return p + 1;
909          }
910        }
911      break;
912    
913      case EL_ANYCRLF:
914      while (p < endptr)
915        {
916        int extra = 0;
917        register int c = *((unsigned char *)p);
918    
919        if (utf8 && c >= 0xc0)
920          {
921          int gcii, gcss;
922          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
923          gcss = 6*extra;
924          c = (c & utf8_table3[extra]) << gcss;
925          for (gcii = 1; gcii <= extra; gcii++)
926            {
927            gcss -= 6;
928            c |= (p[gcii] & 0x3f) << gcss;
929            }
930          }
931    
932        p += 1 + extra;
933    
934        switch (c)
935          {
936          case 0x0a:    /* LF */
937          *lenptr = 1;
938          return p;
939    
940          case 0x0d:    /* CR */
941          if (p < endptr && *p == 0x0a)
942            {
943            *lenptr = 2;
944            p++;
945            }
946          else *lenptr = 1;
947          return p;
948    
949          default:
950          break;
951          }
952        }   /* End of loop for ANYCRLF case */
953    
954      *lenptr = 0;  /* Must have hit the end */
955      return endptr;
956    
957      case EL_ANY:
958      while (p < endptr)
959        {
960        int extra = 0;
961        register int c = *((unsigned char *)p);
962    
963        if (utf8 && c >= 0xc0)
964          {
965          int gcii, gcss;
966          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
967          gcss = 6*extra;
968          c = (c & utf8_table3[extra]) << gcss;
969          for (gcii = 1; gcii <= extra; gcii++)
970            {
971            gcss -= 6;
972            c |= (p[gcii] & 0x3f) << gcss;
973            }
974          }
975    
976        p += 1 + extra;
977    
978        switch (c)
979          {
980          case 0x0a:    /* LF */
981          case 0x0b:    /* VT */
982          case 0x0c:    /* FF */
983          *lenptr = 1;
984          return p;
985    
986          case 0x0d:    /* CR */
987          if (p < endptr && *p == 0x0a)
988            {
989            *lenptr = 2;
990            p++;
991            }
992          else *lenptr = 1;
993          return p;
994    
995          case 0x85:    /* NEL */
996          *lenptr = utf8? 2 : 1;
997          return p;
998    
999          case 0x2028:  /* LS */
1000          case 0x2029:  /* PS */
1001          *lenptr = 3;
1002          return p;
1003    
1004          default:
1005          break;
1006          }
1007        }   /* End of loop for ANY case */
1008    
1009      *lenptr = 0;  /* Must have hit the end */
1010      return endptr;
1011      }     /* End of overall switch */
1012    }
1013    
1014    
1015    
1016    /*************************************************
1017    *         Find start of previous line            *
1018    *************************************************/
1019    
1020    /* This is called when looking back for before lines to print.
1021    
1022    Arguments:
1023      p         start of the subsequent line
1024      startptr  start of available data
1025    
1026    Returns:    pointer to the start of the previous line
1027    */
1028    
1029    static char *
1030    previous_line(char *p, char *startptr)
1031    {
1032    switch(endlinetype)
1033      {
1034      default:      /* Just in case */
1035      case EL_LF:
1036      p--;
1037      while (p > startptr && p[-1] != '\n') p--;
1038      return p;
1039    
1040      case EL_CR:
1041      p--;
1042      while (p > startptr && p[-1] != '\n') p--;
1043      return p;
1044    
1045      case EL_CRLF:
1046      for (;;)
1047        {
1048        p -= 2;
1049        while (p > startptr && p[-1] != '\n') p--;
1050        if (p <= startptr + 1 || p[-2] == '\r') return p;
1051        }
1052      return p;   /* But control should never get here */
1053    
1054      case EL_ANY:
1055      case EL_ANYCRLF:
1056      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1057      if (utf8) while ((*p & 0xc0) == 0x80) p--;
1058    
1059      while (p > startptr)
1060        {
1061        register int c;
1062        char *pp = p - 1;
1063    
1064        if (utf8)
1065          {
1066          int extra = 0;
1067          while ((*pp & 0xc0) == 0x80) pp--;
1068          c = *((unsigned char *)pp);
1069          if (c >= 0xc0)
1070            {
1071            int gcii, gcss;
1072            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1073            gcss = 6*extra;
1074            c = (c & utf8_table3[extra]) << gcss;
1075            for (gcii = 1; gcii <= extra; gcii++)
1076              {
1077              gcss -= 6;
1078              c |= (pp[gcii] & 0x3f) << gcss;
1079              }
1080            }
1081          }
1082        else c = *((unsigned char *)pp);
1083    
1084        if (endlinetype == EL_ANYCRLF) switch (c)
1085          {
1086          case 0x0a:    /* LF */
1087          case 0x0d:    /* CR */
1088          return p;
1089    
1090          default:
1091          break;
1092          }
1093    
1094        else switch (c)
1095          {
1096          case 0x0a:    /* LF */
1097          case 0x0b:    /* VT */
1098          case 0x0c:    /* FF */
1099          case 0x0d:    /* CR */
1100          case 0x85:    /* NEL */
1101          case 0x2028:  /* LS */
1102          case 0x2029:  /* PS */
1103          return p;
1104    
1105          default:
1106          break;
1107          }
1108    
1109        p = pp;  /* Back one character */
1110        }        /* End of loop for ANY case */
1111    
1112      return startptr;  /* Hit start of data */
1113      }     /* End of overall switch */
1114    }
1115    
1116    
1117    
1118    
1119    
1120    /*************************************************
1121    *       Print the previous "after" lines         *
1122    *************************************************/
1123    
1124    /* This is called if we are about to lose said lines because of buffer filling,
1125    and at the end of the file. The data in the line is written using fwrite() so
1126    that a binary zero does not terminate it.
1127    
1128    Arguments:
1129      lastmatchnumber   the number of the last matching line, plus one
1130      lastmatchrestart  where we restarted after the last match
1131      endptr            end of available data
1132      printname         filename for printing
1133    
1134    Returns:            nothing
1135    */
1136    
1137    static void
1138    do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1139      char *printname)
1140    {
1141    if (after_context > 0 && lastmatchnumber > 0)
1142      {
1143      int count = 0;
1144      while (lastmatchrestart < endptr && count++ < after_context)
1145        {
1146        int ellength;
1147        char *pp = lastmatchrestart;
1148        if (printname != NULL) fprintf(stdout, "%s-", printname);
1149        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1150        pp = end_of_line(pp, endptr, &ellength);
1151        FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1152        lastmatchrestart = pp;
1153        }
1154      hyphenpending = TRUE;
1155      }
1156    }
1157    
1158    
1159    
1160    /*************************************************
1161    *   Apply patterns to subject till one matches   *
1162    *************************************************/
1163    
1164    /* This function is called to run through all patterns, looking for a match. It
1165    is used multiple times for the same subject when colouring is enabled, in order
1166    to find all possible matches.
1167    
1168    Arguments:
1169      matchptr     the start of the subject
1170      length       the length of the subject to match
1171      startoffset  where to start matching
1172      offsets      the offsets vector to fill in
1173      mrc          address of where to put the result of pcre_exec()
1174    
1175    Returns:      TRUE if there was a match
1176                  FALSE if there was no match
1177                  invert if there was a non-fatal error
1178    */
1179    
1180    static BOOL
1181    match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
1182      int *mrc)
1183    {
1184    int i;
1185    size_t slen = length;
1186    patstr *p = patterns;
1187    const char *msg = "this text:\n\n";
1188    
1189    if (slen > 200)
1190      {
1191      slen = 200;
1192      msg = "text that starts:\n\n";
1193      }
1194    for (i = 1; p != NULL; p = p->next, i++)
1195      {
1196      *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1197        startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
1198      if (*mrc >= 0) return TRUE;
1199      if (*mrc == PCRE_ERROR_NOMATCH) continue;
1200      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1201      if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1202      fprintf(stderr, "%s", msg);
1203      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1204      fprintf(stderr, "\n\n");
1205      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1206          *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1207        resource_error = TRUE;
1208      if (error_count++ > 20)
1209        {
1210        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1211        pcregrep_exit(2);
1212        }
1213      return invert;    /* No more matching; don't show the line again */
1214      }
1215    
1216    return FALSE;  /* No match, no errors */
1217    }
1218    
1219    
1220    
1221    /*************************************************
1222    *            Grep an individual file             *
1223    *************************************************/
1224    
1225    /* This is called from grep_or_recurse() below. It uses a buffer that is three
1226    times the value of bufthird. The matching point is never allowed to stray into
1227    the top third of the buffer, thus keeping more of the file available for
1228    context printing or for multiline scanning. For large files, the pointer will
1229    be in the middle third most of the time, so the bottom third is available for
1230    "before" context printing.
1231    
1232    Arguments:
1233      handle       the fopened FILE stream for a normal file
1234                   the gzFile pointer when reading is via libz
1235                   the BZFILE pointer when reading is via libbz2
1236      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1237      filename     the file name or NULL (for errors)
1238      printname    the file name if it is to be printed for each match
1239                   or NULL if the file name is not to be printed
1240                   it cannot be NULL if filenames[_nomatch]_only is set
1241    
1242    Returns:       0 if there was at least one match
1243                   1 otherwise (no matches)
1244                   2 if an overlong line is encountered
1245                   3 if there is a read error on a .bz2 file
1246    */
1247    
1248    static int
1249    pcregrep(void *handle, int frtype, char *filename, char *printname)
1250    {
1251    int rc = 1;
1252    int linenumber = 1;
1253    int lastmatchnumber = 0;
1254    int count = 0;
1255    int filepos = 0;
1256    int offsets[OFFSET_SIZE];
1257    char *lastmatchrestart = NULL;
1258    char *ptr = main_buffer;
1259    char *endptr;
1260    size_t bufflength;
1261    BOOL binary = FALSE;
1262    BOOL endhyphenpending = FALSE;
1263    BOOL input_line_buffered = line_buffered;
1264    FILE *in = NULL;                    /* Ensure initialized */
1265    
1266    #ifdef SUPPORT_LIBZ
1267    gzFile ingz = NULL;
1268    #endif
1269    
1270    #ifdef SUPPORT_LIBBZ2
1271    BZFILE *inbz2 = NULL;
1272    #endif
1273    
1274    
1275    /* Do the first read into the start of the buffer and set up the pointer to end
1276    of what we have. In the case of libz, a non-zipped .gz file will be read as a
1277    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1278    fail. */
1279    
1280    #ifdef SUPPORT_LIBZ
1281    if (frtype == FR_LIBZ)
1282      {
1283      ingz = (gzFile)handle;
1284      bufflength = gzread (ingz, main_buffer, bufsize);
1285      }
1286    else
1287    #endif
1288    
1289    #ifdef SUPPORT_LIBBZ2
1290    if (frtype == FR_LIBBZ2)
1291      {
1292      inbz2 = (BZFILE *)handle;
1293      bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1294      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1295      }                                    /* without the cast it is unsigned. */
1296    else
1297    #endif
1298    
1299      {
1300      in = (FILE *)handle;
1301      if (is_file_tty(in)) input_line_buffered = TRUE;
1302      bufflength = input_line_buffered?
1303        read_one_line(main_buffer, bufsize, in) :
1304        fread(main_buffer, 1, bufsize, in);
1305      }
1306    
1307    endptr = main_buffer + bufflength;
1308    
1309    /* Unless binary-files=text, see if we have a binary file. This uses the same
1310    rule as GNU grep, namely, a search for a binary zero byte near the start of the
1311    file. */
1312    
1313    if (binary_files != BIN_TEXT)
1314      {
1315      binary =
1316        memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1317      if (binary && binary_files == BIN_NOMATCH) return 1;
1318      }
1319    
1320    /* Loop while the current pointer is not at the end of the file. For large
1321    files, endptr will be at the end of the buffer when we are in the middle of the
1322    file, but ptr will never get there, because as soon as it gets over 2/3 of the
1323    way, the buffer is shifted left and re-filled. */
1324    
1325    while (ptr < endptr)
1326      {
1327      int endlinelength;
1328      int mrc = 0;
1329      int startoffset = 0;
1330      BOOL match;
1331      char *matchptr = ptr;
1332      char *t = ptr;
1333      size_t length, linelength;
1334    
1335      /* At this point, ptr is at the start of a line. We need to find the length
1336      of the subject string to pass to pcre_exec(). In multiline mode, it is the
1337      length remainder of the data in the buffer. Otherwise, it is the length of
1338      the next line, excluding the terminating newline. After matching, we always
1339      advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1340      option is used for compiling, so that any match is constrained to be in the
1341      first line. */
1342    
1343      t = end_of_line(t, endptr, &endlinelength);
1344      linelength = t - ptr - endlinelength;
1345      length = multiline? (size_t)(endptr - ptr) : linelength;
1346    
1347      /* Check to see if the line we are looking at extends right to the very end
1348      of the buffer without a line terminator. This means the line is too long to
1349      handle. */
1350    
1351      if (endlinelength == 0 && t == main_buffer + bufsize)
1352        {
1353        fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1354                        "pcregrep: check the --buffer-size option\n",
1355                        linenumber,
1356                        (filename == NULL)? "" : " of file ",
1357                        (filename == NULL)? "" : filename);
1358        return 2;
1359        }
1360    
1361      /* Extra processing for Jeffrey Friedl's debugging. */
1362    
1363    #ifdef JFRIEDL_DEBUG
1364      if (jfriedl_XT || jfriedl_XR)
1365      {
1366          #include <sys/time.h>
1367          #include <time.h>
1368          struct timeval start_time, end_time;
1369          struct timezone dummy;
1370          int i;
1371    
1372          if (jfriedl_XT)
1373          {
1374              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1375              const char *orig = ptr;
1376              ptr = malloc(newlen + 1);
1377              if (!ptr) {
1378                      printf("out of memory");
1379                      pcregrep_exit(2);
1380              }
1381              endptr = ptr;
1382              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1383              for (i = 0; i < jfriedl_XT; i++) {
1384                      strncpy(endptr, orig,  length);
1385                      endptr += length;
1386              }
1387              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1388              length = newlen;
1389          }
1390    
1391          if (gettimeofday(&start_time, &dummy) != 0)
1392                  perror("bad gettimeofday");
1393    
1394    
1395          for (i = 0; i < jfriedl_XR; i++)
1396              match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1397                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1398    
1399          if (gettimeofday(&end_time, &dummy) != 0)
1400                  perror("bad gettimeofday");
1401    
1402          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1403                          -
1404                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1405    
1406          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1407          return 0;
1408      }
1409    #endif
1410    
1411      /* We come back here after a match when the -o option (only_matching) is set,
1412      in order to find any further matches in the same line. */
1413    
1414      ONLY_MATCHING_RESTART:
1415    
1416      /* Run through all the patterns until one matches or there is an error other
1417      than NOMATCH. This code is in a subroutine so that it can be re-used for
1418      finding subsequent matches when colouring matched lines. */
1419    
1420      match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1421    
1422      /* If it's a match or a not-match (as required), do what's wanted. */
1423    
1424      if (match != invert)
1425        {
1426        BOOL hyphenprinted = FALSE;
1427    
1428        /* We've failed if we want a file that doesn't have any matches. */
1429    
1430        if (filenames == FN_NOMATCH_ONLY) return 1;
1431    
1432        /* Just count if just counting is wanted. */
1433    
1434        if (count_only) count++;
1435    
1436        /* When handling a binary file and binary-files==binary, the "binary"
1437        variable will be set true (it's false in all other cases). In this
1438        situation we just want to output the file name. No need to scan further. */
1439    
1440        else if (binary)
1441          {
1442          fprintf(stdout, "Binary file %s matches\n", filename);
1443          return 0;
1444          }
1445    
1446        /* If all we want is a file name, there is no need to scan any more lines
1447        in the file. */
1448    
1449        else if (filenames == FN_MATCH_ONLY)
1450          {
1451          fprintf(stdout, "%s\n", printname);
1452          return 0;
1453          }
1454    
1455        /* Likewise, if all we want is a yes/no answer. */
1456    
1457        else if (quiet) return 0;
1458    
1459        /* The --only-matching option prints just the substring that matched, or a
1460        captured portion of it, as long as this string is not empty, and the
1461        --file-offsets and --line-offsets options output offsets for the matching
1462        substring (they both force --only-matching = 0). None of these options
1463        prints any context. Afterwards, adjust the start and then jump back to look
1464        for further matches in the same line. If we are in invert mode, however,
1465        nothing is printed and we do not restart - this could still be useful
1466        because the return code is set. */
1467    
1468        else if (only_matching >= 0)
1469          {
1470          if (!invert)
1471            {
1472            if (printname != NULL) fprintf(stdout, "%s:", printname);
1473            if (number) fprintf(stdout, "%d:", linenumber);
1474            if (line_offsets)
1475              fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1476                offsets[1] - offsets[0]);
1477            else if (file_offsets)
1478              fprintf(stdout, "%d,%d\n",
1479                (int)(filepos + matchptr + offsets[0] - ptr),
1480                offsets[1] - offsets[0]);
1481            else if (only_matching < mrc)
1482              {
1483              int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1484              if (plen > 0)
1485                {
1486                if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1487                FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1488                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1489                fprintf(stdout, "\n");
1490                }
1491              }
1492            else if (printname != NULL || number) fprintf(stdout, "\n");
1493            match = FALSE;
1494            if (line_buffered) fflush(stdout);
1495            rc = 0;                      /* Had some success */
1496            startoffset = offsets[1];    /* Restart after the match */
1497            goto ONLY_MATCHING_RESTART;
1498            }
1499          }
1500    
1501        /* This is the default case when none of the above options is set. We print
1502        the matching lines(s), possibly preceded and/or followed by other lines of
1503        context. */
1504    
1505        else
1506          {
1507          /* See if there is a requirement to print some "after" lines from a
1508          previous match. We never print any overlaps. */
1509    
1510          if (after_context > 0 && lastmatchnumber > 0)
1511            {
1512            int ellength;
1513            int linecount = 0;
1514            char *p = lastmatchrestart;
1515    
1516            while (p < ptr && linecount < after_context)
1517              {
1518              p = end_of_line(p, ptr, &ellength);
1519              linecount++;
1520              }
1521    
1522            /* It is important to advance lastmatchrestart during this printing so
1523            that it interacts correctly with any "before" printing below. Print
1524            each line's data using fwrite() in case there are binary zeroes. */
1525    
1526            while (lastmatchrestart < p)
1527              {
1528              char *pp = lastmatchrestart;
1529              if (printname != NULL) fprintf(stdout, "%s-", printname);
1530              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1531              pp = end_of_line(pp, endptr, &ellength);
1532              FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1533              lastmatchrestart = pp;
1534              }
1535            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1536            }
1537    
1538          /* If there were non-contiguous lines printed above, insert hyphens. */
1539    
1540          if (hyphenpending)
1541            {
1542            fprintf(stdout, "--\n");
1543            hyphenpending = FALSE;
1544            hyphenprinted = TRUE;
1545            }
1546    
1547          /* See if there is a requirement to print some "before" lines for this
1548          match. Again, don't print overlaps. */
1549    
1550          if (before_context > 0)
1551            {
1552            int linecount = 0;
1553            char *p = ptr;
1554    
1555            while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1556                   linecount < before_context)
1557              {
1558              linecount++;
1559              p = previous_line(p, main_buffer);
1560              }
1561    
1562            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1563              fprintf(stdout, "--\n");
1564    
1565            while (p < ptr)
1566              {
1567              int ellength;
1568              char *pp = p;
1569              if (printname != NULL) fprintf(stdout, "%s-", printname);
1570              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1571              pp = end_of_line(pp, endptr, &ellength);
1572              FWRITE(p, 1, pp - p, stdout);
1573              p = pp;
1574              }
1575            }
1576    
1577          /* Now print the matching line(s); ensure we set hyphenpending at the end
1578          of the file if any context lines are being output. */
1579    
1580          if (after_context > 0 || before_context > 0)
1581            endhyphenpending = TRUE;
1582    
1583          if (printname != NULL) fprintf(stdout, "%s:", printname);
1584          if (number) fprintf(stdout, "%d:", linenumber);
1585    
1586          /* In multiline mode, we want to print to the end of the line in which
1587          the end of the matched string is found, so we adjust linelength and the
1588          line number appropriately, but only when there actually was a match
1589          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1590          the match will always be before the first newline sequence. */
1591    
1592          if (multiline & !invert)
1593            {
1594            char *endmatch = ptr + offsets[1];
1595            t = ptr;
1596            while (t < endmatch)
1597              {
1598              t = end_of_line(t, endptr, &endlinelength);
1599              if (t < endmatch) linenumber++; else break;
1600              }
1601            linelength = t - ptr - endlinelength;
1602            }
1603    
1604          /*** NOTE: Use only fwrite() to output the data line, so that binary
1605          zeroes are treated as just another data character. */
1606    
1607          /* This extra option, for Jeffrey Friedl's debugging requirements,
1608          replaces the matched string, or a specific captured string if it exists,
1609          with X. When this happens, colouring is ignored. */
1610    
1611    #ifdef JFRIEDL_DEBUG
1612          if (S_arg >= 0 && S_arg < mrc)
1613            {
1614            int first = S_arg * 2;
1615            int last  = first + 1;
1616            FWRITE(ptr, 1, offsets[first], stdout);
1617            fprintf(stdout, "X");
1618            FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1619            }
1620          else
1621    #endif
1622    
1623          /* We have to split the line(s) up if colouring, and search for further
1624          matches, but not of course if the line is a non-match. */
1625    
1626          if (do_colour && !invert)
1627            {
1628            int plength;
1629            FWRITE(ptr, 1, offsets[0], stdout);
1630            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1631            FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1632            fprintf(stdout, "%c[00m", 0x1b);
1633            for (;;)
1634              {
1635              startoffset = offsets[1];
1636              if (startoffset >= (int)linelength + endlinelength ||
1637                  !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1638                break;
1639              FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1640              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1641              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1642              fprintf(stdout, "%c[00m", 0x1b);
1643              }
1644    
1645            /* In multiline mode, we may have already printed the complete line
1646            and its line-ending characters (if they matched the pattern), so there
1647            may be no more to print. */
1648    
1649            plength = (int)((linelength + endlinelength) - startoffset);
1650            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1651            }
1652    
1653          /* Not colouring; no need to search for further matches */
1654    
1655          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1656          }
1657    
1658        /* End of doing what has to be done for a match. If --line-buffered was
1659        given, flush the output. */
1660    
1661        if (line_buffered) fflush(stdout);
1662        rc = 0;    /* Had some success */
1663    
1664        /* Remember where the last match happened for after_context. We remember
1665        where we are about to restart, and that line's number. */
1666    
1667        lastmatchrestart = ptr + linelength + endlinelength;
1668        lastmatchnumber = linenumber + 1;
1669        }
1670    
1671      /* For a match in multiline inverted mode (which of course did not cause
1672      anything to be printed), we have to move on to the end of the match before
1673      proceeding. */
1674    
1675      if (multiline && invert && match)
1676        {
1677        int ellength;
1678        char *endmatch = ptr + offsets[1];
1679        t = ptr;
1680        while (t < endmatch)
1681          {
1682          t = end_of_line(t, endptr, &ellength);
1683          if (t <= endmatch) linenumber++; else break;
1684          }
1685        endmatch = end_of_line(endmatch, endptr, &ellength);
1686        linelength = endmatch - ptr - ellength;
1687        }
1688    
1689      /* Advance to after the newline and increment the line number. The file
1690      offset to the current line is maintained in filepos. */
1691    
1692      ptr += linelength + endlinelength;
1693      filepos += (int)(linelength + endlinelength);
1694      linenumber++;
1695    
1696      /* If input is line buffered, and the buffer is not yet full, read another
1697      line and add it into the buffer. */
1698    
1699      if (input_line_buffered && bufflength < (size_t)bufsize)
1700        {
1701        int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1702        bufflength += add;
1703        endptr += add;
1704        }
1705    
1706      /* If we haven't yet reached the end of the file (the buffer is full), and
1707      the current point is in the top 1/3 of the buffer, slide the buffer down by
1708      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1709      about to be lost, print them. */
1710    
1711      if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1712        {
1713        if (after_context > 0 &&
1714            lastmatchnumber > 0 &&
1715            lastmatchrestart < main_buffer + bufthird)
1716          {
1717          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1718          lastmatchnumber = 0;
1719          }
1720    
1721        /* Now do the shuffle */
1722    
1723        memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1724        ptr -= bufthird;
1725    
1726    #ifdef SUPPORT_LIBZ
1727        if (frtype == FR_LIBZ)
1728          bufflength = 2*bufthird +
1729            gzread (ingz, main_buffer + 2*bufthird, bufthird);
1730        else
1731    #endif
1732    
1733    #ifdef SUPPORT_LIBBZ2
1734        if (frtype == FR_LIBBZ2)
1735          bufflength = 2*bufthird +
1736            BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1737        else
1738    #endif
1739    
1740        bufflength = 2*bufthird +
1741          (input_line_buffered?
1742           read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1743           fread(main_buffer + 2*bufthird, 1, bufthird, in));
1744        endptr = main_buffer + bufflength;
1745    
1746        /* Adjust any last match point */
1747    
1748        if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1749        }
1750      }     /* Loop through the whole file */
1751    
1752    /* End of file; print final "after" lines if wanted; do_after_lines sets
1753    hyphenpending if it prints something. */
1754    
1755    if (only_matching < 0 && !count_only)
1756      {
1757      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1758      hyphenpending |= endhyphenpending;
1759      }
1760    
1761    /* Print the file name if we are looking for those without matches and there
1762    were none. If we found a match, we won't have got this far. */
1763    
1764    if (filenames == FN_NOMATCH_ONLY)
1765      {
1766      fprintf(stdout, "%s\n", printname);
1767      return 0;
1768    }    }
1769    
1770  /* If the file is not a directory, or we are not recursing, scan it. If this is  /* Print the match count if wanted */
 the first and only argument at top level, we don't show the file name.  
 Otherwise, control is via the show_filenames variable. */  
1771    
1772  in = fopen(filename, "r");  if (count_only)
1773  if (in == NULL)    {
1774      if (count > 0 || !omit_zero_count)
1775        {
1776        if (printname != NULL && filenames != FN_NONE)
1777          fprintf(stdout, "%s:", printname);
1778        fprintf(stdout, "%d\n", count);
1779        }
1780      }
1781    
1782    return rc;
1783    }
1784    
1785    
1786    
1787    /*************************************************
1788    *     Grep a file or recurse into a directory    *
1789    *************************************************/
1790    
1791    /* Given a path name, if it's a directory, scan all the files if we are
1792    recursing; if it's a file, grep it.
1793    
1794    Arguments:
1795      pathname          the path to investigate
1796      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1797      only_one_at_top   TRUE if the path is the only one at toplevel
1798    
1799    Returns:  -1 the file/directory was skipped
1800               0 if there was at least one match
1801               1 if there were no matches
1802               2 there was some kind of error
1803    
1804    However, file opening failures are suppressed if "silent" is set.
1805    */
1806    
1807    static int
1808    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1809    {
1810    int rc = 1;
1811    int frtype;
1812    void *handle;
1813    char *lastcomp;
1814    FILE *in = NULL;           /* Ensure initialized */
1815    
1816    #ifdef SUPPORT_LIBZ
1817    gzFile ingz = NULL;
1818    #endif
1819    
1820    #ifdef SUPPORT_LIBBZ2
1821    BZFILE *inbz2 = NULL;
1822    #endif
1823    
1824    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1825    int pathlen;
1826    #endif
1827    
1828    /* If the file name is "-" we scan stdin */
1829    
1830    if (strcmp(pathname, "-") == 0)
1831      {
1832      return pcregrep(stdin, FR_PLAIN, stdin_name,
1833        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1834          stdin_name : NULL);
1835      }
1836    
1837    /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
1838    directories, whereas --include and --exclude apply to everything else. The test
1839    is against the final component of the path. */
1840    
1841    lastcomp = strrchr(pathname, FILESEP);
1842    lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
1843    
1844    /* If the file is a directory, skip if not recursing or if explicitly excluded.
1845    Otherwise, scan the directory and recurse for each path within it. The scanning
1846    code is localized so it can be made system-specific. */
1847    
1848    if (isdirectory(pathname))
1849      {
1850      if (dee_action == dee_SKIP ||
1851          !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
1852        return -1;
1853    
1854      if (dee_action == dee_RECURSE)
1855        {
1856        char buffer[1024];
1857        char *nextfile;
1858        directory_type *dir = opendirectory(pathname);
1859    
1860        if (dir == NULL)
1861          {
1862          if (!silent)
1863            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1864              strerror(errno));
1865          return 2;
1866          }
1867    
1868        while ((nextfile = readdirectory(dir)) != NULL)
1869          {
1870          int frc;
1871          sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
1872          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1873          if (frc > 1) rc = frc;
1874           else if (frc == 0 && rc == 1) rc = 0;
1875          }
1876    
1877        closedirectory(dir);
1878        return rc;
1879        }
1880      }
1881    
1882    /* If the file is not a directory and not a regular file, skip it if that's
1883    been requested. Otherwise, check for explicit include/exclude. */
1884    
1885    else if ((!isregfile(pathname) && DEE_action == DEE_SKIP) ||
1886              !test_incexc(lastcomp, include_patterns, exclude_patterns))
1887            return -1;
1888    
1889    /* Control reaches here if we have a regular file, or if we have a directory
1890    and recursion or skipping was not requested, or if we have anything else and
1891    skipping was not requested. The scan proceeds. If this is the first and only
1892    argument at top level, we don't show the file name, unless we are only showing
1893    the file name, or the filename was forced (-H). */
1894    
1895    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1896    pathlen = (int)(strlen(pathname));
1897    #endif
1898    
1899    /* Open using zlib if it is supported and the file name ends with .gz. */
1900    
1901    #ifdef SUPPORT_LIBZ
1902    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1903      {
1904      ingz = gzopen(pathname, "rb");
1905      if (ingz == NULL)
1906        {
1907        if (!silent)
1908          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1909            strerror(errno));
1910        return 2;
1911        }
1912      handle = (void *)ingz;
1913      frtype = FR_LIBZ;
1914      }
1915    else
1916    #endif
1917    
1918    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1919    
1920    #ifdef SUPPORT_LIBBZ2
1921    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1922      {
1923      inbz2 = BZ2_bzopen(pathname, "rb");
1924      handle = (void *)inbz2;
1925      frtype = FR_LIBBZ2;
1926      }
1927    else
1928    #endif
1929    
1930    /* Otherwise use plain fopen(). The label is so that we can come back here if
1931    an attempt to read a .bz2 file indicates that it really is a plain file. */
1932    
1933    #ifdef SUPPORT_LIBBZ2
1934    PLAIN_FILE:
1935    #endif
1936    {    {
1937    fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));    in = fopen(pathname, "rb");
1938      handle = (void *)in;
1939      frtype = FR_PLAIN;
1940      }
1941    
1942    /* All the opening methods return errno when they fail. */
1943    
1944    if (handle == NULL)
1945      {
1946      if (!silent)
1947        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1948          strerror(errno));
1949    return 2;    return 2;
1950    }    }
1951    
1952  rc = pcregrep(in, (show_filenames && !only_one_at_top)? filename : NULL);  /* Now grep the file */
1953    
1954    rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1955      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1956    
1957    /* Close in an appropriate manner. */
1958    
1959    #ifdef SUPPORT_LIBZ
1960    if (frtype == FR_LIBZ)
1961      gzclose(ingz);
1962    else
1963    #endif
1964    
1965    /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1966    read failed. If the error indicates that the file isn't in fact bzipped, try
1967    again as a normal file. */
1968    
1969    #ifdef SUPPORT_LIBBZ2
1970    if (frtype == FR_LIBBZ2)
1971      {
1972      if (rc == 3)
1973        {
1974        int errnum;
1975        const char *err = BZ2_bzerror(inbz2, &errnum);
1976        if (errnum == BZ_DATA_ERROR_MAGIC)
1977          {
1978          BZ2_bzclose(inbz2);
1979          goto PLAIN_FILE;
1980          }
1981        else if (!silent)
1982          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1983            pathname, err);
1984        rc = 2;    /* The normal "something went wrong" code */
1985        }
1986      BZ2_bzclose(inbz2);
1987      }
1988    else
1989    #endif
1990    
1991    /* Normal file close */
1992    
1993  fclose(in);  fclose(in);
1994    
1995    /* Pass back the yield from pcregrep(). */
1996    
1997  return rc;  return rc;
1998  }  }
1999    
# Line 287  return rc; Line 2007  return rc;
2007  static int  static int
2008  usage(int rc)  usage(int rc)
2009  {  {
2010  fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n");  option_item *op;
2011  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Usage: pcregrep [-");
2012    for (op = optionlist; op->one_char != 0; op++)
2013      {
2014      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
2015      }
2016    fprintf(stderr, "] [long options] [pattern] [files]\n");
2017    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
2018      "options.\n");
2019  return rc;  return rc;
2020  }  }
2021    
# Line 304  help(void) Line 2031  help(void)
2031  {  {
2032  option_item *op;  option_item *op;
2033    
2034  printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
2035  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
2036  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
2037    printf("\"-\" can be used as a file name to mean STDIN.\n");
2038    
2039    #ifdef SUPPORT_LIBZ
2040    printf("Files whose names end in .gz are read using zlib.\n");
2041    #endif
2042    
2043    #ifdef SUPPORT_LIBBZ2
2044    printf("Files whose names end in .bz2 are read using bzlib2.\n");
2045    #endif
2046    
2047    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2048    printf("Other files and the standard input are read as plain files.\n\n");
2049    #else
2050    printf("All files are read as plain files, without any interpretation.\n\n");
2051    #endif
2052    
2053    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
2054  printf("Options:\n");  printf("Options:\n");
2055    
2056  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
2057    {    {
2058    int n;    int n;
2059    char s[4];    char s[4];
2060    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");  
2061    printf("  %s --%s%n", s, op->long_name, &n);    /* Two options were accidentally implemented and documented with underscores
2062    n = 30 - n;    instead of hyphens in their names, something that was not noticed for quite a
2063      few releases. When fixing this, I left the underscored versions in the list
2064      in case people were using them. However, we don't want to display them in the
2065      help data. There are no other options that contain underscores, and we do not
2066      expect ever to implement such options. Therefore, just omit any option that
2067      contains an underscore. */
2068    
2069      if (strchr(op->long_name, '_') != NULL) continue;
2070    
2071      if (op->one_char > 0 && (op->long_name)[0] == 0)
2072        n = 31 - printf("  -%c", op->one_char);
2073      else
2074        {
2075        if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
2076          else strcpy(s, "   ");
2077        n = 31 - printf("  %s --%s", s, op->long_name);
2078        }
2079    
2080    if (n < 1) n = 1;    if (n < 1) n = 1;
2081    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                           ", op->help_text);
2082    }    }
2083    
2084  printf("\n  -f<filename>  or  --file=<filename>\n");  printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
2085  printf("    Read patterns from <filename> instead of using a command line option.\n");  printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
2086  printf("    Trailing white space is removed; blanks lines are ignored.\n");  printf("When reading patterns or file names from a file, trailing white\n");
2087  printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  printf("space is removed and blank lines are ignored.\n");
2088    printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
2089    
2090  printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
2091  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
2092  }  }
2093    
# Line 334  printf("Exit status is 0 if any matches, Line 2095  printf("Exit status is 0 if any matches,
2095    
2096    
2097  /*************************************************  /*************************************************
2098  *                Handle an option                *  *    Handle a single-letter, no data option      *
2099    *************************************************/
2100    
2101    static int
2102    handle_option(int letter, int options)
2103    {
2104    switch(letter)
2105      {
2106      case N_FOFFSETS: file_offsets = TRUE; break;
2107      case N_HELP: help(); pcregrep_exit(0);
2108      case N_LBUFFER: line_buffered = TRUE; break;
2109      case N_LOFFSETS: line_offsets = number = TRUE; break;
2110      case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2111      case 'a': binary_files = BIN_TEXT; break;
2112      case 'c': count_only = TRUE; break;
2113      case 'F': process_options |= PO_FIXED_STRINGS; break;
2114      case 'H': filenames = FN_FORCE; break;
2115      case 'I': binary_files = BIN_NOMATCH; break;
2116      case 'h': filenames = FN_NONE; break;
2117      case 'i': options |= PCRE_CASELESS; break;
2118      case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2119      case 'L': filenames = FN_NOMATCH_ONLY; break;
2120      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2121      case 'n': number = TRUE; break;
2122      case 'o': only_matching = 0; break;
2123      case 'q': quiet = TRUE; break;
2124      case 'r': dee_action = dee_RECURSE; break;
2125      case 's': silent = TRUE; break;
2126      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
2127      case 'v': invert = TRUE; break;
2128      case 'w': process_options |= PO_WORD_MATCH; break;
2129      case 'x': process_options |= PO_LINE_MATCH; break;
2130    
2131      case 'V':
2132      fprintf(stdout, "pcregrep version %s\n", pcre_version());
2133      pcregrep_exit(0);
2134      break;
2135    
2136      default:
2137      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2138      pcregrep_exit(usage(2));
2139      }
2140    
2141    return options;
2142    }
2143    
2144    
2145    
2146    
2147    /*************************************************
2148    *          Construct printed ordinal             *
2149  *************************************************/  *************************************************/
2150    
2151  static int  /* This turns a number into "1st", "3rd", etc. */
2152  handle_option(int letter, int options)  
/* Turn a number into its printed English ordinal: "1st", "2nd", "3rd", "4th",
"11th", "21st", and so on.

Argument:   n   the number
Returns:    pointer to a static buffer holding the text; the contents are
            overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[32];   /* Room for any int, the two-letter suffix, and NUL */
const char *suffix = "th";
int lasttwo = n % 100;

/* Numbers ending in 11, 12 or 13 take "th" ("11th", "112th"), so the final
digit selects the suffix only outside that range. A negative n gives negative
remainders and therefore also falls through to "th". */

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (n % 10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, suffix);
return buffer;
}
2169    
2170    
2171    
2172    /*************************************************
2173    *          Compile a single pattern              *
2174    *************************************************/
2175    
2176    /* Do nothing if the pattern has already been compiled. This is the case for
2177    include/exclude patterns read from a file.
2178    
2179    When the -F option has been used, each "pattern" may be a list of strings,
2180    separated by line breaks. They will be matched literally. We split such a
2181    string and compile the first substring, inserting an additional block into the
2182    pattern chain.
2183    
2184    Arguments:
2185      p              points to the pattern block
2186      options        the PCRE options
2187      popts          the processing options
2188      fromfile       TRUE if the pattern was read from a file
2189      fromtext       file name or identifying text (e.g. "include")
2190      count          0 if this is the only command line pattern, or
2191                     number of the command line pattern, or
2192                     linenumber for a pattern from a file
2193    
2194    Returns:         TRUE on success, FALSE after an error
2195    */
2196    
2197    static BOOL
2198    compile_pattern(patstr *p, int options, int popts, int fromfile,
2199      const char *fromtext, int count)
2200    {
2201    char buffer[PATBUFSIZE];
2202    const char *error;
2203    char *ps = p->string;
2204    int patlen = strlen(ps);
2205    int errptr;
2206    
2207    if (p->compiled != NULL) return TRUE;
2208    
2209    if ((popts & PO_FIXED_STRINGS) != 0)
2210      {
2211      int ellength;
2212      char *eop = ps + patlen;
2213      char *pe = end_of_line(ps, eop, &ellength);
2214    
2215      if (ellength != 0)
2216        {
2217        if (add_pattern(pe, p) == NULL) return FALSE;
2218        patlen = (int)(pe - ps - ellength);
2219        }
2220      }
2221    
2222    sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2223    p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2224    if (p->compiled != NULL) return TRUE;
2225    
2226    /* Handle compile errors */
2227    
2228    errptr -= (int)strlen(prefix[popts]);
2229    if (errptr > patlen) errptr = patlen;
2230    
2231    if (fromfile)
2232      {
2233      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2234        "at offset %d: %s\n", count, fromtext, errptr, error);
2235      }
2236    else
2237      {
2238      if (count == 0)
2239        fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2240          fromtext, errptr, error);
2241      else
2242        fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2243          ordin(count), fromtext, errptr, error);
2244      }
2245    
2246    return FALSE;
2247    }
2248    
2249    
2250    
2251    /*************************************************
2252    *     Read and compile a file of patterns        *
2253    *************************************************/
2254    
2255    /* This is used for --filelist, --include-from, and --exclude-from.
2256    
2257    Arguments:
2258      name         the name of the file; "-" is stdin
2259      patptr       pointer to the pattern chain anchor
2260      patlastptr   pointer to the last pattern pointer
2261      popts        the process options to pass to compile_pattern()
2262    
2263    Returns:       TRUE if all went well
2264    */
2265    
2266    static BOOL
2267    read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2268    {
2269    int linenumber = 0;
2270    FILE *f;
2271    char *filename;
2272    char buffer[PATBUFSIZE];
2273    
2274    if (strcmp(name, "-") == 0)
2275      {
2276      f = stdin;
2277      filename = stdin_name;
2278      }
2279    else
2280      {
2281      f = fopen(name, "r");
2282      if (f == NULL)
2283        {
2284        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2285        return FALSE;
2286        }
2287      filename = name;
2288      }
2289    
2290    while (fgets(buffer, PATBUFSIZE, f) != NULL)
2291    {    {
2292    case -1:  help(); exit(0);    char *s = buffer + (int)strlen(buffer);
2293    case 'c': count_only = TRUE; break;    while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2294    case 'h': filenames = FALSE; break;    *s = 0;
2295    case 'i': options |= PCRE_CASELESS; break;    linenumber++;
2296    case 'l': filenames_only = TRUE;    if (buffer[0] == 0) continue;   /* Skip blank lines */
   case 'n': number = TRUE; break;  
   case 'r': recurse = TRUE; break;  
   case 's': silent = TRUE; break;  
   case 'v': invert = TRUE; break;  
   case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;  
2297    
2298    case 'V':    /* Note: this call to add_pattern() puts a pointer to the local variable
2299    fprintf(stderr, "pcregrep version %s using ", VERSION);    "buffer" into the pattern chain. However, that pointer is used only when
2300    fprintf(stderr, "PCRE version %s\n", pcre_version());    compiling the pattern, which happens immediately below, so we flatten it
2301    exit(0);    afterwards, as a precaution against any later code trying to use it. */
2302    break;  
2303      *patlastptr = add_pattern(buffer, *patlastptr);
2304      if (*patlastptr == NULL) return FALSE;
2305      if (*patptr == NULL) *patptr = *patlastptr;
2306    
2307      /* This loop is needed because compiling a "pattern" when -F is set may add
2308      on additional literal patterns if the original contains a newline. In the
2309      common case, it never will, because fgets() stops at a newline. However,
2310      the -N option can be used to give pcregrep a different newline setting. */
2311    
2312    default:    for(;;)
2313    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);      {
2314    exit(usage(2));      if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2315            linenumber))
2316          return FALSE;
2317        (*patlastptr)->string = NULL;            /* Insurance */
2318        if ((*patlastptr)->next == NULL) break;
2319        *patlastptr = (*patlastptr)->next;
2320        }
2321    }    }
2322    
2323  return options;  if (f != stdin) fclose(f);
2324    return TRUE;
2325  }  }
2326    
2327    
2328    
   
2329  /*************************************************  /*************************************************
2330  *                Main program                    *  *                Main program                    *
2331  *************************************************/  *************************************************/
2332    
2333    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2334    
2335  int  int
2336  main(int argc, char **argv)  main(int argc, char **argv)
2337  {  {
2338  int i, j;  int i, j;
2339  int rc = 1;  int rc = 1;
 int options = 0;  
 int errptr;  
 const char *error;  
2340  BOOL only_one_at_top;  BOOL only_one_at_top;
2341    patstr *cp;
2342    fnstr *fn;
2343    const char *locale_from = "--locale";
2344    const char *error;
2345    
2346    #ifdef SUPPORT_PCREGREP_JIT
2347    pcre_jit_stack *jit_stack = NULL;
2348    #endif
2349    
2350    /* Set the default line ending value from the default in the PCRE library;
2351    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2352    Note that the return values from pcre_config(), though derived from the ASCII
2353    codes, are the same in EBCDIC environments, so we must use the actual values
2354    rather than escapes such as '\r'. */
2355    
2356    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2357    switch(i)
2358      {
2359      default:               newline = (char *)"lf"; break;
2360      case 13:               newline = (char *)"cr"; break;
2361      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2362      case -1:               newline = (char *)"any"; break;
2363      case -2:               newline = (char *)"anycrlf"; break;
2364      }
2365    
2366  /* Process the options */  /* Process the options */
2367    
2368  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
2369    {    {
2370      option_item *op = NULL;
2371      char *option_data = (char *)"";    /* default to keep compiler happy */
2372      BOOL longop;
2373      BOOL longopwasequals = FALSE;
2374    
2375    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
2376    
2377    /* Long name options */    /* If we hit an argument that is just "-", it may be a reference to STDIN,
2378      but only if we have previously had -e or -f to define the patterns. */
2379    
2380      if (argv[i][1] == 0)
2381        {
2382        if (pattern_files != NULL || patterns != NULL) break;
2383          else pcregrep_exit(usage(2));
2384        }
2385    
2386      /* Handle a long name option, or -- to terminate the options */
2387    
2388    if (argv[i][1] == '-')    if (argv[i][1] == '-')
2389      {      {
2390      option_item *op;      char *arg = argv[i] + 2;
2391        char *argequals = strchr(arg, '=');
2392    
2393      if (strncmp(argv[i]+2, "file=", 5) == 0)      if (*arg == 0)    /* -- terminates options */
2394        {        {
2395        pattern_filename = argv[i] + 7;        i++;
2396        continue;        break;                /* out of the options-handling loop */
2397        }        }
2398    
2399        longop = TRUE;
2400    
2401        /* Some long options have data that follows after =, for example file=name.
2402        Some options have variations in the long name spelling: specifically, we
2403        allow "regexp" because GNU grep allows it, though I personally go along
2404        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2405        These options are entered in the table as "regex(p)". Options can be in
2406        both these categories. */
2407    
2408      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2409        {        {
2410        if (strcmp(argv[i]+2, op->long_name) == 0)        char *opbra = strchr(op->long_name, '(');
2411          char *equals = strchr(op->long_name, '=');
2412    
2413          /* Handle options with only one spelling of the name */
2414    
2415          if (opbra == NULL)     /* Does not contain '(' */
2416            {
2417            if (equals == NULL)  /* Not thing=data case */
2418              {
2419              if (strcmp(arg, op->long_name) == 0) break;
2420              }
2421            else                 /* Special case xxx=data */
2422              {
2423              int oplen = (int)(equals - op->long_name);
2424              int arglen = (argequals == NULL)?
2425                (int)strlen(arg) : (int)(argequals - arg);
2426              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2427                {
2428                option_data = arg + arglen;
2429                if (*option_data == '=')
2430                  {
2431                  option_data++;
2432                  longopwasequals = TRUE;
2433                  }
2434                break;
2435                }
2436              }
2437            }
2438    
2439          /* Handle options with an alternate spelling of the name */
2440    
2441          else
2442          {          {
2443          options = handle_option(op->one_char, options);          char buff1[24];
2444          break;          char buff2[24];
2445    
2446            int baselen = (int)(opbra - op->long_name);
2447            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2448            int arglen = (argequals == NULL || equals == NULL)?
2449              (int)strlen(arg) : (int)(argequals - arg);
2450    
2451            sprintf(buff1, "%.*s", baselen, op->long_name);
2452            sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2453    
2454            if (strncmp(arg, buff1, arglen) == 0 ||
2455               strncmp(arg, buff2, arglen) == 0)
2456              {
2457              if (equals != NULL && argequals != NULL)
2458                {
2459                option_data = argequals;
2460                if (*option_data == '=')
2461                  {
2462                  option_data++;
2463                  longopwasequals = TRUE;
2464                  }
2465                }
2466              break;
2467              }
2468          }          }
2469        }        }
2470    
2471      if (op->one_char == 0)      if (op->one_char == 0)
2472        {        {
2473        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2474        exit(usage(2));        pcregrep_exit(usage(2));
2475        }        }
2476      }      }
2477    
2478    /* One-char options */    /* Jeffrey Friedl's debugging harness uses these additional options which
2479      are not in the right form for putting in the option table because they use
2480      only one hyphen, yet are more than one character long. By putting them
2481      separately here, they will not get displayed as part of the help() output,
2482      but I don't think Jeffrey will care about that. */
2483    
2484    #ifdef JFRIEDL_DEBUG
2485      else if (strcmp(argv[i], "-pre") == 0) {
2486              jfriedl_prefix = argv[++i];
2487              continue;
2488      } else if (strcmp(argv[i], "-post") == 0) {
2489              jfriedl_postfix = argv[++i];
2490              continue;
2491      } else if (strcmp(argv[i], "-XT") == 0) {
2492              sscanf(argv[++i], "%d", &jfriedl_XT);
2493              continue;
2494      } else if (strcmp(argv[i], "-XR") == 0) {
2495              sscanf(argv[++i], "%d", &jfriedl_XR);
2496              continue;
2497      }
2498    #endif
2499    
2500    
2501      /* One-char options; many that have no data may be in a single argument; we
2502      continue till we hit the last one or one that needs data. */
2503    
2504    else    else
2505      {      {
2506      char *s = argv[i] + 1;      char *s = argv[i] + 1;
2507        longop = FALSE;
2508      while (*s != 0)      while (*s != 0)
2509        {        {
2510        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
2511          {          {
2512          pattern_filename = s + 1;          if (*s == op->one_char) break;
2513          if (pattern_filename[0] == 0)          }
2514            {        if (op->one_char == 0)
2515            if (i >= argc - 1)          {
2516              {          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2517              fprintf(stderr, "pcregrep: File name missing after -f\n");            *s, argv[i]);
2518              exit(usage(2));          pcregrep_exit(usage(2));
2519              }          }
2520            pattern_filename = argv[++i];  
2521            }        /* Check for a single-character option that has data: OP_OP_NUMBER
2522          break;        is used for one that either has a numerical number or defaults, i.e. the
2523          data is optional. If a digit follows, there is data; if not, carry on
2524          with other single-character options in the same string. */
2525    
2526          option_data = s+1;
2527          if (op->type == OP_OP_NUMBER)
2528            {
2529            if (isdigit((unsigned char)s[1])) break;
2530            }
2531          else   /* Check for end or a dataless option */
2532            {
2533            if (op->type != OP_NODATA || s[1] == 0) break;
2534            }
2535    
2536          /* Handle a single-character option with no data, then loop for the
2537          next character in the string. */
2538    
2539          pcre_options = handle_option(*s++, pcre_options);
2540          }
2541        }
2542    
2543      /* At this point we should have op pointing to a matched option. If the type
2544      is NO_DATA, it means that there is no data, and the option might set
2545      something in the PCRE options. */
2546    
2547      if (op->type == OP_NODATA)
2548        {
2549        pcre_options = handle_option(op->one_char, pcre_options);
2550        continue;
2551        }
2552    
2553      /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2554      either has a value or defaults to something. It cannot have data in a
2555      separate item. At the moment, the only such options are "colo(u)r",
2556      "only-matching", and Jeffrey Friedl's special -S debugging option. */
2557    
2558      if (*option_data == 0 &&
2559          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2560        {
2561        switch (op->one_char)
2562          {
2563          case N_COLOUR:
2564          colour_option = (char *)"auto";
2565          break;
2566    
2567          case 'o':
2568          only_matching = 0;
2569          break;
2570    
2571    #ifdef JFRIEDL_DEBUG
2572          case 'S':
2573          S_arg = 0;
2574          break;
2575    #endif
2576          }
2577        continue;
2578        }
2579    
2580      /* Otherwise, find the data string for the option. */
2581    
2582      if (*option_data == 0)
2583        {
2584        if (i >= argc - 1 || longopwasequals)
2585          {
2586          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2587          pcregrep_exit(usage(2));
2588          }
2589        option_data = argv[++i];
2590        }
2591    
2592      /* If the option type is OP_PATLIST, it's the -e option, or one of the
2593      include/exclude options, which can be called multiple times to create lists
2594      of patterns. */
2595    
2596      if (op->type == OP_PATLIST)
2597         {
2598         patdatastr *pd = (patdatastr *)op->dataptr;
2599         *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2600         if (*(pd->lastptr) == NULL) goto EXIT2;
2601         if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2602         }
2603    
2604      /* If the option type is OP_FILELIST, it's one of the options that names a
2605      file. */
2606    
2607      else if (op->type == OP_FILELIST)
2608        {
2609        fndatastr *fd = (fndatastr *)op->dataptr;
2610        fn = (fnstr *)malloc(sizeof(fnstr));
2611        if (fn == NULL)
2612          {
2613          fprintf(stderr, "pcregrep: malloc failed\n");
2614          goto EXIT2;
2615          }
2616        fn->next = NULL;
2617        fn->name = option_data;
2618        if (*(fd->anchor) == NULL)
2619          *(fd->anchor) = fn;
2620        else
2621          (*(fd->lastptr))->next = fn;
2622        *(fd->lastptr) = fn;
2623        }
2624    
2625      /* Handle OP_BINFILES */
2626    
2627      else if (op->type == OP_BINFILES)
2628        {
2629        if (strcmp(option_data, "binary") == 0)
2630          binary_files = BIN_BINARY;
2631        else if (strcmp(option_data, "without-match") == 0)
2632          binary_files = BIN_NOMATCH;
2633        else if (strcmp(option_data, "text") == 0)
2634          binary_files = BIN_TEXT;
2635        else
2636          {
2637          fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2638            option_data);
2639          pcregrep_exit(usage(2));
2640          }
2641        }
2642    
2643      /* Otherwise, deal with single string or numeric data values. */
2644    
2645      else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2646               op->type != OP_OP_NUMBER)
2647        {
2648        *((char **)op->dataptr) = option_data;
2649        }
2650    
2651      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2652      only for unpicking arguments, so just keep it simple. */
2653    
2654      else
2655        {
2656        unsigned long int n = 0;
2657        char *endptr = option_data;
2658        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2659        while (isdigit((unsigned char)(*endptr)))
2660          n = n * 10 + (int)(*endptr++ - '0');
2661        if (toupper(*endptr) == 'K')
2662          {
2663          n *= 1024;
2664          endptr++;
2665          }
2666        else if (toupper(*endptr) == 'M')
2667          {
2668          n *= 1024*1024;
2669          endptr++;
2670          }
2671        if (*endptr != 0)
2672          {
2673          if (longop)
2674            {
2675            char *equals = strchr(op->long_name, '=');
2676            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2677              (int)(equals - op->long_name);
2678            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2679              option_data, nlen, op->long_name);
2680          }          }
2681        else options = handle_option(*s++, options);        else
2682            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2683              option_data, op->one_char);
2684          pcregrep_exit(usage(2));
2685        }        }
2686        if (op->type == OP_LONGNUMBER)
2687            *((unsigned long int *)op->dataptr) = n;
2688        else
2689            *((int *)op->dataptr) = n;
2690      }      }
2691    }    }
2692    
2693  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  /* Options have been decoded. If -C was used, its value is used as a default
2694  hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  for -A and -B. */
2695    
2696  if (pattern_list == NULL || hints_list == NULL)  if (both_context > 0)
2697    {    {
2698    fprintf(stderr, "pcregrep: malloc failed\n");    if (after_context == 0) after_context = both_context;
2699    return 2;    if (before_context == 0) before_context = both_context;
2700    }    }
2701    
2702  /* Compile the regular expression(s). */  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2703    However, the latter two set only_matching. */
2704    
2705  if (pattern_filename != NULL)  if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2706        (file_offsets && line_offsets))
2707    {    {
2708    FILE *f = fopen(pattern_filename, "r");    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2709    char buffer[BUFSIZ];      "and/or --line-offsets\n");
2710    if (f == NULL)    pcregrep_exit(usage(2));
2711      }
2712    
2713    if (file_offsets || line_offsets) only_matching = 0;
2714    
2715    /* If a locale has not been provided as an option, see if the LC_CTYPE or
2716    LC_ALL environment variable is set, and if so, use it. */
2717    
2718    if (locale == NULL)
2719      {
2720      locale = getenv("LC_ALL");
2721      locale_from = "LCC_ALL";
2722      }
2723    
2724    if (locale == NULL)
2725      {
2726      locale = getenv("LC_CTYPE");
2727      locale_from = "LC_CTYPE";
2728      }
2729    
2730    /* If a locale has been provided, set it, and generate the tables that PCRE
2731    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2732    
2733    if (locale != NULL)
2734      {
2735      if (setlocale(LC_CTYPE, locale) == NULL)
2736      {      {
2737      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2738        strerror(errno));        locale, locale_from);
2739        return 2;
2740        }
2741      pcretables = pcre_maketables();
2742      }
2743    
2744    /* Sort out colouring */
2745    
2746    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2747      {
2748      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2749      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2750      else
2751        {
2752        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2753          colour_option);
2754      return 2;      return 2;
2755      }      }
2756    while (fgets(buffer, sizeof(buffer), f) != NULL)    if (do_colour)
2757      {      {
2758      char *s = buffer + (int)strlen(buffer);      char *cs = getenv("PCREGREP_COLOUR");
2759      if (pattern_count >= MAX_PATTERN_COUNT)      if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2760        {      if (cs != NULL) colour_string = cs;
       fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",  
         MAX_PATTERN_COUNT);  
       return 2;  
       }  
     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;  
     if (s == buffer) continue;  
     *s = 0;  
     pattern_list[pattern_count] = pcre_compile(buffer, options, &error,  
       &errptr, NULL);  
     if (pattern_list[pattern_count++] == NULL)  
       {  
       fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",  
         pattern_count, errptr, error);  
       return 2;  
       }  
2761      }      }
   fclose(f);  
2762    }    }
2763    
2764  /* If no file name, a single regex must be given inline */  /* Interpret the newline type; the default settings are Unix-like. */
2765    
2766    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2767      {
2768      pcre_options |= PCRE_NEWLINE_CR;
2769      endlinetype = EL_CR;
2770      }
2771    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2772      {
2773      pcre_options |= PCRE_NEWLINE_LF;
2774      endlinetype = EL_LF;
2775      }
2776    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2777      {
2778      pcre_options |= PCRE_NEWLINE_CRLF;
2779      endlinetype = EL_CRLF;
2780      }
2781    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2782      {
2783      pcre_options |= PCRE_NEWLINE_ANY;
2784      endlinetype = EL_ANY;
2785      }
2786    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2787      {
2788      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2789      endlinetype = EL_ANYCRLF;
2790      }
2791  else  else
2792    {    {
2793    if (i >= argc) return usage(0);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2794    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);    return 2;
2795    if (pattern_list[0] == NULL)    }
2796    
2797    /* Interpret the text values for -d and -D */
2798    
2799    if (dee_option != NULL)
2800      {
2801      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2802      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2803      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2804      else
2805        {
2806        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2807        return 2;
2808        }
2809      }
2810    
2811    if (DEE_option != NULL)
2812      {
2813      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2814      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2815      else
2816      {      {
2817      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
       error);  
2818      return 2;      return 2;
2819      }      }
   pattern_count++;  
2820    }    }
2821    
2822  /* Study the regular expressions, as we will be running them may times */  /* Check the values for Jeffrey Friedl's debugging options. */
2823    
2824    #ifdef JFRIEDL_DEBUG
2825    if (S_arg > 9)
2826      {
2827      fprintf(stderr, "pcregrep: bad value for -S option\n");
2828      return 2;
2829      }
2830    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2831      {
2832      if (jfriedl_XT == 0) jfriedl_XT = 1;
2833      if (jfriedl_XR == 0) jfriedl_XR = 1;
2834      }
2835    #endif
2836    
2837    /* Get memory for the main buffer. */
2838    
2839    bufsize = 3*bufthird;
2840    main_buffer = (char *)malloc(bufsize);
2841    
2842    if (main_buffer == NULL)
2843      {
2844      fprintf(stderr, "pcregrep: malloc failed\n");
2845      goto EXIT2;
2846      }
2847    
2848    /* If no patterns were provided by -e, and there are no files provided by -f,
2849    the first argument is the one and only pattern, and it must exist. */
2850    
2851    if (patterns == NULL && pattern_files == NULL)
2852      {
2853      if (i >= argc) return usage(2);
2854      patterns = patterns_last = add_pattern(argv[i++], NULL);
2855      if (patterns == NULL) goto EXIT2;
2856      }
2857    
2858    /* Compile the patterns that were provided on the command line, either by
2859    multiple uses of -e or as a single unkeyed pattern. We cannot do this until
2860    after all the command-line options are read so that we know which PCRE options
2861    to use. When -F is used, compile_pattern() may add another block into the
2862    chain, so we must not access the next pointer till after the compile. */
2863    
2864    for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
2865      {
2866      if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
2867           (j == 1 && patterns->next == NULL)? 0 : j))
2868        goto EXIT2;
2869      }
2870    
2871    /* Read and compile the regular expressions that are provided in files. */
2872    
2873    for (fn = pattern_files; fn != NULL; fn = fn->next)
2874      {
2875      if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
2876        goto EXIT2;
2877      }
2878    
2879    /* Study the regular expressions, as we will be running them many times. Unless
2880    JIT has been explicitly disabled, arrange a stack for it to use. */
2881    
2882    #ifdef SUPPORT_PCREGREP_JIT
2883    if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2884      jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
2885    #endif
2886    
2887  for (j = 0; j < pattern_count; j++)  for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
2888    {    {
2889    hints_list[j] = pcre_study(pattern_list[j], 0, &error);    cp->hint = pcre_study(cp->compiled, study_options, &error);
2890    if (error != NULL)    if (error != NULL)
2891      {      {
2892      char s[16];      char s[16];
2893      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
2894      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2895      return 2;      goto EXIT2;
2896        }
2897    #ifdef SUPPORT_PCREGREP_JIT
2898      if (jit_stack != NULL && cp->hint != NULL)
2899        pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
2900    #endif
2901      }
2902    
2903    /* If --match-limit or --recursion-limit was set, put the value(s) into the
2904    pcre_extra block for each pattern. */
2905    
2906    if (match_limit > 0 || match_limit_recursion > 0)
2907      {
2908      for (cp = patterns; cp != NULL; cp = cp->next)
2909        {
2910        if (cp->hint == NULL)
2911          {
2912          cp->hint = (pcre_extra *)malloc(sizeof(pcre_extra));
2913          if (cp->hint == NULL)
2914            {
2915            fprintf(stderr, "pcregrep: malloc failed\n");
2916            pcregrep_exit(2);
2917            }
2918          }
2919        if (match_limit > 0)
2920          {
2921          cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
2922          cp->hint->match_limit = match_limit;
2923          }
2924        if (match_limit_recursion > 0)
2925          {
2926          cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2927          cp->hint->match_limit_recursion = match_limit_recursion;
2928          }
2929      }      }
2930    }    }
2931    
2932  /* If there are no further arguments, do the business on stdin and exit */  /* If there are include or exclude patterns read from the command line, compile
2933    them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
2934    0. */
2935    
2936  if (i >= argc) return pcregrep(stdin, NULL);  for (j = 0; j < 4; j++)
2937      {
2938      int k;
2939      for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
2940        {
2941        if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
2942             (k == 1 && cp->next == NULL)? 0 : k))
2943          goto EXIT2;
2944        }
2945      }
2946    
2947    /* Read and compile include/exclude patterns from files. */
2948    
2949    for (fn = include_from; fn != NULL; fn = fn->next)
2950      {
2951      if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
2952        goto EXIT2;
2953      }
2954    
2955    for (fn = exclude_from; fn != NULL; fn = fn->next)
2956      {
2957      if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
2958        goto EXIT2;
2959      }
2960    
2961    /* If there are no files that contain lists of files to search, and there are
2962    no file arguments, search stdin, and then exit. */
2963    
2964    if (file_lists == NULL && i >= argc)
2965      {
2966      rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2967        (filenames > FN_DEFAULT)? stdin_name : NULL);
2968      goto EXIT;
2969      }
2970    
2971    /* If any files that contain lists of files to search have been specified,
2972    read them line by line and search the given files. */
2973    
2974  /* Otherwise, work through the remaining arguments as files or directories.  for (fn = file_lists; fn != NULL; fn = fn->next)
2975  Pass in the fact that there is only one argument at top level - this suppresses    {
2976  the file name if the argument is not a directory. */    char buffer[PATBUFSIZE];
2977      FILE *fl;
2978      if (strcmp(fn->name, "-") == 0) fl = stdin; else
2979        {
2980        fl = fopen(fn->name, "rb");
2981        if (fl == NULL)
2982          {
2983          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
2984            strerror(errno));
2985          goto EXIT2;
2986          }
2987        }
2988      while (fgets(buffer, PATBUFSIZE, fl) != NULL)
2989        {
2990        int frc;
2991        char *end = buffer + (int)strlen(buffer);
2992        while (end > buffer && isspace(end[-1])) end--;
2993        *end = 0;
2994        if (*buffer != 0)
2995          {
2996          frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
2997          if (frc > 1) rc = frc;
2998            else if (frc == 0 && rc == 1) rc = 0;
2999          }
3000        }
3001      if (fl != stdin) fclose(fl);
3002      }
3003    
3004    /* After handling file-list, work through remaining arguments. Pass in the fact
3005    that there is only one argument at top level - this suppresses the file name if
3006    the argument is not a directory and filenames are not otherwise forced. */
3007    
3008  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1 && file_lists == NULL;
 if (filenames_only) filenames = TRUE;  
3009    
3010  for (; i < argc; i++)  for (; i < argc; i++)
3011    {    {
3012    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
3013    if (frc == 0 && rc == 1) rc = 0;      only_one_at_top);
3014      if (frc > 1) rc = frc;
3015        else if (frc == 0 && rc == 1) rc = 0;
3016    }    }
3017    
3018  return rc;  EXIT:
3019    #ifdef SUPPORT_PCREGREP_JIT
3020    if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3021    #endif
3022    
3023    if (main_buffer != NULL) free(main_buffer);
3024    
3025    free_pattern_chain(patterns);
3026    free_pattern_chain(include_patterns);
3027    free_pattern_chain(include_dir_patterns);
3028    free_pattern_chain(exclude_patterns);
3029    free_pattern_chain(exclude_dir_patterns);
3030    
3031    free_file_chain(exclude_from);
3032    free_file_chain(include_from);
3033    free_file_chain(pattern_files);
3034    free_file_chain(file_lists);
3035    
3036    pcregrep_exit(rc);
3037    
3038    EXIT2:
3039    rc = 2;
3040    goto EXIT;
3041  }  }
3042    
3043  /* End */  /* End of pcregrep */

Legend:
Removed from v.53  
changed lines
  Added in v.1005

  ViewVC Help
Powered by ViewVC 1.1.5