/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 63 by nigel, Sat Feb 24 21:40:03 2007 UTC revision 1033 by ph10, Mon Sep 10 11:02:48 2012 UTC
# Line 4  Line 4 
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories. */  directories.
8    
9               Copyright (c) 1997-2012 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55    #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 19  directories. */ Line 70  directories. */
70    
71  typedef int BOOL;  typedef int BOOL;
72    
73  #define VERSION "3.0 14-Jan-2003"  #define OFFSET_SIZE 99
74  #define MAX_PATTERN_COUNT 100  
75    #if BUFSIZ > 8192
76    #define MAXPATLEN BUFSIZ
77    #else
78    #define MAXPATLEN 8192
79    #endif
80    
81    #define PATBUFSIZE (MAXPATLEN + 10)   /* Allows for prefix+suffix */
82    
83    /* Values for the "filenames" variable, which specifies options for file name
84    output. The order is important; it is assumed that a file name is wanted for
85    all values greater than FN_DEFAULT. */
86    
87    enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
88    
89    /* File reading styles */
90    
91    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
92    
93    /* Actions for the -d and -D options */
94    
95    enum { dee_READ, dee_SKIP, dee_RECURSE };
96    enum { DEE_READ, DEE_SKIP };
97    
98    /* Actions for special processing options (flag bits) */
99    
100    #define PO_WORD_MATCH     0x0001
101    #define PO_LINE_MATCH     0x0002
102    #define PO_FIXED_STRINGS  0x0004
103    
104    /* Line ending types */
105    
106    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
107    
108    /* Binary file options */
109    
110    enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
111    
112    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
113    environments), a warning is issued if the value of fwrite() is ignored.
114    Unfortunately, casting to (void) does not suppress the warning. To get round
115    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
116    apply to fprintf(). */
117    
/* The expansion is wrapped in do { } while (0) so that "FWRITE(...);" is exactly
one statement and can be used as the unbraced body of an if/else. The result of
fwrite() is still consumed by the inner "if", which is what keeps gcc quiet. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
119    
120    
121    
122  /*************************************************  /*************************************************
123  *               Global variables                 *  *               Global variables                 *
124  *************************************************/  *************************************************/
125    
126  static char *pattern_filename = NULL;  /* Jeffrey Friedl has some debugging requirements that are not part of the
127  static int  pattern_count = 0;  regular code. */
128  static pcre **pattern_list;  
129  static pcre_extra **hints_list;  #ifdef JFRIEDL_DEBUG
130    static int S_arg = -1;
131    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
132    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
133    static const char *jfriedl_prefix = "";
134    static const char *jfriedl_postfix = "";
135    #endif
136    
137    static int  endlinetype;
138    
139    static char *colour_string = (char *)"1;31";
140    static char *colour_option = NULL;
141    static char *dee_option = NULL;
142    static char *DEE_option = NULL;
143    static char *locale = NULL;
144    static char *main_buffer = NULL;
145    static char *newline = NULL;
146    static char *stdin_name = (char *)"(standard input)";
147    
148    static const unsigned char *pcretables = NULL;
149    
150    static int after_context = 0;
151    static int before_context = 0;
152    static int binary_files = BIN_BINARY;
153    static int both_context = 0;
154    static int bufthird = PCREGREP_BUFSIZE;
155    static int bufsize = 3*PCREGREP_BUFSIZE;
156    
157    #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
158    static int dee_action = dee_SKIP;
159    #else
160    static int dee_action = dee_READ;
161    #endif
162    
163    static int DEE_action = DEE_READ;
164    static int error_count = 0;
165    static int filenames = FN_DEFAULT;
166    static int only_matching = -1;
167    static int pcre_options = 0;
168    static int process_options = 0;
169    
170    #ifdef SUPPORT_PCREGREP_JIT
171    static int study_options = PCRE_STUDY_JIT_COMPILE;
172    #else
173    static int study_options = 0;
174    #endif
175    
176    static unsigned long int match_limit = 0;
177    static unsigned long int match_limit_recursion = 0;
178    
179  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
180  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
181  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
182    static BOOL hyphenpending = FALSE;
183  static BOOL invert = FALSE;  static BOOL invert = FALSE;
184    static BOOL line_buffered = FALSE;
185    static BOOL line_offsets = FALSE;
186    static BOOL multiline = FALSE;
187  static BOOL number = FALSE;  static BOOL number = FALSE;
188  static BOOL recurse = FALSE;  static BOOL omit_zero_count = FALSE;
189    static BOOL resource_error = FALSE;
190    static BOOL quiet = FALSE;
191  static BOOL silent = FALSE;  static BOOL silent = FALSE;
192  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
193    
194    /* Structure for list of file names (for -f and --{in,ex}clude-from) */
195    
/* One node in a singly linked chain of file names. */

typedef struct fnstr fnstr;

struct fnstr {
  fnstr *next;    /* following node, or NULL for the last one */
  char *name;     /* the file name */
};
200    
201    static fnstr *exclude_from = NULL;
202    static fnstr *exclude_from_last = NULL;
203    static fnstr *include_from = NULL;
204    static fnstr *include_from_last = NULL;
205    
206    static fnstr *file_lists = NULL;
207    static fnstr *file_lists_last = NULL;
208    static fnstr *pattern_files = NULL;
209    static fnstr *pattern_files_last = NULL;
210    
211    /* Structure for holding the two variables that describe a file name chain. */
212    
213    typedef struct fndatastr {
214      fnstr **anchor;
215      fnstr **lastptr;
216    } fndatastr;
217    
218    static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
219    static fndatastr include_from_data = { &include_from, &include_from_last };
220    static fndatastr file_lists_data = { &file_lists, &file_lists_last };
221    static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
222    
223    /* Structure for pattern and its compiled form; used for matching patterns and
224    also for include/exclude patterns. */
225    
226    typedef struct patstr {
227      struct patstr *next;
228      char *string;
229      pcre *compiled;
230      pcre_extra *hint;
231    } patstr;
232    
233    static patstr *patterns = NULL;
234    static patstr *patterns_last = NULL;
235    static patstr *include_patterns = NULL;
236    static patstr *include_patterns_last = NULL;
237    static patstr *exclude_patterns = NULL;
238    static patstr *exclude_patterns_last = NULL;
239    static patstr *include_dir_patterns = NULL;
240    static patstr *include_dir_patterns_last = NULL;
241    static patstr *exclude_dir_patterns = NULL;
242    static patstr *exclude_dir_patterns_last = NULL;
243    
244    /* Structure holding the two variables that describe a pattern chain. A pointer
245    to such structures is used for each appropriate option. */
246    
247    typedef struct patdatastr {
248      patstr **anchor;
249      patstr **lastptr;
250    } patdatastr;
251    
252    static patdatastr match_patdata = { &patterns, &patterns_last };
253    static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
254    static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
255    static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
256    static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
257    
258    static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
259                                     &include_dir_patterns, &exclude_dir_patterns };
260    
261    static const char *incexname[4] = { "--include", "--exclude",
262                                        "--include-dir", "--exclude-dir" };
263    
264  /* Structure for options and list of them */  /* Structure for options and list of them */
265    
266    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
267           OP_OP_NUMBER, OP_PATLIST, OP_FILELIST, OP_BINFILES };
268    
269  typedef struct option_item {  typedef struct option_item {
270      int type;
271    int one_char;    int one_char;
272    char *long_name;    void *dataptr;
273    char *help_text;    const char *long_name;
274      const char *help_text;
275  } option_item;  } option_item;
276    
277    /* Options without a single-letter equivalent get a negative value. This can be
278    used to identify them. */
279    
280    #define N_COLOUR       (-1)
281    #define N_EXCLUDE      (-2)
282    #define N_EXCLUDE_DIR  (-3)
283    #define N_HELP         (-4)
284    #define N_INCLUDE      (-5)
285    #define N_INCLUDE_DIR  (-6)
286    #define N_LABEL        (-7)
287    #define N_LOCALE       (-8)
288    #define N_NULL         (-9)
289    #define N_LOFFSETS     (-10)
290    #define N_FOFFSETS     (-11)
291    #define N_LBUFFER      (-12)
292    #define N_M_LIMIT      (-13)
293    #define N_M_LIMIT_REC  (-14)
294    #define N_BUFSIZE      (-15)
295    #define N_NOJIT        (-16)
296    #define N_FILE_LIST    (-17)
297    #define N_BINARY_FILES (-18)
298    #define N_EXCLUDE_FROM (-19)
299    #define N_INCLUDE_FROM (-20)
300    
301  static option_item optionlist[] = {  static option_item optionlist[] = {
302    { -1,  "help",         "display this help and exit" },    { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
303    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
304    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
305    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
306    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
307    { 'n', "line-number",  "print line number with output lines" },    { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
308    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
309    { 's', "no-messages",  "suppress error messages" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
310    { 'u', "utf-8",        "use UTF-8 mode" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
311    { 'V', "version",      "print version information and exit" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
312    { 'v', "invert-match", "select non-matching lines" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
313    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
314    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
315    { 0,    NULL,           NULL }    { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
316      { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
317      { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
318      { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
319      { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
320      { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
321      { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
322      { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
323      { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
324    #ifdef SUPPORT_PCREGREP_JIT
325      { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
326    #else
327      { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
328    #endif
329      { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
330      { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
331      { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
332      { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
333      { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
334      { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
335      { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
336      { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
337      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
338      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
339      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
340      { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
341      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
342      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
343      { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
344      { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
345      { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
346      { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
347      { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
348      { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
349    
350      /* These two were accidentally implemented with underscores instead of
351      hyphens in the option names. As this was not discovered for several releases,
352      the incorrect versions are left in the table for compatibility. However, the
353      --help function misses out any option that has an underscore in its name. */
354    
355      { OP_PATLIST,   N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
356      { OP_PATLIST,   N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
357    
358    #ifdef JFRIEDL_DEBUG
359      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
360    #endif
361      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
362      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
363      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
364      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
365      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
366      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
367      { OP_NODATA,    0,        NULL,               NULL,            NULL }
368  };  };
369    
370    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
371    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
372    that the combination of -w and -x has the same effect as -x on its own, so we
373    can treat them as the same. Note that the PATBUFSIZE macro assumes the longest
374    prefix+suffix is 10 characters; if anything longer is added, it must be
375    adjusted. */
376    
377    static const char *prefix[] = {
378      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
379    
380    static const char *suffix[] = {
381      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
382    
383    /* UTF-8 tables - used only when the newline setting is "any". */
384    
385    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
386    
387    const char utf8_table4[] = {
388      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
389      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
390      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
391      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
392    
393    
394    
395    /*************************************************
396    *          Add item to chain of patterns         *
397    *************************************************/
398    
399    /* Used to add an item onto a chain, or just return an unconnected item if the
400    "after" argument is NULL.
401    
402    Arguments:
403      s          pattern string to add
404      after      if not NULL points to item to insert after
405    
406    Returns:     new pattern block, or NULL after malloc failure
407    */
408    
409    static patstr *
410    add_pattern(char *s, patstr *after)
411    {
412    patstr *p = (patstr *)malloc(sizeof(patstr));
413    if (p == NULL)
414      {
415      fprintf(stderr, "pcregrep: malloc failed\n");
416      return NULL;
417      }
418    if (strlen(s) > MAXPATLEN)
419      {
420      fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
421        MAXPATLEN);
422      return NULL;
423      }
424    p->next = NULL;
425    p->string = s;
426    p->compiled = NULL;
427    p->hint = NULL;
428    
429    if (after != NULL)
430      {
431      p->next = after->next;
432      after->next = p;
433      }
434    return p;
435    }
436    
437    
438    /*************************************************
439    *           Free chain of patterns               *
440    *************************************************/
441    
442    /* Used for several chains of patterns.
443    
444    Argument: pointer to start of chain
445    Returns:  nothing
446    */
447    
448    static void
449    free_pattern_chain(patstr *pc)
450    {
451    while (pc != NULL)
452      {
453      patstr *p = pc;
454      pc = p->next;
455      if (p->hint != NULL) pcre_free_study(p->hint);
456      if (p->compiled != NULL) pcre_free(p->compiled);
457      free(p);
458      }
459    }
460    
461    
462    /*************************************************
463    *           Free chain of file names             *
464    *************************************************/
465    
466    /*
467    Argument: pointer to start of chain
468    Returns:  nothing
469    */
470    
471    static void
472    free_file_chain(fnstr *fn)
473    {
474    while (fn != NULL)
475      {
476      fnstr *f = fn;
477      fn = f->next;
478      free(f);
479      }
480    }
481    
482    
483    /*************************************************
484    *         Exit from the program                  *
485    *************************************************/
486    
487    /* If there has been a resource error, give a suitable message.
488    
489    Argument:  the return code
490    Returns:   does not return
491    */
492    
493    static void
494    pcregrep_exit(int rc)
495    {
496    if (resource_error)
497      {
498      fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
499        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
500        PCRE_ERROR_JIT_STACKLIMIT);
501      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
502      }
503    
504    exit(rc);
505    }
506    
507    
508  /*************************************************  /*************************************************
509  *       Functions for directory scanning         *  *            OS-specific functions               *
510  *************************************************/  *************************************************/
511    
512  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
513  although at present the only ones are for Unix, Win32, and for "no directory  although at present the only ones are for Unix, Win32, and for "no support". */
 recursion support". */  
514    
515    
516  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
517    
518  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
519  #include <sys/types.h>  #include <sys/types.h>
520  #include <sys/stat.h>  #include <sys/stat.h>
521  #include <dirent.h>  #include <dirent.h>
522    
523  typedef DIR directory_type;  typedef DIR directory_type;
524    #define FILESEP '/'
525    
526  int  static int
527  isdirectory(char *filename)  isdirectory(char *filename)
528  {  {
529  struct stat statbuf;  struct stat statbuf;
530  if (stat(filename, &statbuf) < 0)  if (stat(filename, &statbuf) < 0)
531    return 0;        /* In the expectation that opening as a file will fail */    return 0;        /* In the expectation that opening as a file will fail */
532  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return (statbuf.st_mode & S_IFMT) == S_IFDIR;
533  }  }
534    
535  directory_type *  static directory_type *
536  opendirectory(char *filename)  opendirectory(char *filename)
537  {  {
538  return opendir(filename);  return opendir(filename);
539  }  }
540    
541  char *  static char *
542  readdirectory(directory_type *dir)  readdirectory(directory_type *dir)
543  {  {
544  for (;;)  for (;;)
# Line 110  for (;;) Line 548  for (;;)
548    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
549      return dent->d_name;      return dent->d_name;
550    }    }
551  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
552  }  }
553    
554  void  static void
555  closedirectory(directory_type *dir)  closedirectory(directory_type *dir)
556  {  {
557  closedir(dir);  closedir(dir);
558  }  }
559    
560    
561    /************* Test for regular file in Unix **********/
562    
/* Returns 1 if the path names a regular file and 0 if it names anything else.
A path that cannot be examined with stat() also yields 1, in the expectation
that opening it as a file will then fail. */

static int
isregfile(char *filename)
{
struct stat info;

if (stat(filename, &info) < 0) return 1;
return S_ISREG(info.st_mode) != 0;
}
571    
572    
573    /************* Test for a terminal in Unix **********/
574    
575    static BOOL
576    is_stdout_tty(void)
577    {
578    return isatty(fileno(stdout));
579    }
580    
581    static BOOL
582    is_file_tty(FILE *f)
583    {
584    return isatty(fileno(f));
585    }
586    
587    
588  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
589    
590  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
591  Lionel Fourquaux. */  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
592    when it did not exist. David Byron added a patch that moved the #include of
593    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
594    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
595    undefined when it is indeed undefined. */
596    
597    #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
 #elif HAVE_WIN32API  
598    
599  #ifndef STRICT  #ifndef STRICT
600  # define STRICT  # define STRICT
# Line 134  Lionel Fourquaux. */ Line 602  Lionel Fourquaux. */
602  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
603  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
604  #endif  #endif
605    
606  #include <windows.h>  #include <windows.h>
607    
608    #ifndef INVALID_FILE_ATTRIBUTES
609    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
610    #endif
611    
612  typedef struct directory_type  typedef struct directory_type
613  {  {
614  HANDLE handle;  HANDLE handle;
# Line 143  BOOL first; Line 616  BOOL first;
616  WIN32_FIND_DATA data;  WIN32_FIND_DATA data;
617  } directory_type;  } directory_type;
618    
619    #define FILESEP '/'
620    
621  int  int
622  isdirectory(char *filename)  isdirectory(char *filename)
623  {  {
624  DWORD attr = GetFileAttributes(filename);  DWORD attr = GetFileAttributes(filename);
625  if (attr == INVALID_FILE_ATTRIBUTES)  if (attr == INVALID_FILE_ATTRIBUTES)
626    return 0;    return 0;
627  return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;  return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
628  }  }
629    
630  directory_type *  directory_type *
# Line 160  char *pattern; Line 635  char *pattern;
635  directory_type *dir;  directory_type *dir;
636  DWORD err;  DWORD err;
637  len = strlen(filename);  len = strlen(filename);
638  pattern = (char *) malloc(len + 3);  pattern = (char *)malloc(len + 3);
639  dir = (directory_type *) malloc(sizeof(*dir));  dir = (directory_type *)malloc(sizeof(*dir));
640  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
641    {    {
642    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
643    exit(2);    pcregrep_exit(2);
644    }    }
645  memcpy(pattern, filename, len);  memcpy(pattern, filename, len);
646  memcpy(&(pattern[len]), "\\*", 3);  memcpy(&(pattern[len]), "\\*", 3);
# Line 213  free(dir); Line 688  free(dir);
688  }  }
689    
690    
691    /************* Test for regular file in Win32 **********/
692    
693    /* I don't know how to do this, or if it can be done; assume all paths are
694    regular if they are not directories. */
695    
/* Anything that isdirectory() does not report as a directory is taken to be a
regular file. */

int
isregfile(char *filename)
{
if (isdirectory(filename)) return 0;
return 1;
}
700    
701    
702    /************* Test for a terminal in Win32 **********/
703    
704    /* I don't know how to do this; assume never */
705    
706    static BOOL
707    is_stdout_tty(void)
708    {
709    return FALSE;
710    }
711    
712    static BOOL
713    is_file_tty(FILE *f)
714    {
715    return FALSE;
716    }
717    
718    
719  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
720    
721  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
722    
723  #else  #else
724    
725    #define FILESEP 0
726  typedef void directory_type;  typedef void directory_type;
727    
728  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
729  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
730  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
731  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
732    
733    
734    /************* Test for regular when we can't do it **********/
735    
736    /* Assume all files are regular. */
737    
/* With no way of examining the path in this branch, every file is reported as
regular. */

int
isregfile(char *filename)
{
(void)filename;    /* the path is never inspected */
return 1;
}
739    
740    
741    /************* Test for a terminal when we can't do it **********/
742    
743    static BOOL
744    is_stdout_tty(void)
745    {
746    return FALSE;
747    }
748    
749    static BOOL
750    is_file_tty(FILE *f)
751    {
752    return FALSE;
753    }
754    
755  #endif  #endif
756    
757    
758    
759  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
760  /*************************************************  /*************************************************
761  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
762  *************************************************/  *************************************************/
# Line 253  return sys_errlist[n]; Line 779  return sys_errlist[n];
779    
780    
781  /*************************************************  /*************************************************
782  *              Grep an individual file           *  *            Test exclude/includes               *
783  *************************************************/  *************************************************/
784    
785  static int  /* If any exclude pattern matches, the path is excluded. Otherwise, unless
786  pcregrep(FILE *in, char *name)  there are no includes, the path must match an include pattern.
787    
788    Arguments:
789      path      the path to be matched
790      ip        the chain of include patterns
791      ep        the chain of exclude patterns
792    
793    Returns:    TRUE if the path is not excluded
794    */
795    
796    static BOOL
797    test_incexc(char *path, patstr *ip, patstr *ep)
798  {  {
799  int rc = 1;  int plen = strlen(path);
 int linenumber = 0;  
 int count = 0;  
 int offsets[99];  
 char buffer[BUFSIZ];  
800    
801  while (fgets(buffer, sizeof(buffer), in) != NULL)  for (; ep != NULL; ep = ep->next)
802    {    {
803    BOOL match = FALSE;    if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
804    int i;      return FALSE;
805    int length = (int)strlen(buffer);    }
   if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;  
   linenumber++;  
806    
807    for (i = 0; !match && i < pattern_count; i++)  if (ip == NULL) return TRUE;
     {  
     match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,  
       offsets, 99) >= 0;  
     if (match && whole_lines && offsets[1] != length) match = FALSE;  
     }  
808    
809    if (match != invert)  for (; ip != NULL; ip = ip->next)
810      {    {
811      if (count_only) count++;    if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
812        return TRUE;
813      }
814    
815      else if (filenames_only)  return FALSE;
816        {  }
       fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);  
       return 0;  
       }  
817    
     else if (silent) return 0;  
818    
     else  
       {  
       if (name != NULL) fprintf(stdout, "%s:", name);  
       if (number) fprintf(stdout, "%d:", linenumber);  
       fprintf(stdout, "%s\n", buffer);  
       }  
819    
820      rc = 0;  /*************************************************
821      }  *            Read one line of input              *
822    }  *************************************************/
823    
824  if (count_only)  /* Normally, input is read using fread() into a large buffer, so many lines may
825    be read at once. However, doing this for tty input means that no output appears
826    until a lot of input has been typed. Instead, tty input is handled line by
827    line. We cannot use fgets() for this, because it does not stop at a binary
828    zero, and therefore there is no way of telling how many characters it has read,
829    because there may be binary zeros embedded in the data.
830    
831    Arguments:
832      buffer     the buffer to read into
833      length     the maximum number of characters to read
834      f          the file
835    
836    Returns:     the number of characters read, zero at end of file
837    */
838    
/* Read bytes from f into buffer, one at a time, stopping after a newline has
been stored, when length bytes have been stored, or at end of file. The space
limit is tested before each byte is read, so nothing is read or written when
length is zero or negative. No terminating zero is added; the data may itself
contain binary zeros.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;

while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }

return (unsigned int)yield;
}
851    
852    
853    
   
854  /*************************************************  /*************************************************
855  *     Grep a file or recurse into a directory    *  *             Find end of line                   *
856  *************************************************/  *************************************************/
857    
858  static int  /* The length of the endline sequence that is found is set via lenptr. This may
859  grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,  be zero at the very end of the file if there is no line-ending sequence there.
   BOOL only_one_at_top)  
 {  
 int rc = 1;  
 int sep;  
 FILE *in;  
860    
861  /* If the file is a directory and we are recursing, scan each file within it.  Arguments:
862  The scanning code is localized so it can be made system-specific. */    p         current position in line
863      endptr    end of available data
864      lenptr    where to put the length of the eol sequence
865    
866    Returns:    pointer after the last byte of the line,
867                including the newline byte(s)
868    */
869    
870  if ((sep = isdirectory(filename)) != 0 && recurse)  static char *
871    end_of_line(char *p, char *endptr, int *lenptr)
872    {
873    switch(endlinetype)
874    {    {
875    char buffer[1024];    default:      /* Just in case */
876    char *nextfile;    case EL_LF:
877    directory_type *dir = opendirectory(filename);    while (p < endptr && *p != '\n') p++;
878      if (p < endptr)
879        {
880        *lenptr = 1;
881        return p + 1;
882        }
883      *lenptr = 0;
884      return endptr;
885    
886    if (dir == NULL)    case EL_CR:
887      while (p < endptr && *p != '\r') p++;
888      if (p < endptr)
889      {      {
890      fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,      *lenptr = 1;
891        strerror(errno));      return p + 1;
     return 2;  
892      }      }
893      *lenptr = 0;
894      return endptr;
895    
896    while ((nextfile = readdirectory(dir)) != NULL)    case EL_CRLF:
897      for (;;)
898      {      {
899      int frc;      while (p < endptr && *p != '\r') p++;
900      sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);      if (++p >= endptr)
901      frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);        {
902      if (frc == 0 && rc == 1) rc = 0;        *lenptr = 0;
903          return endptr;
904          }
905        if (*p == '\n')
906          {
907          *lenptr = 2;
908          return p + 1;
909          }
910      }      }
911      break;
912    
913    closedirectory(dir);    case EL_ANYCRLF:
914    return rc;    while (p < endptr)
915    }      {
916        int extra = 0;
917        register int c = *((unsigned char *)p);
918    
919  /* If the file is not a directory, or we are not recursing, scan it. If this is      if (utf8 && c >= 0xc0)
920  the first and only argument at top level, we don't show the file name (unless        {
921  we are only showing the file name). Otherwise, control is via the        int gcii, gcss;
922  show_filenames variable. */        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
923          gcss = 6*extra;
924          c = (c & utf8_table3[extra]) << gcss;
925          for (gcii = 1; gcii <= extra; gcii++)
926            {
927            gcss -= 6;
928            c |= (p[gcii] & 0x3f) << gcss;
929            }
930          }
931    
932  in = fopen(filename, "r");      p += 1 + extra;
 if (in == NULL)  
   {  
   fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));  
   return 2;  
   }  
933    
934  rc = pcregrep(in, (filenames_only || (show_filenames && !only_one_at_top))?      switch (c)
935    filename : NULL);        {
936  fclose(in);        case '\n':
937  return rc;        *lenptr = 1;
938  }        return p;
939    
940          case '\r':
941          if (p < endptr && *p == '\n')
942            {
943            *lenptr = 2;
944            p++;
945            }
946          else *lenptr = 1;
947          return p;
948    
949          default:
950          break;
951          }
952        }   /* End of loop for ANYCRLF case */
953    
954      *lenptr = 0;  /* Must have hit the end */
955      return endptr;
956    
957  /*************************************************    case EL_ANY:
958  *                Usage function                  *    while (p < endptr)
959  *************************************************/      {
960        int extra = 0;
961        register int c = *((unsigned char *)p);
962    
963  static int      if (utf8 && c >= 0xc0)
964  usage(int rc)        {
965  {        int gcii, gcss;
966  fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] [pattern] [file1 file2 ...]\n");        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
967  fprintf(stderr, "Type `pcregrep --help' for more information.\n");        gcss = 6*extra;
968  return rc;        c = (c & utf8_table3[extra]) << gcss;
969  }        for (gcii = 1; gcii <= extra; gcii++)
970            {
971            gcss -= 6;
972            c |= (p[gcii] & 0x3f) << gcss;
973            }
974          }
975    
976        p += 1 + extra;
977    
978        switch (c)
979          {
980          case '\n':    /* LF */
981          case '\v':    /* VT */
982          case '\f':    /* FF */
983          *lenptr = 1;
984          return p;
985    
986          case '\r':    /* CR */
987          if (p < endptr && *p == '\n')
988            {
989            *lenptr = 2;
990            p++;
991            }
992          else *lenptr = 1;
993          return p;
994    
995  /*************************************************  #ifndef EBCDIC
996  *                Help function                   *        case 0x85:    /* Unicode NEL */
997  *************************************************/        *lenptr = utf8? 2 : 1;
998          return p;
999    
1000          case 0x2028:  /* Unicode LS */
1001          case 0x2029:  /* Unicode PS */
1002          *lenptr = 3;
1003          return p;
1004    #endif  /* Not EBCDIC */
1005    
1006  static void        default:
1007          break;
1008          }
1009        }   /* End of loop for ANY case */
1010    
1011      *lenptr = 0;  /* Must have hit the end */
1012      return endptr;
1013      }     /* End of overall switch */
1014    }
1015    
1016    
1017    
1018    /*************************************************
1019    *         Find start of previous line            *
1020    *************************************************/
1021    
1022    /* This is called when looking back for before lines to print.
1023    
1024    Arguments:
1025      p         start of the subsequent line
1026      startptr  start of available data
1027    
1028    Returns:    pointer to the start of the previous line
1029    */
1030    
1031    static char *
1032    previous_line(char *p, char *startptr)
1033    {
1034    switch(endlinetype)
1035      {
1036      default:      /* Just in case */
1037      case EL_LF:
1038      p--;
1039      while (p > startptr && p[-1] != '\n') p--;
1040      return p;
1041    
1042      case EL_CR:
1043      p--;
1044      while (p > startptr && p[-1] != '\n') p--;
1045      return p;
1046    
1047      case EL_CRLF:
1048      for (;;)
1049        {
1050        p -= 2;
1051        while (p > startptr && p[-1] != '\n') p--;
1052        if (p <= startptr + 1 || p[-2] == '\r') return p;
1053        }
1054      return p;   /* But control should never get here */
1055    
1056      case EL_ANY:
1057      case EL_ANYCRLF:
1058      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1059      if (utf8) while ((*p & 0xc0) == 0x80) p--;
1060    
1061      while (p > startptr)
1062        {
1063        register int c;
1064        char *pp = p - 1;
1065    
1066        if (utf8)
1067          {
1068          int extra = 0;
1069          while ((*pp & 0xc0) == 0x80) pp--;
1070          c = *((unsigned char *)pp);
1071          if (c >= 0xc0)
1072            {
1073            int gcii, gcss;
1074            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
1075            gcss = 6*extra;
1076            c = (c & utf8_table3[extra]) << gcss;
1077            for (gcii = 1; gcii <= extra; gcii++)
1078              {
1079              gcss -= 6;
1080              c |= (pp[gcii] & 0x3f) << gcss;
1081              }
1082            }
1083          }
1084        else c = *((unsigned char *)pp);
1085    
1086        if (endlinetype == EL_ANYCRLF) switch (c)
1087          {
1088          case '\n':    /* LF */
1089          case '\r':    /* CR */
1090          return p;
1091    
1092          default:
1093          break;
1094          }
1095    
1096        else switch (c)
1097          {
1098          case '\n':    /* LF */
1099          case '\v':    /* VT */
1100          case '\f':    /* FF */
1101          case '\r':    /* CR */
1102    #ifndef EBCDIE
1103          case 0x85:    /* Unicode NEL */
1104          case 0x2028:  /* Unicode LS */
1105          case 0x2029:  /* Unicode PS */
1106    #endif  /* Not EBCDIC */
1107          return p;
1108    
1109          default:
1110          break;
1111          }
1112    
1113        p = pp;  /* Back one character */
1114        }        /* End of loop for ANY case */
1115    
1116      return startptr;  /* Hit start of data */
1117      }     /* End of overall switch */
1118    }
1119    
1120    
1121    
1122    
1123    
1124    /*************************************************
1125    *       Print the previous "after" lines         *
1126    *************************************************/
1127    
1128    /* This is called if we are about to lose said lines because of buffer filling,
1129    and at the end of the file. The data in the line is written using fwrite() so
1130    that a binary zero does not terminate it.
1131    
1132    Arguments:
1133      lastmatchnumber   the number of the last matching line, plus one
1134      lastmatchrestart  where we restarted after the last match
1135      endptr            end of available data
1136      printname         filename for printing
1137    
1138    Returns:            nothing
1139    */
1140    
1141    static void
1142    do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1143      char *printname)
1144    {
1145    if (after_context > 0 && lastmatchnumber > 0)
1146      {
1147      int count = 0;
1148      while (lastmatchrestart < endptr && count++ < after_context)
1149        {
1150        int ellength;
1151        char *pp = lastmatchrestart;
1152        if (printname != NULL) fprintf(stdout, "%s-", printname);
1153        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1154        pp = end_of_line(pp, endptr, &ellength);
1155        FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1156        lastmatchrestart = pp;
1157        }
1158      hyphenpending = TRUE;
1159      }
1160    }
1161    
1162    
1163    
1164    /*************************************************
1165    *   Apply patterns to subject till one matches   *
1166    *************************************************/
1167    
1168    /* This function is called to run through all patterns, looking for a match. It
1169    is used multiple times for the same subject when colouring is enabled, in order
1170    to find all possible matches.
1171    
1172    Arguments:
1173      matchptr     the start of the subject
1174      length       the length of the subject to match
1175      startoffset  where to start matching
1176      offsets      the offsets vector to fill in
1177      mrc          address of where to put the result of pcre_exec()
1178    
1179    Returns:      TRUE if there was a match
1180                  FALSE if there was no match
1181                  invert if there was a non-fatal error
1182    */
1183    
1184    static BOOL
1185    match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
1186      int *mrc)
1187    {
1188    int i;
1189    size_t slen = length;
1190    patstr *p = patterns;
1191    const char *msg = "this text:\n\n";
1192    
1193    if (slen > 200)
1194      {
1195      slen = 200;
1196      msg = "text that starts:\n\n";
1197      }
1198    for (i = 1; p != NULL; p = p->next, i++)
1199      {
1200      *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1201        startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
1202      if (*mrc >= 0) return TRUE;
1203      if (*mrc == PCRE_ERROR_NOMATCH) continue;
1204      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1205      if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1206      fprintf(stderr, "%s", msg);
1207      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1208      fprintf(stderr, "\n\n");
1209      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1210          *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1211        resource_error = TRUE;
1212      if (error_count++ > 20)
1213        {
1214        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1215        pcregrep_exit(2);
1216        }
1217      return invert;    /* No more matching; don't show the line again */
1218      }
1219    
1220    return FALSE;  /* No match, no errors */
1221    }
1222    
1223    
1224    
1225    /*************************************************
1226    *            Grep an individual file             *
1227    *************************************************/
1228    
1229    /* This is called from grep_or_recurse() below. It uses a buffer that is three
1230    times the value of bufthird. The matching point is never allowed to stray into
1231    the top third of the buffer, thus keeping more of the file available for
1232    context printing or for multiline scanning. For large files, the pointer will
1233    be in the middle third most of the time, so the bottom third is available for
1234    "before" context printing.
1235    
1236    Arguments:
1237      handle       the fopened FILE stream for a normal file
1238                   the gzFile pointer when reading is via libz
1239                   the BZFILE pointer when reading is via libbz2
1240      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1241      filename     the file name or NULL (for errors)
1242      printname    the file name if it is to be printed for each match
1243                   or NULL if the file name is not to be printed
1244                   it cannot be NULL if filenames[_nomatch]_only is set
1245    
1246    Returns:       0 if there was at least one match
1247                   1 otherwise (no matches)
1248                   2 if an overlong line is encountered
1249                   3 if there is a read error on a .bz2 file
1250    */
1251    
1252    static int
1253    pcregrep(void *handle, int frtype, char *filename, char *printname)
1254    {
1255    int rc = 1;
1256    int linenumber = 1;
1257    int lastmatchnumber = 0;
1258    int count = 0;
1259    int filepos = 0;
1260    int offsets[OFFSET_SIZE];
1261    char *lastmatchrestart = NULL;
1262    char *ptr = main_buffer;
1263    char *endptr;
1264    size_t bufflength;
1265    BOOL binary = FALSE;
1266    BOOL endhyphenpending = FALSE;
1267    BOOL input_line_buffered = line_buffered;
1268    FILE *in = NULL;                    /* Ensure initialized */
1269    
1270    #ifdef SUPPORT_LIBZ
1271    gzFile ingz = NULL;
1272    #endif
1273    
1274    #ifdef SUPPORT_LIBBZ2
1275    BZFILE *inbz2 = NULL;
1276    #endif
1277    
1278    
1279    /* Do the first read into the start of the buffer and set up the pointer to end
1280    of what we have. In the case of libz, a non-zipped .gz file will be read as a
1281    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1282    fail. */
1283    
1284    #ifdef SUPPORT_LIBZ
1285    if (frtype == FR_LIBZ)
1286      {
1287      ingz = (gzFile)handle;
1288      bufflength = gzread (ingz, main_buffer, bufsize);
1289      }
1290    else
1291    #endif
1292    
1293    #ifdef SUPPORT_LIBBZ2
1294    if (frtype == FR_LIBBZ2)
1295      {
1296      inbz2 = (BZFILE *)handle;
1297      bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1298      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1299      }                                    /* without the cast it is unsigned. */
1300    else
1301    #endif
1302    
1303      {
1304      in = (FILE *)handle;
1305      if (is_file_tty(in)) input_line_buffered = TRUE;
1306      bufflength = input_line_buffered?
1307        read_one_line(main_buffer, bufsize, in) :
1308        fread(main_buffer, 1, bufsize, in);
1309      }
1310    
1311    endptr = main_buffer + bufflength;
1312    
1313    /* Unless binary-files=text, see if we have a binary file. This uses the same
1314    rule as GNU grep, namely, a search for a binary zero byte near the start of the
1315    file. */
1316    
1317    if (binary_files != BIN_TEXT)
1318      {
1319      binary =
1320        memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1321      if (binary && binary_files == BIN_NOMATCH) return 1;
1322      }
1323    
1324    /* Loop while the current pointer is not at the end of the file. For large
1325    files, endptr will be at the end of the buffer when we are in the middle of the
1326    file, but ptr will never get there, because as soon as it gets over 2/3 of the
1327    way, the buffer is shifted left and re-filled. */
1328    
1329    while (ptr < endptr)
1330      {
1331      int endlinelength;
1332      int mrc = 0;
1333      int startoffset = 0;
1334      BOOL match;
1335      char *matchptr = ptr;
1336      char *t = ptr;
1337      size_t length, linelength;
1338    
1339      /* At this point, ptr is at the start of a line. We need to find the length
1340      of the subject string to pass to pcre_exec(). In multiline mode, it is the
1341      length of the remainder of the data in the buffer. Otherwise, it is the length of
1342      the next line, excluding the terminating newline. After matching, we always
1343      advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1344      option is used for compiling, so that any match is constrained to be in the
1345      first line. */
1346    
1347      t = end_of_line(t, endptr, &endlinelength);
1348      linelength = t - ptr - endlinelength;
1349      length = multiline? (size_t)(endptr - ptr) : linelength;
1350    
1351      /* Check to see if the line we are looking at extends right to the very end
1352      of the buffer without a line terminator. This means the line is too long to
1353      handle. */
1354    
1355      if (endlinelength == 0 && t == main_buffer + bufsize)
1356        {
1357        fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1358                        "pcregrep: check the --buffer-size option\n",
1359                        linenumber,
1360                        (filename == NULL)? "" : " of file ",
1361                        (filename == NULL)? "" : filename);
1362        return 2;
1363        }
1364    
1365      /* Extra processing for Jeffrey Friedl's debugging. */
1366    
1367    #ifdef JFRIEDL_DEBUG
1368      if (jfriedl_XT || jfriedl_XR)
1369      {
1370          #include <sys/time.h>
1371          #include <time.h>
1372          struct timeval start_time, end_time;
1373          struct timezone dummy;
1374          int i;
1375    
1376          if (jfriedl_XT)
1377          {
1378              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1379              const char *orig = ptr;
1380              ptr = malloc(newlen + 1);
1381              if (!ptr) {
1382                      printf("out of memory");
1383                      pcregrep_exit(2);
1384              }
1385              endptr = ptr;
1386              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1387              for (i = 0; i < jfriedl_XT; i++) {
1388                      strncpy(endptr, orig,  length);
1389                      endptr += length;
1390              }
1391              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1392              length = newlen;
1393          }
1394    
1395          if (gettimeofday(&start_time, &dummy) != 0)
1396                  perror("bad gettimeofday");
1397    
1398    
1399          for (i = 0; i < jfriedl_XR; i++)
1400              match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1401                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1402    
1403          if (gettimeofday(&end_time, &dummy) != 0)
1404                  perror("bad gettimeofday");
1405    
1406          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1407                          -
1408                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1409    
1410          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1411          return 0;
1412      }
1413    #endif
1414    
1415      /* We come back here after a match when the -o option (only_matching) is set,
1416      in order to find any further matches in the same line. */
1417    
1418      ONLY_MATCHING_RESTART:
1419    
1420      /* Run through all the patterns until one matches or there is an error other
1421      than NOMATCH. This code is in a subroutine so that it can be re-used for
1422      finding subsequent matches when colouring matched lines. */
1423    
1424      match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1425    
1426      /* If it's a match or a not-match (as required), do what's wanted. */
1427    
1428      if (match != invert)
1429        {
1430        BOOL hyphenprinted = FALSE;
1431    
1432        /* We've failed if we want a file that doesn't have any matches. */
1433    
1434        if (filenames == FN_NOMATCH_ONLY) return 1;
1435    
1436        /* Just count if just counting is wanted. */
1437    
1438        if (count_only) count++;
1439    
1440        /* When handling a binary file and binary-files==binary, the "binary"
1441        variable will be set true (it's false in all other cases). In this
1442        situation we just want to output the file name. No need to scan further. */
1443    
1444        else if (binary)
1445          {
1446          fprintf(stdout, "Binary file %s matches\n", filename);
1447          return 0;
1448          }
1449    
1450        /* If all we want is a file name, there is no need to scan any more lines
1451        in the file. */
1452    
1453        else if (filenames == FN_MATCH_ONLY)
1454          {
1455          fprintf(stdout, "%s\n", printname);
1456          return 0;
1457          }
1458    
1459        /* Likewise, if all we want is a yes/no answer. */
1460    
1461        else if (quiet) return 0;
1462    
1463        /* The --only-matching option prints just the substring that matched, or a
1464        captured portion of it, as long as this string is not empty, and the
1465        --file-offsets and --line-offsets options output offsets for the matching
1466        substring (they both force --only-matching = 0). None of these options
1467        prints any context. Afterwards, adjust the start and then jump back to look
1468        for further matches in the same line. If we are in invert mode, however,
1469        nothing is printed and we do not restart - this could still be useful
1470        because the return code is set. */
1471    
1472        else if (only_matching >= 0)
1473          {
1474          if (!invert)
1475            {
1476            if (printname != NULL) fprintf(stdout, "%s:", printname);
1477            if (number) fprintf(stdout, "%d:", linenumber);
1478            if (line_offsets)
1479              fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1480                offsets[1] - offsets[0]);
1481            else if (file_offsets)
1482              fprintf(stdout, "%d,%d\n",
1483                (int)(filepos + matchptr + offsets[0] - ptr),
1484                offsets[1] - offsets[0]);
1485            else if (only_matching < mrc)
1486              {
1487              int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1488              if (plen > 0)
1489                {
1490                if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1491                FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1492                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1493                fprintf(stdout, "\n");
1494                }
1495              }
1496            else if (printname != NULL || number) fprintf(stdout, "\n");
1497            match = FALSE;
1498            if (line_buffered) fflush(stdout);
1499            rc = 0;                      /* Had some success */
1500            startoffset = offsets[1];    /* Restart after the match */
1501            goto ONLY_MATCHING_RESTART;
1502            }
1503          }
1504    
1505        /* This is the default case when none of the above options is set. We print
1506        the matching lines(s), possibly preceded and/or followed by other lines of
1507        context. */
1508    
1509        else
1510          {
1511          /* See if there is a requirement to print some "after" lines from a
1512          previous match. We never print any overlaps. */
1513    
1514          if (after_context > 0 && lastmatchnumber > 0)
1515            {
1516            int ellength;
1517            int linecount = 0;
1518            char *p = lastmatchrestart;
1519    
1520            while (p < ptr && linecount < after_context)
1521              {
1522              p = end_of_line(p, ptr, &ellength);
1523              linecount++;
1524              }
1525    
1526            /* It is important to advance lastmatchrestart during this printing so
1527            that it interacts correctly with any "before" printing below. Print
1528            each line's data using fwrite() in case there are binary zeroes. */
1529    
1530            while (lastmatchrestart < p)
1531              {
1532              char *pp = lastmatchrestart;
1533              if (printname != NULL) fprintf(stdout, "%s-", printname);
1534              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1535              pp = end_of_line(pp, endptr, &ellength);
1536              FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1537              lastmatchrestart = pp;
1538              }
1539            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1540            }
1541    
1542          /* If there were non-contiguous lines printed above, insert hyphens. */
1543    
1544          if (hyphenpending)
1545            {
1546            fprintf(stdout, "--\n");
1547            hyphenpending = FALSE;
1548            hyphenprinted = TRUE;
1549            }
1550    
1551          /* See if there is a requirement to print some "before" lines for this
1552          match. Again, don't print overlaps. */
1553    
1554          if (before_context > 0)
1555            {
1556            int linecount = 0;
1557            char *p = ptr;
1558    
1559            while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1560                   linecount < before_context)
1561              {
1562              linecount++;
1563              p = previous_line(p, main_buffer);
1564              }
1565    
1566            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1567              fprintf(stdout, "--\n");
1568    
1569            while (p < ptr)
1570              {
1571              int ellength;
1572              char *pp = p;
1573              if (printname != NULL) fprintf(stdout, "%s-", printname);
1574              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1575              pp = end_of_line(pp, endptr, &ellength);
1576              FWRITE(p, 1, pp - p, stdout);
1577              p = pp;
1578              }
1579            }
1580    
1581          /* Now print the matching line(s); ensure we set hyphenpending at the end
1582          of the file if any context lines are being output. */
1583    
1584          if (after_context > 0 || before_context > 0)
1585            endhyphenpending = TRUE;
1586    
1587          if (printname != NULL) fprintf(stdout, "%s:", printname);
1588          if (number) fprintf(stdout, "%d:", linenumber);
1589    
1590          /* In multiline mode, we want to print to the end of the line in which
1591          the end of the matched string is found, so we adjust linelength and the
1592          line number appropriately, but only when there actually was a match
1593          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1594          the match will always be before the first newline sequence. */
1595    
1596          if (multiline & !invert)
1597            {
1598            char *endmatch = ptr + offsets[1];
1599            t = ptr;
1600            while (t < endmatch)
1601              {
1602              t = end_of_line(t, endptr, &endlinelength);
1603              if (t < endmatch) linenumber++; else break;
1604              }
1605            linelength = t - ptr - endlinelength;
1606            }
1607    
1608          /*** NOTE: Use only fwrite() to output the data line, so that binary
1609          zeroes are treated as just another data character. */
1610    
1611          /* This extra option, for Jeffrey Friedl's debugging requirements,
1612          replaces the matched string, or a specific captured string if it exists,
1613          with X. When this happens, colouring is ignored. */
1614    
1615    #ifdef JFRIEDL_DEBUG
1616          if (S_arg >= 0 && S_arg < mrc)
1617            {
1618            int first = S_arg * 2;
1619            int last  = first + 1;
1620            FWRITE(ptr, 1, offsets[first], stdout);
1621            fprintf(stdout, "X");
1622            FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1623            }
1624          else
1625    #endif
1626    
1627          /* We have to split the line(s) up if colouring, and search for further
1628          matches, but not of course if the line is a non-match. */
1629    
1630          if (do_colour && !invert)
1631            {
1632            int plength;
1633            FWRITE(ptr, 1, offsets[0], stdout);
1634            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1635            FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1636            fprintf(stdout, "%c[00m", 0x1b);
1637            for (;;)
1638              {
1639              startoffset = offsets[1];
1640              if (startoffset >= (int)linelength + endlinelength ||
1641                  !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1642                break;
1643              FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1644              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1645              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1646              fprintf(stdout, "%c[00m", 0x1b);
1647              }
1648    
1649            /* In multiline mode, we may have already printed the complete line
1650            and its line-ending characters (if they matched the pattern), so there
1651            may be no more to print. */
1652    
1653            plength = (int)((linelength + endlinelength) - startoffset);
1654            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1655            }
1656    
1657          /* Not colouring; no need to search for further matches */
1658    
1659          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1660          }
1661    
1662        /* End of doing what has to be done for a match. If --line-buffered was
1663        given, flush the output. */
1664    
1665        if (line_buffered) fflush(stdout);
1666        rc = 0;    /* Had some success */
1667    
1668        /* Remember where the last match happened for after_context. We remember
1669        where we are about to restart, and that line's number. */
1670    
1671        lastmatchrestart = ptr + linelength + endlinelength;
1672        lastmatchnumber = linenumber + 1;
1673        }
1674    
1675      /* For a match in multiline inverted mode (which of course did not cause
1676      anything to be printed), we have to move on to the end of the match before
1677      proceeding. */
1678    
1679      if (multiline && invert && match)
1680        {
1681        int ellength;
1682        char *endmatch = ptr + offsets[1];
1683        t = ptr;
1684        while (t < endmatch)
1685          {
1686          t = end_of_line(t, endptr, &ellength);
1687          if (t <= endmatch) linenumber++; else break;
1688          }
1689        endmatch = end_of_line(endmatch, endptr, &ellength);
1690        linelength = endmatch - ptr - ellength;
1691        }
1692    
1693      /* Advance to after the newline and increment the line number. The file
1694      offset to the current line is maintained in filepos. */
1695    
1696      ptr += linelength + endlinelength;
1697      filepos += (int)(linelength + endlinelength);
1698      linenumber++;
1699    
1700      /* If input is line buffered, and the buffer is not yet full, read another
1701      line and add it into the buffer. */
1702    
1703      if (input_line_buffered && bufflength < (size_t)bufsize)
1704        {
1705        int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1706        bufflength += add;
1707        endptr += add;
1708        }
1709    
1710      /* If we haven't yet reached the end of the file (the buffer is full), and
1711      the current point is in the top 1/3 of the buffer, slide the buffer down by
1712      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1713      about to be lost, print them. */
1714    
1715      if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1716        {
1717        if (after_context > 0 &&
1718            lastmatchnumber > 0 &&
1719            lastmatchrestart < main_buffer + bufthird)
1720          {
1721          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1722          lastmatchnumber = 0;
1723          }
1724    
1725        /* Now do the shuffle */
1726    
1727        memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1728        ptr -= bufthird;
1729    
1730    #ifdef SUPPORT_LIBZ
1731        if (frtype == FR_LIBZ)
1732          bufflength = 2*bufthird +
1733            gzread (ingz, main_buffer + 2*bufthird, bufthird);
1734        else
1735    #endif
1736    
1737    #ifdef SUPPORT_LIBBZ2
1738        if (frtype == FR_LIBBZ2)
1739          bufflength = 2*bufthird +
1740            BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1741        else
1742    #endif
1743    
1744        bufflength = 2*bufthird +
1745          (input_line_buffered?
1746           read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1747           fread(main_buffer + 2*bufthird, 1, bufthird, in));
1748        endptr = main_buffer + bufflength;
1749    
1750        /* Adjust any last match point */
1751    
1752        if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1753        }
1754      }     /* Loop through the whole file */
1755    
1756    /* End of file; print final "after" lines if wanted; do_after_lines sets
1757    hyphenpending if it prints something. */
1758    
1759    if (only_matching < 0 && !count_only)
1760      {
1761      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1762      hyphenpending |= endhyphenpending;
1763      }
1764    
1765    /* Print the file name if we are looking for those without matches and there
1766    were none. If we found a match, we won't have got this far. */
1767    
1768    if (filenames == FN_NOMATCH_ONLY)
1769      {
1770      fprintf(stdout, "%s\n", printname);
1771      return 0;
1772      }
1773    
1774    /* Print the match count if wanted */
1775    
1776    if (count_only)
1777      {
1778      if (count > 0 || !omit_zero_count)
1779        {
1780        if (printname != NULL && filenames != FN_NONE)
1781          fprintf(stdout, "%s:", printname);
1782        fprintf(stdout, "%d\n", count);
1783        }
1784      }
1785    
1786    return rc;
1787    }
1788    
1789    
1790    
1791    /*************************************************
1792    *     Grep a file or recurse into a directory    *
1793    *************************************************/
1794    
1795    /* Given a path name, if it's a directory, scan all the files if we are
1796    recursing; if it's a file, grep it.
1797    
1798    Arguments:
1799      pathname          the path to investigate
1800      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1801      only_one_at_top   TRUE if the path is the only one at toplevel
1802    
1803    Returns:  -1 the file/directory was skipped
1804               0 if there was at least one match
1805               1 if there were no matches
1806               2 there was some kind of error
1807    
1808    However, file opening failures are suppressed if "silent" is set.
1809    */
1810    
1811    static int
1812    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1813    {
1814    int rc = 1;
1815    int frtype;
1816    void *handle;
1817    char *lastcomp;
1818    FILE *in = NULL;           /* Ensure initialized */
1819    
1820    #ifdef SUPPORT_LIBZ
1821    gzFile ingz = NULL;
1822    #endif
1823    
1824    #ifdef SUPPORT_LIBBZ2
1825    BZFILE *inbz2 = NULL;
1826    #endif
1827    
1828    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1829    int pathlen;
1830    #endif
1831    
1832    /* If the file name is "-" we scan stdin */
1833    
1834    if (strcmp(pathname, "-") == 0)
1835      {
1836      return pcregrep(stdin, FR_PLAIN, stdin_name,
1837        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1838          stdin_name : NULL);
1839      }
1840    
1841    /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
1842    directories, whereas --include and --exclude apply to everything else. The test
1843    is against the final component of the path. */
1844    
1845    lastcomp = strrchr(pathname, FILESEP);
1846    lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
1847    
1848    /* If the file is a directory, skip if not recursing or if explicitly excluded.
1849    Otherwise, scan the directory and recurse for each path within it. The scanning
1850    code is localized so it can be made system-specific. */
1851    
1852    if (isdirectory(pathname))
1853      {
1854      if (dee_action == dee_SKIP ||
1855          !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
1856        return -1;
1857    
1858      if (dee_action == dee_RECURSE)
1859        {
1860        char buffer[1024];
1861        char *nextfile;
1862        directory_type *dir = opendirectory(pathname);
1863    
1864        if (dir == NULL)
1865          {
1866          if (!silent)
1867            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1868              strerror(errno));
1869          return 2;
1870          }
1871    
1872        while ((nextfile = readdirectory(dir)) != NULL)
1873          {
1874          int frc;
1875          sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
1876          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1877          if (frc > 1) rc = frc;
1878           else if (frc == 0 && rc == 1) rc = 0;
1879          }
1880    
1881        closedirectory(dir);
1882        return rc;
1883        }
1884      }
1885    
1886    /* If the file is not a directory and not a regular file, skip it if that's
1887    been requested. Otherwise, check for explicit include/exclude. */
1888    
1889    else if ((!isregfile(pathname) && DEE_action == DEE_SKIP) ||
1890              !test_incexc(lastcomp, include_patterns, exclude_patterns))
1891            return -1;
1892    
1893    /* Control reaches here if we have a regular file, or if we have a directory
1894    and recursion or skipping was not requested, or if we have anything else and
1895    skipping was not requested. The scan proceeds. If this is the first and only
1896    argument at top level, we don't show the file name, unless we are only showing
1897    the file name, or the filename was forced (-H). */
1898    
1899    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1900    pathlen = (int)(strlen(pathname));
1901    #endif
1902    
1903    /* Open using zlib if it is supported and the file name ends with .gz. */
1904    
1905    #ifdef SUPPORT_LIBZ
1906    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1907      {
1908      ingz = gzopen(pathname, "rb");
1909      if (ingz == NULL)
1910        {
1911        if (!silent)
1912          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1913            strerror(errno));
1914        return 2;
1915        }
1916      handle = (void *)ingz;
1917      frtype = FR_LIBZ;
1918      }
1919    else
1920    #endif
1921    
1922    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1923    
1924    #ifdef SUPPORT_LIBBZ2
1925    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1926      {
1927      inbz2 = BZ2_bzopen(pathname, "rb");
1928      handle = (void *)inbz2;
1929      frtype = FR_LIBBZ2;
1930      }
1931    else
1932    #endif
1933    
1934    /* Otherwise use plain fopen(). The label is so that we can come back here if
1935    an attempt to read a .bz2 file indicates that it really is a plain file. */
1936    
1937    #ifdef SUPPORT_LIBBZ2
1938    PLAIN_FILE:
1939    #endif
1940      {
1941      in = fopen(pathname, "rb");
1942      handle = (void *)in;
1943      frtype = FR_PLAIN;
1944      }
1945    
1946    /* All the opening methods return errno when they fail. */
1947    
1948    if (handle == NULL)
1949      {
1950      if (!silent)
1951        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1952          strerror(errno));
1953      return 2;
1954      }
1955    
1956    /* Now grep the file */
1957    
1958    rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1959      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1960    
1961    /* Close in an appropriate manner. */
1962    
1963    #ifdef SUPPORT_LIBZ
1964    if (frtype == FR_LIBZ)
1965      gzclose(ingz);
1966    else
1967    #endif
1968    
1969    /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1970    read failed. If the error indicates that the file isn't in fact bzipped, try
1971    again as a normal file. */
1972    
1973    #ifdef SUPPORT_LIBBZ2
1974    if (frtype == FR_LIBBZ2)
1975      {
1976      if (rc == 3)
1977        {
1978        int errnum;
1979        const char *err = BZ2_bzerror(inbz2, &errnum);
1980        if (errnum == BZ_DATA_ERROR_MAGIC)
1981          {
1982          BZ2_bzclose(inbz2);
1983          goto PLAIN_FILE;
1984          }
1985        else if (!silent)
1986          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1987            pathname, err);
1988        rc = 2;    /* The normal "something went wrong" code */
1989        }
1990      BZ2_bzclose(inbz2);
1991      }
1992    else
1993    #endif
1994    
1995    /* Normal file close */
1996    
1997    fclose(in);
1998    
1999    /* Pass back the yield from pcregrep(). */
2000    
2001    return rc;
2002    }
2003    
2004    
2005    
2006    
2007    /*************************************************
2008    *                Usage function                  *
2009    *************************************************/
2010    
2011    static int
2012    usage(int rc)
2013    {
2014    option_item *op;
2015    fprintf(stderr, "Usage: pcregrep [-");
2016    for (op = optionlist; op->one_char != 0; op++)
2017      {
2018      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
2019      }
2020    fprintf(stderr, "] [long options] [pattern] [files]\n");
2021    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
2022      "options.\n");
2023    return rc;
2024    }
2025    
2026    
2027    
2028    
2029    /*************************************************
2030    *                Help function                   *
2031    *************************************************/
2032    
2033    static void
2034  help(void)  help(void)
2035  {  {
2036  option_item *op;  option_item *op;
2037    
2038  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
2039  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
2040  printf("PATTERN must be present if -f is not used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
2041  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
2042    
2043    #ifdef SUPPORT_LIBZ
2044    printf("Files whose names end in .gz are read using zlib.\n");
2045    #endif
2046    
2047    #ifdef SUPPORT_LIBBZ2
2048    printf("Files whose names end in .bz2 are read using bzlib2.\n");
2049    #endif
2050    
2051    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2052    printf("Other files and the standard input are read as plain files.\n\n");
2053    #else
2054    printf("All files are read as plain files, without any interpretation.\n\n");
2055    #endif
2056    
2057    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
2058  printf("Options:\n");  printf("Options:\n");
2059    
2060  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
2061    {    {
2062    int n;    int n;
2063    char s[4];    char s[4];
2064    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");  
2065    printf("  %s --%s%n", s, op->long_name, &n);    /* Two options were accidentally implemented and documented with underscores
2066    n = 30 - n;    instead of hyphens in their names, something that was not noticed for quite a
2067      few releases. When fixing this, I left the underscored versions in the list
2068      in case people were using them. However, we don't want to display them in the
2069      help data. There are no other options that contain underscores, and we do not
2070      expect ever to implement such options. Therefore, just omit any option that
2071      contains an underscore. */
2072    
2073      if (strchr(op->long_name, '_') != NULL) continue;
2074    
2075      if (op->one_char > 0 && (op->long_name)[0] == 0)
2076        n = 31 - printf("  -%c", op->one_char);
2077      else
2078        {
2079        if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
2080          else strcpy(s, "   ");
2081        n = 31 - printf("  %s --%s", s, op->long_name);
2082        }
2083    
2084    if (n < 1) n = 1;    if (n < 1) n = 1;
2085    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                           ", op->help_text);
2086    }    }
2087    
2088  printf("\n  -f<filename>  or  --file=<filename>\n");  printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
2089  printf("    Read patterns from <filename> instead of using a command line option.\n");  printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
2090  printf("    Trailing white space is removed; blanks lines are ignored.\n");  printf("When reading patterns or file names from a file, trailing white\n");
2091  printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  printf("space is removed and blank lines are ignored.\n");
2092    printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
2093    
2094  printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
2095  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
2096  }  }
2097    
# Line 431  printf("Exit status is 0 if any matches, Line 2099  printf("Exit status is 0 if any matches,
2099    
2100    
2101  /*************************************************  /*************************************************
2102  *                Handle an option                *  *    Handle a single-letter, no data option      *
2103  *************************************************/  *************************************************/
2104    
2105  static int  static int
2106  handle_option(int letter, int options)  handle_option(int letter, int options)
2107  {  {
2108  switch(letter)  switch(letter)
2109      {
2110      case N_FOFFSETS: file_offsets = TRUE; break;
2111      case N_HELP: help(); pcregrep_exit(0);
2112      case N_LBUFFER: line_buffered = TRUE; break;
2113      case N_LOFFSETS: line_offsets = number = TRUE; break;
2114      case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2115      case 'a': binary_files = BIN_TEXT; break;
2116      case 'c': count_only = TRUE; break;
2117      case 'F': process_options |= PO_FIXED_STRINGS; break;
2118      case 'H': filenames = FN_FORCE; break;
2119      case 'I': binary_files = BIN_NOMATCH; break;
2120      case 'h': filenames = FN_NONE; break;
2121      case 'i': options |= PCRE_CASELESS; break;
2122      case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2123      case 'L': filenames = FN_NOMATCH_ONLY; break;
2124      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2125      case 'n': number = TRUE; break;
2126      case 'o': only_matching = 0; break;
2127      case 'q': quiet = TRUE; break;
2128      case 'r': dee_action = dee_RECURSE; break;
2129      case 's': silent = TRUE; break;
2130      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
2131      case 'v': invert = TRUE; break;
2132      case 'w': process_options |= PO_WORD_MATCH; break;
2133      case 'x': process_options |= PO_LINE_MATCH; break;
2134    
2135      case 'V':
2136      fprintf(stdout, "pcregrep version %s\n", pcre_version());
2137      pcregrep_exit(0);
2138      break;
2139    
2140      default:
2141      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2142      pcregrep_exit(usage(2));
2143      }
2144    
2145    return options;
2146    }
2147    
2148    
2149    
2150    
2151    /*************************************************
2152    *          Construct printed ordinal             *
2153    *************************************************/
2154    
/* This turns a number into an English ordinal such as "1st", "2nd", "3rd",
"4th", "11th" or "21st". Numbers whose last two digits are 11, 12 or 13 take
"th", not the suffix suggested by their final digit. The result lives in a
static buffer that is overwritten by the next call. */

static char *
ordin(int n)
{
static char buffer[24];     /* Room for a 64-bit int, sign, suffix, and zero */
char *p = buffer;
int lasttwo = n % 100;

sprintf(p, "%d", n);
while (*p != 0) p++;

if (lasttwo >= 11 && lasttwo <= 13)
  strcpy(p, "th");
else switch (n%10)
  {
  case 1: strcpy(p, "st"); break;
  case 2: strcpy(p, "nd"); break;
  case 3: strcpy(p, "rd"); break;
  default: strcpy(p, "th"); break;
  }

return buffer;
}
2173    
2174    
2175    
2176    /*************************************************
2177    *          Compile a single pattern              *
2178    *************************************************/
2179    
2180    /* Do nothing if the pattern has already been compiled. This is the case for
2181    include/exclude patterns read from a file.
2182    
2183    When the -F option has been used, each "pattern" may be a list of strings,
2184    separated by line breaks. They will be matched literally. We split such a
2185    string and compile the first substring, inserting an additional block into the
2186    pattern chain.
2187    
2188    Arguments:
2189      p              points to the pattern block
2190      options        the PCRE options
2191      popts          the processing options
2192      fromfile       TRUE if the pattern was read from a file
2193      fromtext       file name or identifying text (e.g. "include")
2194      count          0 if this is the only command line pattern, or
2195                     number of the command line pattern, or
2196                     linenumber for a pattern from a file
2197    
2198    Returns:         TRUE on success, FALSE after an error
2199    */
2200    
2201    static BOOL
2202    compile_pattern(patstr *p, int options, int popts, int fromfile,
2203      const char *fromtext, int count)
2204    {
2205    char buffer[PATBUFSIZE];
2206    const char *error;
2207    char *ps = p->string;
2208    int patlen = strlen(ps);
2209    int errptr;
2210    
2211    if (p->compiled != NULL) return TRUE;
2212    
2213    if ((popts & PO_FIXED_STRINGS) != 0)
2214      {
2215      int ellength;
2216      char *eop = ps + patlen;
2217      char *pe = end_of_line(ps, eop, &ellength);
2218    
2219      if (ellength != 0)
2220        {
2221        if (add_pattern(pe, p) == NULL) return FALSE;
2222        patlen = (int)(pe - ps - ellength);
2223        }
2224      }
2225    
2226    sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2227    p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2228    if (p->compiled != NULL) return TRUE;
2229    
2230    /* Handle compile errors */
2231    
2232    errptr -= (int)strlen(prefix[popts]);
2233    if (errptr > patlen) errptr = patlen;
2234    
2235    if (fromfile)
2236      {
2237      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2238        "at offset %d: %s\n", count, fromtext, errptr, error);
2239      }
2240    else
2241      {
2242      if (count == 0)
2243        fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2244          fromtext, errptr, error);
2245      else
2246        fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2247          ordin(count), fromtext, errptr, error);
2248      }
2249    
2250    return FALSE;
2251    }
2252    
2253    
2254    
2255    /*************************************************
2256    *     Read and compile a file of patterns        *
2257    *************************************************/
2258    
2259    /* This is used for --filelist, --include-from, and --exclude-from.
2260    
2261    Arguments:
2262      name         the name of the file; "-" is stdin
2263      patptr       pointer to the pattern chain anchor
2264      patlastptr   pointer to the last pattern pointer
2265      popts        the process options to pass to compile_pattern()
2266    
2267    Returns:       TRUE if all went well
2268    */
2269    
2270    static BOOL
2271    read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2272    {
2273    int linenumber = 0;
2274    FILE *f;
2275    char *filename;
2276    char buffer[PATBUFSIZE];
2277    
2278    if (strcmp(name, "-") == 0)
2279      {
2280      f = stdin;
2281      filename = stdin_name;
2282      }
2283    else
2284      {
2285      f = fopen(name, "r");
2286      if (f == NULL)
2287        {
2288        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2289        return FALSE;
2290        }
2291      filename = name;
2292      }
2293    
2294    while (fgets(buffer, PATBUFSIZE, f) != NULL)
2295    {    {
2296    case -1:  help(); exit(0);    char *s = buffer + (int)strlen(buffer);
2297    case 'c': count_only = TRUE; break;    while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2298    case 'h': filenames = FALSE; break;    *s = 0;
2299    case 'i': options |= PCRE_CASELESS; break;    linenumber++;
2300    case 'l': filenames_only = TRUE;    if (buffer[0] == 0) continue;   /* Skip blank lines */
   case 'n': number = TRUE; break;  
   case 'r': recurse = TRUE; break;  
   case 's': silent = TRUE; break;  
   case 'u': options |= PCRE_UTF8; break;  
   case 'v': invert = TRUE; break;  
   case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;  
2301    
2302    case 'V':    /* Note: this call to add_pattern() puts a pointer to the local variable
2303    fprintf(stderr, "pcregrep version %s using ", VERSION);    "buffer" into the pattern chain. However, that pointer is used only when
2304    fprintf(stderr, "PCRE version %s\n", pcre_version());    compiling the pattern, which happens immediately below, so we flatten it
2305    exit(0);    afterwards, as a precaution against any later code trying to use it. */
2306    break;  
2307      *patlastptr = add_pattern(buffer, *patlastptr);
2308      if (*patlastptr == NULL) return FALSE;
2309      if (*patptr == NULL) *patptr = *patlastptr;
2310    
2311      /* This loop is needed because compiling a "pattern" when -F is set may add
2312      on additional literal patterns if the original contains a newline. In the
2313      common case, it never will, because fgets() stops at a newline. However,
2314      the -N option can be used to give pcregrep a different newline setting. */
2315    
2316    default:    for(;;)
2317    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);      {
2318    exit(usage(2));      if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2319            linenumber))
2320          return FALSE;
2321        (*patlastptr)->string = NULL;            /* Insurance */
2322        if ((*patlastptr)->next == NULL) break;
2323        *patlastptr = (*patlastptr)->next;
2324        }
2325    }    }
2326    
2327  return options;  if (f != stdin) fclose(f);
2328    return TRUE;
2329  }  }
2330    
2331    
2332    
   
2333  /*************************************************  /*************************************************
2334  *                Main program                    *  *                Main program                    *
2335  *************************************************/  *************************************************/
2336    
2337    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2338    
2339  int  int
2340  main(int argc, char **argv)  main(int argc, char **argv)
2341  {  {
2342  int i, j;  int i, j;
2343  int rc = 1;  int rc = 1;
 int options = 0;  
 int errptr;  
 const char *error;  
2344  BOOL only_one_at_top;  BOOL only_one_at_top;
2345    patstr *cp;
2346    fnstr *fn;
2347    const char *locale_from = "--locale";
2348    const char *error;
2349    
2350    #ifdef SUPPORT_PCREGREP_JIT
2351    pcre_jit_stack *jit_stack = NULL;
2352    #endif
2353    
2354    /* Set the default line ending value from the default in the PCRE library;
2355    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2356    Note that the return values from pcre_config(), though derived from the ASCII
2357    codes, are the same in EBCDIC environments, so we must use the actual values
2358    rather than escapes such as as '\r'. */
2359    
2360    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2361    switch(i)
2362      {
2363      default:               newline = (char *)"lf"; break;
2364      case 13:               newline = (char *)"cr"; break;
2365      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2366      case -1:               newline = (char *)"any"; break;
2367      case -2:               newline = (char *)"anycrlf"; break;
2368      }
2369    
2370  /* Process the options */  /* Process the options */
2371    
2372  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
2373    {    {
2374      option_item *op = NULL;
2375      char *option_data = (char *)"";    /* default to keep compiler happy */
2376      BOOL longop;
2377      BOOL longopwasequals = FALSE;
2378    
2379    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
2380    
2381    /* Missing options */    /* If we hit an argument that is just "-", it may be a reference to STDIN,
2382      but only if we have previously had -e or -f to define the patterns. */
2383    
2384    if (argv[i][1] == 0) exit(usage(2));    if (argv[i][1] == 0)
2385        {
2386        if (pattern_files != NULL || patterns != NULL) break;
2387          else pcregrep_exit(usage(2));
2388        }
2389    
2390    /* Long name options */    /* Handle a long name option, or -- to terminate the options */
2391    
2392    if (argv[i][1] == '-')    if (argv[i][1] == '-')
2393      {      {
2394      option_item *op;      char *arg = argv[i] + 2;
2395        char *argequals = strchr(arg, '=');
2396    
2397      if (strncmp(argv[i]+2, "file=", 5) == 0)      if (*arg == 0)    /* -- terminates options */
2398        {        {
2399        pattern_filename = argv[i] + 7;        i++;
2400        continue;        break;                /* out of the options-handling loop */
2401        }        }
2402    
2403        longop = TRUE;
2404    
2405        /* Some long options have data that follows after =, for example file=name.
2406        Some options have variations in the long name spelling: specifically, we
2407        allow "regexp" because GNU grep allows it, though I personally go along
2408        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2409        These options are entered in the table as "regex(p)". Options can be in
2410        both these categories. */
2411    
2412      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2413        {        {
2414        if (strcmp(argv[i]+2, op->long_name) == 0)        char *opbra = strchr(op->long_name, '(');
2415          char *equals = strchr(op->long_name, '=');
2416    
2417          /* Handle options with only one spelling of the name */
2418    
2419          if (opbra == NULL)     /* Does not contain '(' */
2420            {
2421            if (equals == NULL)  /* Not thing=data case */
2422              {
2423              if (strcmp(arg, op->long_name) == 0) break;
2424              }
2425            else                 /* Special case xxx=data */
2426              {
2427              int oplen = (int)(equals - op->long_name);
2428              int arglen = (argequals == NULL)?
2429                (int)strlen(arg) : (int)(argequals - arg);
2430              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2431                {
2432                option_data = arg + arglen;
2433                if (*option_data == '=')
2434                  {
2435                  option_data++;
2436                  longopwasequals = TRUE;
2437                  }
2438                break;
2439                }
2440              }
2441            }
2442    
2443          /* Handle options with an alternate spelling of the name */
2444    
2445          else
2446          {          {
2447          options = handle_option(op->one_char, options);          char buff1[24];
2448          break;          char buff2[24];
2449    
2450            int baselen = (int)(opbra - op->long_name);
2451            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2452            int arglen = (argequals == NULL || equals == NULL)?
2453              (int)strlen(arg) : (int)(argequals - arg);
2454    
2455            sprintf(buff1, "%.*s", baselen, op->long_name);
2456            sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2457    
2458            if (strncmp(arg, buff1, arglen) == 0 ||
2459               strncmp(arg, buff2, arglen) == 0)
2460              {
2461              if (equals != NULL && argequals != NULL)
2462                {
2463                option_data = argequals;
2464                if (*option_data == '=')
2465                  {
2466                  option_data++;
2467                  longopwasequals = TRUE;
2468                  }
2469                }
2470              break;
2471              }
2472          }          }
2473        }        }
2474    
2475      if (op->one_char == 0)      if (op->one_char == 0)
2476        {        {
2477        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2478        exit(usage(2));        pcregrep_exit(usage(2));
2479        }        }
2480      }      }
2481    
2482    /* One-char options */    /* Jeffrey Friedl's debugging harness uses these additional options which
2483      are not in the right form for putting in the option table because they use
2484      only one hyphen, yet are more than one character long. By putting them
2485      separately here, they will not get displayed as part of the help() output,
2486      but I don't think Jeffrey will care about that. */
2487    
2488    #ifdef JFRIEDL_DEBUG
2489      else if (strcmp(argv[i], "-pre") == 0) {
2490              jfriedl_prefix = argv[++i];
2491              continue;
2492      } else if (strcmp(argv[i], "-post") == 0) {
2493              jfriedl_postfix = argv[++i];
2494              continue;
2495      } else if (strcmp(argv[i], "-XT") == 0) {
2496              sscanf(argv[++i], "%d", &jfriedl_XT);
2497              continue;
2498      } else if (strcmp(argv[i], "-XR") == 0) {
2499              sscanf(argv[++i], "%d", &jfriedl_XR);
2500              continue;
2501      }
2502    #endif
2503    
2504    
2505      /* One-char options; many that have no data may be in a single argument; we
2506      continue till we hit the last one or one that needs data. */
2507    
2508    else    else
2509      {      {
2510      char *s = argv[i] + 1;      char *s = argv[i] + 1;
2511        longop = FALSE;
2512      while (*s != 0)      while (*s != 0)
2513        {        {
2514        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
2515          {          {
2516          pattern_filename = s + 1;          if (*s == op->one_char) break;
2517          if (pattern_filename[0] == 0)          }
2518            {        if (op->one_char == 0)
2519            if (i >= argc - 1)          {
2520              {          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2521              fprintf(stderr, "pcregrep: File name missing after -f\n");            *s, argv[i]);
2522              exit(usage(2));          pcregrep_exit(usage(2));
2523              }          }
2524            pattern_filename = argv[++i];  
2525            }        /* Check for a single-character option that has data: OP_OP_NUMBER
2526          break;        is used for one that either has a numerical number or defaults, i.e. the
2527          data is optional. If a digit follows, there is data; if not, carry on
2528          with other single-character options in the same string. */
2529    
2530          option_data = s+1;
2531          if (op->type == OP_OP_NUMBER)
2532            {
2533            if (isdigit((unsigned char)s[1])) break;
2534            }
2535          else   /* Check for end or a dataless option */
2536            {
2537            if (op->type != OP_NODATA || s[1] == 0) break;
2538            }
2539    
2540          /* Handle a single-character option with no data, then loop for the
2541          next character in the string. */
2542    
2543          pcre_options = handle_option(*s++, pcre_options);
2544          }
2545        }
2546    
2547      /* At this point we should have op pointing to a matched option. If the type
2548      is OP_NODATA, it means that there is no data, and the option might set
2549      something in the PCRE options. */
2550    
2551      if (op->type == OP_NODATA)
2552        {
2553        pcre_options = handle_option(op->one_char, pcre_options);
2554        continue;
2555        }
2556    
2557      /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2558      either has a value or defaults to something. It cannot have data in a
2559      separate item. At the moment, the only such options are "colo(u)r",
2560      "only-matching", and Jeffrey Friedl's special -S debugging option. */
2561    
2562      if (*option_data == 0 &&
2563          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2564        {
2565        switch (op->one_char)
2566          {
2567          case N_COLOUR:
2568          colour_option = (char *)"auto";
2569          break;
2570    
2571          case 'o':
2572          only_matching = 0;
2573          break;
2574    
2575    #ifdef JFRIEDL_DEBUG
2576          case 'S':
2577          S_arg = 0;
2578          break;
2579    #endif
2580          }
2581        continue;
2582        }
2583    
2584      /* Otherwise, find the data string for the option. */
2585    
2586      if (*option_data == 0)
2587        {
2588        if (i >= argc - 1 || longopwasequals)
2589          {
2590          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2591          pcregrep_exit(usage(2));
2592          }
2593        option_data = argv[++i];
2594        }
2595    
2596      /* If the option type is OP_PATLIST, it's the -e option, or one of the
2597      include/exclude options, which can be called multiple times to create lists
2598      of patterns. */
2599    
2600      if (op->type == OP_PATLIST)
2601         {
2602         patdatastr *pd = (patdatastr *)op->dataptr;
2603         *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2604         if (*(pd->lastptr) == NULL) goto EXIT2;
2605         if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2606         }
2607    
2608      /* If the option type is OP_FILELIST, it's one of the options that names a
2609      file. */
2610    
2611      else if (op->type == OP_FILELIST)
2612        {
2613        fndatastr *fd = (fndatastr *)op->dataptr;
2614        fn = (fnstr *)malloc(sizeof(fnstr));
2615        if (fn == NULL)
2616          {
2617          fprintf(stderr, "pcregrep: malloc failed\n");
2618          goto EXIT2;
2619          }
2620        fn->next = NULL;
2621        fn->name = option_data;
2622        if (*(fd->anchor) == NULL)
2623          *(fd->anchor) = fn;
2624        else
2625          (*(fd->lastptr))->next = fn;
2626        *(fd->lastptr) = fn;
2627        }
2628    
2629      /* Handle OP_BINFILES */
2630    
2631      else if (op->type == OP_BINFILES)
2632        {
2633        if (strcmp(option_data, "binary") == 0)
2634          binary_files = BIN_BINARY;
2635        else if (strcmp(option_data, "without-match") == 0)
2636          binary_files = BIN_NOMATCH;
2637        else if (strcmp(option_data, "text") == 0)
2638          binary_files = BIN_TEXT;
2639        else
2640          {
2641          fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2642            option_data);
2643          pcregrep_exit(usage(2));
2644          }
2645        }
2646    
2647      /* Otherwise, deal with single string or numeric data values. */
2648    
2649      else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2650               op->type != OP_OP_NUMBER)
2651        {
2652        *((char **)op->dataptr) = option_data;
2653        }
2654    
2655      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2656      only for unpicking arguments, so just keep it simple. */
2657    
2658      else
2659        {
2660        unsigned long int n = 0;
2661        char *endptr = option_data;
2662        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2663        while (isdigit((unsigned char)(*endptr)))
2664          n = n * 10 + (int)(*endptr++ - '0');
2665        if (toupper(*endptr) == 'K')
2666          {
2667          n *= 1024;
2668          endptr++;
2669          }
2670        else if (toupper(*endptr) == 'M')
2671          {
2672          n *= 1024*1024;
2673          endptr++;
2674          }
2675        if (*endptr != 0)
2676          {
2677          if (longop)
2678            {
2679            char *equals = strchr(op->long_name, '=');
2680            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2681              (int)(equals - op->long_name);
2682            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2683              option_data, nlen, op->long_name);
2684          }          }
2685        else options = handle_option(*s++, options);        else
2686            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2687              option_data, op->one_char);
2688          pcregrep_exit(usage(2));
2689        }        }
2690        if (op->type == OP_LONGNUMBER)
2691            *((unsigned long int *)op->dataptr) = n;
2692        else
2693            *((int *)op->dataptr) = n;
2694      }      }
2695    }    }
2696    
2697  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  /* Options have been decoded. If -C was used, its value is used as a default
2698  hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  for -A and -B. */
2699    
2700  if (pattern_list == NULL || hints_list == NULL)  if (both_context > 0)
2701    {    {
2702    fprintf(stderr, "pcregrep: malloc failed\n");    if (after_context == 0) after_context = both_context;
2703    return 2;    if (before_context == 0) before_context = both_context;
2704    }    }
2705    
2706  /* Compile the regular expression(s). */  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2707    However, the latter two set only_matching. */
2708    
2709  if (pattern_filename != NULL)  if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2710        (file_offsets && line_offsets))
2711    {    {
2712    FILE *f = fopen(pattern_filename, "r");    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2713    char buffer[BUFSIZ];      "and/or --line-offsets\n");
2714    if (f == NULL)    pcregrep_exit(usage(2));
2715      }
2716    
2717    if (file_offsets || line_offsets) only_matching = 0;
2718    
2719    /* If a locale has not been provided as an option, see if the LC_CTYPE or
2720    LC_ALL environment variable is set, and if so, use it. */
2721    
2722    if (locale == NULL)
2723      {
2724      locale = getenv("LC_ALL");
2725      locale_from = "LCC_ALL";
2726      }
2727    
2728    if (locale == NULL)
2729      {
2730      locale = getenv("LC_CTYPE");
2731      locale_from = "LC_CTYPE";
2732      }
2733    
2734    /* If a locale has been provided, set it, and generate the tables the PCRE
2735    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2736    
2737    if (locale != NULL)
2738      {
2739      if (setlocale(LC_CTYPE, locale) == NULL)
2740      {      {
2741      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2742        strerror(errno));        locale, locale_from);
2743        return 2;
2744        }
2745      pcretables = pcre_maketables();
2746      }
2747    
2748    /* Sort out colouring */
2749    
2750    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2751      {
2752      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2753      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2754      else
2755        {
2756        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2757          colour_option);
2758      return 2;      return 2;
2759      }      }
2760    while (fgets(buffer, sizeof(buffer), f) != NULL)    if (do_colour)
2761      {      {
2762      char *s = buffer + (int)strlen(buffer);      char *cs = getenv("PCREGREP_COLOUR");
2763      if (pattern_count >= MAX_PATTERN_COUNT)      if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2764        {      if (cs != NULL) colour_string = cs;
       fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",  
         MAX_PATTERN_COUNT);  
       return 2;  
       }  
     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;  
     if (s == buffer) continue;  
     *s = 0;  
     pattern_list[pattern_count] = pcre_compile(buffer, options, &error,  
       &errptr, NULL);  
     if (pattern_list[pattern_count++] == NULL)  
       {  
       fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",  
         pattern_count, errptr, error);  
       return 2;  
       }  
2765      }      }
   fclose(f);  
2766    }    }
2767    
2768  /* If no file name, a single regex must be given inline */  /* Interpret the newline type; the default settings are Unix-like. */
2769    
2770    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2771      {
2772      pcre_options |= PCRE_NEWLINE_CR;
2773      endlinetype = EL_CR;
2774      }
2775    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2776      {
2777      pcre_options |= PCRE_NEWLINE_LF;
2778      endlinetype = EL_LF;
2779      }
2780    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2781      {
2782      pcre_options |= PCRE_NEWLINE_CRLF;
2783      endlinetype = EL_CRLF;
2784      }
2785    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2786      {
2787      pcre_options |= PCRE_NEWLINE_ANY;
2788      endlinetype = EL_ANY;
2789      }
2790    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2791      {
2792      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2793      endlinetype = EL_ANYCRLF;
2794      }
2795  else  else
2796    {    {
2797    if (i >= argc) return usage(2);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2798    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);    return 2;
2799    if (pattern_list[0] == NULL)    }
2800    
2801    /* Interpret the text values for -d and -D */
2802    
2803    if (dee_option != NULL)
2804      {
2805      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2806      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2807      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2808      else
2809        {
2810        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2811        return 2;
2812        }
2813      }
2814    
2815    if (DEE_option != NULL)
2816      {
2817      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2818      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2819      else
2820      {      {
2821      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
       error);  
2822      return 2;      return 2;
2823      }      }
   pattern_count++;  
2824    }    }
2825    
2826  /* Study the regular expressions, as we will be running them may times */  /* Check the values for Jeffrey Friedl's debugging options. */
2827    
2828    #ifdef JFRIEDL_DEBUG
2829    if (S_arg > 9)
2830      {
2831      fprintf(stderr, "pcregrep: bad value for -S option\n");
2832      return 2;
2833      }
2834    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2835      {
2836      if (jfriedl_XT == 0) jfriedl_XT = 1;
2837      if (jfriedl_XR == 0) jfriedl_XR = 1;
2838      }
2839    #endif
2840    
2841    /* Get memory for the main buffer. */
2842    
2843    bufsize = 3*bufthird;
2844    main_buffer = (char *)malloc(bufsize);
2845    
2846    if (main_buffer == NULL)
2847      {
2848      fprintf(stderr, "pcregrep: malloc failed\n");
2849      goto EXIT2;
2850      }
2851    
2852    /* If no patterns were provided by -e, and there are no files provided by -f,
2853    the first argument is the one and only pattern, and it must exist. */
2854    
2855    if (patterns == NULL && pattern_files == NULL)
2856      {
2857      if (i >= argc) return usage(2);
2858      patterns = patterns_last = add_pattern(argv[i++], NULL);
2859      if (patterns == NULL) goto EXIT2;
2860      }
2861    
2862    /* Compile the patterns that were provided on the command line, either by
2863    multiple uses of -e or as a single unkeyed pattern. We cannot do this until
2864    after all the command-line options are read so that we know which PCRE options
2865    to use. When -F is used, compile_pattern() may add another block into the
2866    chain, so we must not access the next pointer till after the compile. */
2867    
2868    for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
2869      {
2870      if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
2871           (j == 1 && patterns->next == NULL)? 0 : j))
2872        goto EXIT2;
2873      }
2874    
2875    /* Read and compile the regular expressions that are provided in files. */
2876    
2877    for (fn = pattern_files; fn != NULL; fn = fn->next)
2878      {
2879      if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
2880        goto EXIT2;
2881      }
2882    
2883    /* Study the regular expressions, as we will be running them many times. Unless
2884    JIT has been explicitly disabled, arrange a stack for it to use. */
2885    
2886    #ifdef SUPPORT_PCREGREP_JIT
2887    if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2888      jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
2889    #endif
2890    
2891  for (j = 0; j < pattern_count; j++)  for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
2892    {    {
2893    hints_list[j] = pcre_study(pattern_list[j], 0, &error);    cp->hint = pcre_study(cp->compiled, study_options, &error);
2894    if (error != NULL)    if (error != NULL)
2895      {      {
2896      char s[16];      char s[16];
2897      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
2898      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2899      return 2;      goto EXIT2;
2900        }
2901    #ifdef SUPPORT_PCREGREP_JIT
2902      if (jit_stack != NULL && cp->hint != NULL)
2903        pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
2904    #endif
2905      }
2906    
2907    /* If --match-limit or --recursion-limit was set, put the value(s) into the
2908    pcre_extra block for each pattern. */
2909    
2910    if (match_limit > 0 || match_limit_recursion > 0)
2911      {
2912      for (cp = patterns; cp != NULL; cp = cp->next)
2913        {
2914        if (cp->hint == NULL)
2915          {
2916          cp->hint = (pcre_extra *)malloc(sizeof(pcre_extra));
2917          if (cp->hint == NULL)
2918            {
2919            fprintf(stderr, "pcregrep: malloc failed\n");
2920            pcregrep_exit(2);
2921            }
2922          }
2923        if (match_limit > 0)
2924          {
2925          cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
2926          cp->hint->match_limit = match_limit;
2927          }
2928        if (match_limit_recursion > 0)
2929          {
2930          cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2931          cp->hint->match_limit_recursion = match_limit_recursion;
2932          }
2933      }      }
2934    }    }
2935    
2936  /* If there are no further arguments, do the business on stdin and exit */  /* If there are include or exclude patterns read from the command line, compile
2937    them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
2938    0. */
2939    
2940  if (i >= argc) return pcregrep(stdin, NULL);  for (j = 0; j < 4; j++)
2941      {
2942      int k;
2943      for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
2944        {
2945        if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
2946             (k == 1 && cp->next == NULL)? 0 : k))
2947          goto EXIT2;
2948        }
2949      }
2950    
2951    /* Read and compile include/exclude patterns from files. */
2952    
2953    for (fn = include_from; fn != NULL; fn = fn->next)
2954      {
2955      if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
2956        goto EXIT2;
2957      }
2958    
2959    for (fn = exclude_from; fn != NULL; fn = fn->next)
2960      {
2961      if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
2962        goto EXIT2;
2963      }
2964    
2965    /* If there are no files that contain lists of files to search, and there are
2966    no file arguments, search stdin, and then exit. */
2967    
2968    if (file_lists == NULL && i >= argc)
2969      {
2970      rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2971        (filenames > FN_DEFAULT)? stdin_name : NULL);
2972      goto EXIT;
2973      }
2974    
2975    /* If any files that contain a list of files to search have been specified,
2976    read them line by line and search the given files. */
2977    
2978  /* Otherwise, work through the remaining arguments as files or directories.  for (fn = file_lists; fn != NULL; fn = fn->next)
2979  Pass in the fact that there is only one argument at top level - this suppresses    {
2980  the file name if the argument is not a directory. */    char buffer[PATBUFSIZE];
2981      FILE *fl;
2982      if (strcmp(fn->name, "-") == 0) fl = stdin; else
2983        {
2984        fl = fopen(fn->name, "rb");
2985        if (fl == NULL)
2986          {
2987          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
2988            strerror(errno));
2989          goto EXIT2;
2990          }
2991        }
2992      while (fgets(buffer, PATBUFSIZE, fl) != NULL)
2993        {
2994        int frc;
2995        char *end = buffer + (int)strlen(buffer);
2996        while (end > buffer && isspace(end[-1])) end--;
2997        *end = 0;
2998        if (*buffer != 0)
2999          {
3000          frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3001          if (frc > 1) rc = frc;
3002            else if (frc == 0 && rc == 1) rc = 0;
3003          }
3004        }
3005      if (fl != stdin) fclose(fl);
3006      }
3007    
3008    /* After handling file-list, work through remaining arguments. Pass in the fact
3009    that there is only one argument at top level - this suppresses the file name if
3010    the argument is not a directory and filenames are not otherwise forced. */
3011    
3012  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1 && file_lists == NULL;
 if (filenames_only) filenames = TRUE;  
3013    
3014  for (; i < argc; i++)  for (; i < argc; i++)
3015    {    {
3016    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
3017    if (frc == 0 && rc == 1) rc = 0;      only_one_at_top);
3018      if (frc > 1) rc = frc;
3019        else if (frc == 0 && rc == 1) rc = 0;
3020    }    }
3021    
3022  return rc;  EXIT:
3023    #ifdef SUPPORT_PCREGREP_JIT
3024    if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3025    #endif
3026    
3027    if (main_buffer != NULL) free(main_buffer);
3028    
3029    free_pattern_chain(patterns);
3030    free_pattern_chain(include_patterns);
3031    free_pattern_chain(include_dir_patterns);
3032    free_pattern_chain(exclude_patterns);
3033    free_pattern_chain(exclude_dir_patterns);
3034    
3035    free_file_chain(exclude_from);
3036    free_file_chain(include_from);
3037    free_file_chain(pattern_files);
3038    free_file_chain(file_lists);
3039    
3040    pcregrep_exit(rc);
3041    
3042    EXIT2:
3043    rc = 2;
3044    goto EXIT;
3045  }  }
3046    
3047  /* End */  /* End of pcregrep */

Legend:
Removed from v.63  
changed lines
  Added in v.1033

  ViewVC Help
Powered by ViewVC 1.1.5