/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC revision 561 by ph10, Sat Oct 30 18:37:47 2010 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix system it can recurse into directories. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2010 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55    #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 18  its pattern matching. On a Unix system i Line 70  its pattern matching. On a Unix system i
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "2.0 01-Aug-2001"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76    #if BUFSIZ > 8192
77    #define MBUFTHIRD BUFSIZ
78    #else
79    #define MBUFTHIRD 8192
80    #endif
81    
82    /* Values for the "filenames" variable, which specifies options for file name
83    output. The order is important; it is assumed that a file name is wanted for
84    all values greater than FN_DEFAULT. */
85    
86    enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92    /* Actions for the -d and -D options */
93    
94    enum { dee_READ, dee_SKIP, dee_RECURSE };
95    enum { DEE_READ, DEE_SKIP };
96    
97    /* Actions for special processing options (flag bits) */
98    
99    #define PO_WORD_MATCH     0x0001
100    #define PO_LINE_MATCH     0x0002
101    #define PO_FIXED_STRINGS  0x0004
102    
103    /* Line ending types */
104    
105    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
113    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
114    
115    
116    
117  /*************************************************  /*************************************************
118  *               Global variables                 *  *               Global variables                 *
119  *************************************************/  *************************************************/
120    
121    /* Jeffrey Friedl has some debugging requirements that are not part of the
122    regular code. */
123    
124    #ifdef JFRIEDL_DEBUG
125    static int S_arg = -1;
126    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128    static const char *jfriedl_prefix = "";
129    static const char *jfriedl_postfix = "";
130    #endif
131    
132    static int  endlinetype;
133    
134    static char *colour_string = (char *)"1;31";
135    static char *colour_option = NULL;
136    static char *dee_option = NULL;
137    static char *DEE_option = NULL;
138    static char *newline = NULL;
139  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
140    static char *stdin_name = (char *)"(standard input)";
141    static char *locale = NULL;
142    
143    static const unsigned char *pcretables = NULL;
144    
145  static int  pattern_count = 0;  static int  pattern_count = 0;
146  static pcre **pattern_list;  static pcre **pattern_list = NULL;
147  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
148    
149    static char *include_pattern = NULL;
150    static char *exclude_pattern = NULL;
151    static char *include_dir_pattern = NULL;
152    static char *exclude_dir_pattern = NULL;
153    
154    static pcre *include_compiled = NULL;
155    static pcre *exclude_compiled = NULL;
156    static pcre *include_dir_compiled = NULL;
157    static pcre *exclude_dir_compiled = NULL;
158    
159    static int after_context = 0;
160    static int before_context = 0;
161    static int both_context = 0;
162    static int dee_action = dee_READ;
163    static int DEE_action = DEE_READ;
164    static int error_count = 0;
165    static int filenames = FN_DEFAULT;
166    static int process_options = 0;
167    
168    static unsigned long int match_limit = 0;
169    static unsigned long int match_limit_recursion = 0;
170    
171  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
172  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
173  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
174    static BOOL hyphenpending = FALSE;
175  static BOOL invert = FALSE;  static BOOL invert = FALSE;
176    static BOOL line_buffered = FALSE;
177    static BOOL line_offsets = FALSE;
178    static BOOL multiline = FALSE;
179  static BOOL number = FALSE;  static BOOL number = FALSE;
180  static BOOL recurse = FALSE;  static BOOL omit_zero_count = FALSE;
181    static BOOL only_matching = FALSE;
182    static BOOL resource_error = FALSE;
183    static BOOL quiet = FALSE;
184  static BOOL silent = FALSE;  static BOOL silent = FALSE;
185  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
186    
187  /* Structure for options and list of them */  /* Structure for options and list of them */
188    
189    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
190           OP_PATLIST };
191    
192  typedef struct option_item {  typedef struct option_item {
193      int type;
194    int one_char;    int one_char;
195    char *long_name;    void *dataptr;
196    char *help_text;    const char *long_name;
197      const char *help_text;
198  } option_item;  } option_item;
199    
200    /* Options without a single-letter equivalent get a negative value. This can be
201    used to identify them. */
202    
203    #define N_COLOUR       (-1)
204    #define N_EXCLUDE      (-2)
205    #define N_EXCLUDE_DIR  (-3)
206    #define N_HELP         (-4)
207    #define N_INCLUDE      (-5)
208    #define N_INCLUDE_DIR  (-6)
209    #define N_LABEL        (-7)
210    #define N_LOCALE       (-8)
211    #define N_NULL         (-9)
212    #define N_LOFFSETS     (-10)
213    #define N_FOFFSETS     (-11)
214    #define N_LBUFFER      (-12)
215    #define N_M_LIMIT      (-13)
216    #define N_M_LIMIT_REC  (-14)
217    
218  static option_item optionlist[] = {  static option_item optionlist[] = {
219    { -1,  "help",         "display this help and exit" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
220    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
221    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
222    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
223    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
224    { 'n', "line-number",  "print line number with output lines" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
225    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
226    { 's', "no-messages",  "suppress error messages" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
227    { 'V', "version",      "print version information and exit" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
228    { 'v', "invert-match", "select non-matching lines" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
229    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_PATLIST,   'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
230    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_NODATA,    'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
231    { 0,    NULL,           NULL }    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
232      { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
233      { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
234      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
235      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
236      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
237      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
238      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
239      { OP_NODATA,    N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
240      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
241      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
242      { OP_NUMBER,    N_M_LIMIT,&match_limit,      "match-limit=number", "set PCRE match limit option" },
243      { OP_NUMBER,    N_M_LIMIT_REC,&match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
245      { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
247      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
248      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
249      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
250      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
251      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
252      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
253      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
254    #ifdef JFRIEDL_DEBUG
255      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
256    #endif
257      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
258      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
259      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
260      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
261      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
262      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
263      { OP_NODATA,    0,        NULL,               NULL,            NULL }
264  };  };
265    
266    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
267    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
268    that the combination of -w and -x has the same effect as -x on its own, so we
269    can treat them as the same. */
270    
271    static const char *prefix[] = {
272      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
273    
274    static const char *suffix[] = {
275      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
276    
277    /* UTF-8 tables - used only when the newline setting is "any". */
278    
279    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
280    
281    const char utf8_table4[] = {
282      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
283      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
284      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
285      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
286    
287    
288    
289  /*************************************************  /*************************************************
290  *       Functions for directory scanning         *  *            OS-specific functions               *
291  *************************************************/  *************************************************/
292    
293  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
294  although at present the only ones are for Unix, and for "no directory recursion  although at present the only ones are for Unix, Win32, and for "no support". */
 support". */  
295    
296    
297  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
298    
299  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
300  #include <sys/types.h>  #include <sys/types.h>
301  #include <sys/stat.h>  #include <sys/stat.h>
302  #include <dirent.h>  #include <dirent.h>
303    
304  typedef DIR directory_type;  typedef DIR directory_type;
305    
306  int  static int
307  isdirectory(char *filename)  isdirectory(char *filename)
308  {  {
309  struct stat statbuf;  struct stat statbuf;
# Line 92  if (stat(filename, &statbuf) < 0) Line 312  if (stat(filename, &statbuf) < 0)
312  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
313  }  }
314    
315  directory_type *  static directory_type *
316  opendirectory(char *filename)  opendirectory(char *filename)   /* Unix: thin wrapper round opendir() */
317  {  {
318  return opendir(filename);  return opendir(filename);   /* the result of opendir() is passed back unchanged */
319  }  }
320    
321  char *  static char *
322  readdirectory(directory_type *dir)  readdirectory(directory_type *dir)
323  {  {
324  for (;;)  for (;;)
# Line 108  for (;;) Line 328  for (;;)
328    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
329      return dent->d_name;      return dent->d_name;
330    }    }
331    /* Control never reaches here */
332    }
333    
334    static void
335    closedirectory(directory_type *dir)   /* Unix: close a stream obtained from opendirectory() */
336    {
337    closedir(dir);   /* the value returned by closedir() is not checked */
338    }
339    
340    
341    /************* Test for regular file in Unix **********/
342    
343    static int
344    isregfile(char *filename)   /* Unix: non-zero if filename is a regular file or cannot be stat()ed */
345    {
346    struct stat statbuf;
347    if (stat(filename, &statbuf) < 0)   /* stat() failed: still answer "regular" */
348      return 1;        /* In the expectation that opening as a file will fail */
349    return (statbuf.st_mode & S_IFMT) == S_IFREG;   /* 1 for a regular file, otherwise 0 */
350    }
351    
352    
353    /************* Test for a terminal in Unix **********/
354    
355    static BOOL
356    is_stdout_tty(void)   /* Unix: is standard output a terminal? */
357    {
358    return isatty(fileno(stdout));   /* result of isatty() on stdout's file descriptor */
359    }
360    
361    static BOOL
362    is_file_tty(FILE *f)   /* Unix: is the stream f a terminal? */
363    {
364    return isatty(fileno(f));   /* result of isatty() on f's file descriptor */
365    }
366    
367    
368    /************* Directory scanning in Win32 ***********/
369    
370    /* I (Philip Hazel) have no means of testing this code. It was contributed by
371    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
372    when it did not exist. David Byron added a patch that moved the #include of
373    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
374    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
375    undefined when it is indeed undefined. */
376    
377    #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
378    
379    #ifndef STRICT
380    # define STRICT
381    #endif
382    #ifndef WIN32_LEAN_AND_MEAN
383    # define WIN32_LEAN_AND_MEAN
384    #endif
385    
386    #include <windows.h>
387    
388    #ifndef INVALID_FILE_ATTRIBUTES
389    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
390    #endif
391    
392    typedef struct directory_type   /* Win32 scan state; plays the role DIR plays in the Unix code */
393    {
394    HANDLE handle;   /* search handle returned by FindFirstFile() in opendirectory() */
395    BOOL first;   /* TRUE while "data" still holds the first entry, not yet returned by readdirectory() */
396    WIN32_FIND_DATA data;   /* entry most recently filled in by FindFirstFile()/FindNextFile() */
397    } directory_type;
398    
399    int
400    isdirectory(char *filename)   /* Win32: returns '/' if filename is a directory, otherwise 0 */
401    {
402    DWORD attr = GetFileAttributes(filename);
403    if (attr == INVALID_FILE_ATTRIBUTES)   /* attributes could not be read: treated as "not a directory" */
404      return 0;
405    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
406    }
407    
408    directory_type *
409    opendirectory(char *filename)   /* Win32: start scanning directory filename; NULL with errno set on failure */
410    {
411    size_t len;
412    char *pattern;
413    directory_type *dir;
414    DWORD err;
415    len = strlen(filename);
416    pattern = (char *) malloc(len + 3);   /* room for the name, a backslash, '*' and the terminating NUL */
417    dir = (directory_type *) malloc(sizeof(*dir));
418    if ((pattern == NULL) || (dir == NULL))   /* either allocation failing is fatal */
419      {
420      fprintf(stderr, "pcregrep: malloc failed\n");
421      pcregrep_exit(2);   /* NOTE(review): pcregrep_exit() is defined further down (line 569 of this revision); confirm a prototype precedes this call */
422      }
423    memcpy(pattern, filename, len);
424    memcpy(&(pattern[len]), "\\*", 3);   /* appends a backslash, '*' and the NUL terminator */
425    dir->handle = FindFirstFile(pattern, &(dir->data));
426    if (dir->handle != INVALID_HANDLE_VALUE)
427      {
428      free(pattern);
429      dir->first = TRUE;   /* dir->data already holds the first entry; readdirectory() hands it out before calling FindNextFile() */
430      return dir;
431      }
432    err = GetLastError();   /* the error code is read before the buffers are freed */
433    free(pattern);
434    free(dir);
435    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;   /* every failure other than access-denied is reported as ENOENT */
436    return NULL;
437    }
438    
439    char *
440    readdirectory(directory_type *dir)   /* Win32: next entry name, skipping "." and ".."; NULL when FindNextFile() fails */
441    {
442    for (;;)
443      {
444      if (!dir->first)
445        {
446        if (!FindNextFile(dir->handle, &(dir->data)))   /* fetch the next entry; any failure ends the scan */
447          return NULL;
448        }
449      else
450        {
451        dir->first = FALSE;   /* the entry found by FindFirstFile() is already in dir->data */
452        }
453      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
454        return dir->data.cFileName;   /* points into *dir, so the next FindNextFile() call overwrites it */
455      }
456    #ifndef _MSC_VER
457  return NULL;   /* Keep compiler happy; never executed */  return NULL;   /* Keep compiler happy; never executed */
458    #endif
459  }  }
460    
461  void  void
462  closedirectory(directory_type *dir)  closedirectory(directory_type *dir)
463  {  {
464  closedir(dir);  FindClose(dir->handle);   /* new revision (Win32): ends the search; the result is not checked */
465    free(dir);   /* releases the block malloc'd by the Win32 opendirectory() */
466  }  }
467    
468    
469  #else  /************* Test for regular file in Win32 **********/
470    
471    /* I don't know how to do this, or if it can be done; assume all paths are
472    regular if they are not directories. */
473    
474    int isregfile(char *filename)   /* Win32: 1 unless isdirectory() reports a directory */
475    {
476    return !isdirectory(filename);   /* also 1 when isdirectory() returns 0 because the attributes could not be read */
477    }
478    
479    
480    /************* Test for a terminal in Win32 **********/
481    
482    /* I don't know how to do this; assume never */
483    
484    static BOOL
485    is_stdout_tty(void)   /* Win32: no terminal test is made */
486    {
487    return FALSE;   /* always reports "not a terminal" */
488    }
489    
490    static BOOL
491    is_file_tty(FILE *f)   /* Win32: f is not examined */
492    {
493    return FALSE;   /* always reports "not a terminal" */
494    }
495    
496    
497  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
498    
499  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
500    
501    #else
502    
503  typedef void directory_type;  typedef void directory_type;
504    
505  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }   /* nothing is ever reported as a directory */
506  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}   /* new revision: always a null pointer */
507  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}   /* new revision: always a null pointer */
508  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}   /* nothing to release */
509    
510    
511    /************* Test for regular file when we can't do it **********/
512    
513    /* Assume all files are regular. */
514    
515    int isregfile(char *filename) { return 1; }   /* filename is not examined */
516    
517    
518    /************* Test for a terminal when we can't do it **********/
519    
520    static BOOL
521    is_stdout_tty(void)   /* unsupported system: no terminal test is made */
522    {
523    return FALSE;   /* always reports "not a terminal" */
524    }
525    
526    static BOOL
527    is_file_tty(FILE *f)   /* unsupported system: f is not examined */
528    {
529    return FALSE;   /* always reports "not a terminal" */
530    }
531    
532  #endif  #endif
533    
534    
535    
536  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
537  /*************************************************  /*************************************************
538  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
539  *************************************************/  *************************************************/
# Line 159  return sys_errlist[n]; Line 556  return sys_errlist[n];
556    
557    
558  /*************************************************  /*************************************************
559  *              Grep an individual file           *  *         Exit from the program                  *
560  *************************************************/  *************************************************/
561    
562  static int  /* If there has been a resource error, give a suitable message.
 pcregrep(FILE *in, char *name)  
 {  
 int rc = 1;  
 int linenumber = 0;  
 int count = 0;  
 int offsets[99];  
 char buffer[BUFSIZ];  
563    
564  while (fgets(buffer, sizeof(buffer), in) != NULL)  Argument:  the return code
565    Returns:   does not return
566    */
567    
568    static void
569    pcregrep_exit(int rc)
570    {
571    if (resource_error)
572    {    {
573    BOOL match = FALSE;    fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
574    int i;      "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
575    int length = (int)strlen(buffer);    fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
576    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    }
   linenumber++;  
577    
578    for (i = 0; !match && i < pattern_count; i++)  exit(rc);
579      {  }
     match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,  
       offsets, 99) >= 0;  
     if (match && whole_lines && offsets[1] != length) match = FALSE;  
     }  
580    
   if (match != invert)  
     {  
     if (count_only) count++;  
581    
     else if (filenames_only)  
       {  
       fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);  
       return 0;  
       }  
582    
583      else if (silent) return 0;  /*************************************************
584    *            Read one line of input              *
585    *************************************************/
586    
587      else  /* Normally, input is read using fread() into a large buffer, so many lines may
588        {  be read at once. However, doing this for tty input means that no output appears
589        if (name != NULL) fprintf(stdout, "%s:", name);  until a lot of input has been typed. Instead, tty input is handled line by
590        if (number) fprintf(stdout, "%d:", linenumber);  line. We cannot use fgets() for this, because it does not stop at a binary
591        fprintf(stdout, "%s\n", buffer);  zero, and therefore there is no way of telling how many characters it has read,
592        }  because there may be binary zeros embedded in the data.
593    
594    Arguments:
595      buffer     the buffer to read into
596      length     the maximum number of characters to read
597      f          the file
598    
599      rc = 0;  Returns:     the number of characters read, zero at end of file
600      }  */
   }  
601    
602  if (count_only)  static int
603    read_one_line(char *buffer, int length, FILE *f)
604    {
605    int c;
606    int yield = 0;
607    while ((c = fgetc(f)) != EOF)
608    {    {
609    if (name != NULL) fprintf(stdout, "%s:", name);    buffer[yield++] = c;
610    fprintf(stdout, "%d\n", count);    if (c == '\n' || yield >= length) break;
611    }    }
612    return yield;
 return rc;  
613  }  }
614    
615    
616    
   
617  /*************************************************  /*************************************************
618  *     Grep a file or recurse into a directory    *  *             Find end of line                   *
619  *************************************************/  *************************************************/
620    
621  static int  /* The length of the endline sequence that is found is set via lenptr. This may
622  grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,  be zero at the very end of the file if there is no line-ending sequence there.
623    BOOL only_one_at_top)  
624  {  Arguments:
625  int rc = 1;    p         current position in line
626  int sep;    endptr    end of available data
627  FILE *in;    lenptr    where to put the length of the eol sequence
628    
629  /* If the file is a directory and we are recursing, scan each file within it.  Returns:    pointer after the last byte of the line, including the newline byte(s)
630  The scanning code is localized so it can be made system-specific. */  */
631    
632  if ((sep = isdirectory(filename)) != 0 && recurse)  static char *
633    end_of_line(char *p, char *endptr, int *lenptr)
634    {
635    switch(endlinetype)
636    {    {
637    char buffer[1024];    default:      /* Just in case */
638    char *nextfile;    case EL_LF:
639    directory_type *dir = opendirectory(filename);    while (p < endptr && *p != '\n') p++;
640      if (p < endptr)
641        {
642        *lenptr = 1;
643        return p + 1;
644        }
645      *lenptr = 0;
646      return endptr;
647    
648    if (dir == NULL)    case EL_CR:
649      while (p < endptr && *p != '\r') p++;
650      if (p < endptr)
651      {      {
652      fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,      *lenptr = 1;
653        strerror(errno));      return p + 1;
     return 2;  
654      }      }
655      *lenptr = 0;
656      return endptr;
657    
658    while ((nextfile = readdirectory(dir)) != NULL)    case EL_CRLF:
659      for (;;)
660      {      {
661      int frc;      while (p < endptr && *p != '\r') p++;
662      sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);      if (++p >= endptr)
663      frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);        {
664      if (frc == 0 && rc == 1) rc = 0;        *lenptr = 0;
665          return endptr;
666          }
667        if (*p == '\n')
668          {
669          *lenptr = 2;
670          return p + 1;
671          }
672      }      }
673      break;
674    
675    closedirectory(dir);    case EL_ANYCRLF:
676    return rc;    while (p < endptr)
677    }      {
678        int extra = 0;
679        register int c = *((unsigned char *)p);
680    
681  /* If the file is not a directory, or we are not recursing, scan it. If this is      if (utf8 && c >= 0xc0)
682  the first and only argument at top level, we don't show the file name.        {
683  Otherwise, control is via the show_filenames variable. */        int gcii, gcss;
684          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
685          gcss = 6*extra;
686          c = (c & utf8_table3[extra]) << gcss;
687          for (gcii = 1; gcii <= extra; gcii++)
688            {
689            gcss -= 6;
690            c |= (p[gcii] & 0x3f) << gcss;
691            }
692          }
693    
694  in = fopen(filename, "r");      p += 1 + extra;
 if (in == NULL)  
   {  
   fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));  
   return 2;  
   }  
695    
696  rc = pcregrep(in, (show_filenames && !only_one_at_top)? filename : NULL);      switch (c)
697  fclose(in);        {
698  return rc;        case 0x0a:    /* LF */
699  }        *lenptr = 1;
700          return p;
701    
702          case 0x0d:    /* CR */
703          if (p < endptr && *p == 0x0a)
704            {
705            *lenptr = 2;
706            p++;
707            }
708          else *lenptr = 1;
709          return p;
710    
711          default:
712          break;
713          }
714        }   /* End of loop for ANYCRLF case */
715    
716      *lenptr = 0;  /* Must have hit the end */
717      return endptr;
718    
719  /*************************************************    case EL_ANY:
720  *                Usage function                  *    while (p < endptr)
721  *************************************************/      {
722        int extra = 0;
723        register int c = *((unsigned char *)p);
724    
725  static int      if (utf8 && c >= 0xc0)
726  usage(int rc)        {
727  {        int gcii, gcss;
728  fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n");        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
729  fprintf(stderr, "Type `pcregrep --help' for more information.\n");        gcss = 6*extra;
730  return rc;        c = (c & utf8_table3[extra]) << gcss;
731  }        for (gcii = 1; gcii <= extra; gcii++)
732            {
733            gcss -= 6;
734            c |= (p[gcii] & 0x3f) << gcss;
735            }
736          }
737    
738        p += 1 + extra;
739    
740        switch (c)
741          {
742          case 0x0a:    /* LF */
743          case 0x0b:    /* VT */
744          case 0x0c:    /* FF */
745          *lenptr = 1;
746          return p;
747    
748          case 0x0d:    /* CR */
749          if (p < endptr && *p == 0x0a)
750            {
751            *lenptr = 2;
752            p++;
753            }
754          else *lenptr = 1;
755          return p;
756    
757          case 0x85:    /* NEL */
758          *lenptr = utf8? 2 : 1;
759          return p;
760    
761          case 0x2028:  /* LS */
762          case 0x2029:  /* PS */
763          *lenptr = 3;
764          return p;
765    
766          default:
767          break;
768          }
769        }   /* End of loop for ANY case */
770    
771      *lenptr = 0;  /* Must have hit the end */
772      return endptr;
773      }     /* End of overall switch */
774    }
775    
776    
777    
778  /*************************************************  /*************************************************
779  *                Help function                   *  *         Find start of previous line            *
780  *************************************************/  *************************************************/
781    
782  static void  /* This is called when looking back for before lines to print.
 help(void)  
 {  
 option_item *op;  
783    
784  printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n");  Arguments:
785  printf("Search for PATTERN in each FILE or standard input.\n");    p         start of the subsequent line
786  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");    startptr  start of available data
787    
788  printf("Options:\n");  Returns:    pointer to the start of the previous line
789    */
790    
791  for (op = optionlist; op->one_char != 0; op++)  static char *
792    previous_line(char *p, char *startptr)
793    {
794    switch(endlinetype)
795    {    {
796    int n;    default:      /* Just in case */
797    char s[4];    case EL_LF:
798    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    p--;
799    printf("  %s --%s%n", s, op->long_name, &n);    while (p > startptr && p[-1] != '\n') p--;
800    n = 30 - n;    return p;
801    if (n < 1) n = 1;  
802    printf("%.*s%s\n", n, "                    ", op->help_text);    case EL_CR:
803    }    p--;
804      while (p > startptr && p[-1] != '\n') p--;
805      return p;
806    
807  printf("\n  -f<filename>  or  --file=<filename>\n");    case EL_CRLF:
808  printf("    Read patterns from <filename> instead of using a command line option.\n");    for (;;)
809  printf("    Trailing white space is removed; blanks lines are ignored.\n");      {
810  printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);      p -= 2;
811        while (p > startptr && p[-1] != '\n') p--;
812        if (p <= startptr + 1 || p[-2] == '\r') return p;
813        }
814      return p;   /* But control should never get here */
815    
816  printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");    case EL_ANY:
817  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");    case EL_ANYCRLF:
818  }    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
819      if (utf8) while ((*p & 0xc0) == 0x80) p--;
820    
821      while (p > startptr)
822        {
823        register int c;
824        char *pp = p - 1;
825    
826        if (utf8)
827          {
828          int extra = 0;
829          while ((*pp & 0xc0) == 0x80) pp--;
830          c = *((unsigned char *)pp);
831          if (c >= 0xc0)
832            {
833            int gcii, gcss;
834            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
835            gcss = 6*extra;
836            c = (c & utf8_table3[extra]) << gcss;
837            for (gcii = 1; gcii <= extra; gcii++)
838              {
839              gcss -= 6;
840              c |= (pp[gcii] & 0x3f) << gcss;
841              }
842            }
843          }
844        else c = *((unsigned char *)pp);
845    
846        if (endlinetype == EL_ANYCRLF) switch (c)
847          {
848          case 0x0a:    /* LF */
849          case 0x0d:    /* CR */
850          return p;
851    
852          default:
853          break;
854          }
855    
856        else switch (c)
857          {
858          case 0x0a:    /* LF */
859          case 0x0b:    /* VT */
860          case 0x0c:    /* FF */
861          case 0x0d:    /* CR */
862          case 0x85:    /* NEL */
863          case 0x2028:  /* LS */
864          case 0x2029:  /* PS */
865          return p;
866    
867          default:
868          break;
869          }
870    
871        p = pp;  /* Back one character */
872        }        /* End of loop for ANY case */
873    
874      return startptr;  /* Hit start of data */
875      }     /* End of overall switch */
876    }
877    
878    
879    
880    
881    
882    /*************************************************
883    *       Print the previous "after" lines         *
884    *************************************************/
885    
886    /* This is called if we are about to lose said lines because of buffer filling,
887    and at the end of the file. The data in the line is written using fwrite() so
888    that a binary zero does not terminate it.
889    
890    Arguments:
891      lastmatchnumber   the number of the last matching line, plus one
892      lastmatchrestart  where we restarted after the last match
893      endptr            end of available data
894      printname         filename for printing
895    
896    Returns:            nothing
897    */
898    
899    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
900      char *endptr, char *printname)
901    {
902    if (after_context > 0 && lastmatchnumber > 0)
903      {
904      int count = 0;
905      while (lastmatchrestart < endptr && count++ < after_context)
906        {
907        int ellength;
908        char *pp = lastmatchrestart;
909        if (printname != NULL) fprintf(stdout, "%s-", printname);
910        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
911        pp = end_of_line(pp, endptr, &ellength);
912        FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
913        lastmatchrestart = pp;
914        }
915      hyphenpending = TRUE;
916      }
917    }
918    
919    
920    
921    /*************************************************
922    *   Apply patterns to subject till one matches   *
923    *************************************************/
924    
925    /* This function is called to run through all patterns, looking for a match. It
926    is used multiple times for the same subject when colouring is enabled, in order
927    to find all possible matches.
928    
929    Arguments:
930      matchptr    the start of the subject
931      length      the length of the subject to match
932      offsets     the offsets vector to fill in
933      mrc         address of where to put the result of pcre_exec()
934    
935    Returns:      TRUE if there was a match
936                  FALSE if there was no match
937                  invert if there was a non-fatal error
938    */
939    
940    static BOOL
941    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
942    {
943    int i;
944    size_t slen = length;
945    const char *msg = "this text:\n\n";
946    if (slen > 200)
947      {
948      slen = 200;
949      msg = "text that starts:\n\n";
950      }
951    for (i = 0; i < pattern_count; i++)
952      {
953      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
954        PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
955      if (*mrc >= 0) return TRUE;
956      if (*mrc == PCRE_ERROR_NOMATCH) continue;
957      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
958      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
959      fprintf(stderr, "%s", msg);
960      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
961      fprintf(stderr, "\n\n");
962      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
963        resource_error = TRUE;
964      if (error_count++ > 20)
965        {
966        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
967        pcregrep_exit(2);
968        }
969      return invert;    /* No more matching; don't show the line again */
970      }
971    
972    return FALSE;  /* No match, no errors */
973    }
974    
975    
976    
977    /*************************************************
978    *            Grep an individual file             *
979    *************************************************/
980    
981    /* This is called from grep_or_recurse() below. It uses a buffer that is three
982    times the value of MBUFTHIRD. The matching point is never allowed to stray into
983    the top third of the buffer, thus keeping more of the file available for
984    context printing or for multiline scanning. For large files, the pointer will
985    be in the middle third most of the time, so the bottom third is available for
986    "before" context printing.
987    
988    Arguments:
989      handle       the fopened FILE stream for a normal file
990                   the gzFile pointer when reading is via libz
991                   the BZFILE pointer when reading is via libbz2
992      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
993      printname    the file name if it is to be printed for each match
994                   or NULL if the file name is not to be printed
995                   it cannot be NULL if filenames[_nomatch]_only is set
996    
997    Returns:       0 if there was at least one match
998                   1 otherwise (no matches)
999                   2 if there is a read error on a .bz2 file
1000    */
1001    
1002    static int
1003    pcregrep(void *handle, int frtype, char *printname)
1004    {
1005    int rc = 1;
1006    int linenumber = 1;
1007    int lastmatchnumber = 0;
1008    int count = 0;
1009    int filepos = 0;
1010    int offsets[OFFSET_SIZE];
1011    char *lastmatchrestart = NULL;
1012    char buffer[3*MBUFTHIRD];
1013    char *ptr = buffer;
1014    char *endptr;
1015    size_t bufflength;
1016    BOOL endhyphenpending = FALSE;
1017    BOOL input_line_buffered = line_buffered;
1018    FILE *in = NULL;                    /* Ensure initialized */
1019    
1020    #ifdef SUPPORT_LIBZ
1021    gzFile ingz = NULL;
1022    #endif
1023    
1024    #ifdef SUPPORT_LIBBZ2
1025    BZFILE *inbz2 = NULL;
1026    #endif
1027    
1028    
1029    /* Do the first read into the start of the buffer and set up the pointer to end
1030    of what we have. In the case of libz, a non-zipped .gz file will be read as a
1031    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1032    fail. */
1033    
1034    #ifdef SUPPORT_LIBZ
1035    if (frtype == FR_LIBZ)
1036      {
1037      ingz = (gzFile)handle;
1038      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1039      }
1040    else
1041    #endif
1042    
1043    #ifdef SUPPORT_LIBBZ2
1044    if (frtype == FR_LIBBZ2)
1045      {
1046      inbz2 = (BZFILE *)handle;
1047      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1048      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1049      }                                    /* without the cast it is unsigned. */
1050    else
1051    #endif
1052    
1053      {
1054      in = (FILE *)handle;
1055      if (is_file_tty(in)) input_line_buffered = TRUE;
1056      bufflength = input_line_buffered?
1057        read_one_line(buffer, 3*MBUFTHIRD, in) :
1058        fread(buffer, 1, 3*MBUFTHIRD, in);
1059      }
1060    
1061    endptr = buffer + bufflength;
1062    
1063    /* Loop while the current pointer is not at the end of the file. For large
1064    files, endptr will be at the end of the buffer when we are in the middle of the
1065    file, but ptr will never get there, because as soon as it gets over 2/3 of the
1066    way, the buffer is shifted left and re-filled. */
1067    
1068    while (ptr < endptr)
1069      {
1070      int endlinelength;
1071      int mrc = 0;
1072      BOOL match;
1073      char *matchptr = ptr;
1074      char *t = ptr;
1075      size_t length, linelength;
1076    
1077      /* At this point, ptr is at the start of a line. We need to find the length
1078      of the subject string to pass to pcre_exec(). In multiline mode, it is the
1079      length of the remainder of the data in the buffer. Otherwise, it is the length of
1080      the next line, excluding the terminating newline. After matching, we always
1081      advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1082      option is used for compiling, so that any match is constrained to be in the
1083      first line. */
1084    
1085      t = end_of_line(t, endptr, &endlinelength);
1086      linelength = t - ptr - endlinelength;
1087      length = multiline? (size_t)(endptr - ptr) : linelength;
1088    
1089      /* Extra processing for Jeffrey Friedl's debugging. */
1090    
1091    #ifdef JFRIEDL_DEBUG
1092      if (jfriedl_XT || jfriedl_XR)
1093      {
1094          #include <sys/time.h>
1095          #include <time.h>
1096          struct timeval start_time, end_time;
1097          struct timezone dummy;
1098          int i;
1099    
1100          if (jfriedl_XT)
1101          {
1102              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1103              const char *orig = ptr;
1104              ptr = malloc(newlen + 1);
1105              if (!ptr) {
1106                      printf("out of memory");
1107                      pcregrep_exit(2);
1108              }
1109              endptr = ptr;
1110              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1111              for (i = 0; i < jfriedl_XT; i++) {
1112                      strncpy(endptr, orig,  length);
1113                      endptr += length;
1114              }
1115              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1116              length = newlen;
1117          }
1118    
1119          if (gettimeofday(&start_time, &dummy) != 0)
1120                  perror("bad gettimeofday");
1121    
1122    
1123          for (i = 0; i < jfriedl_XR; i++)
1124              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1125                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1126    
1127          if (gettimeofday(&end_time, &dummy) != 0)
1128                  perror("bad gettimeofday");
1129    
1130          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1131                          -
1132                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1133    
1134          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1135          return 0;
1136      }
1137    #endif
1138    
1139      /* We come back here after a match when the -o option (only_matching) is set,
1140      in order to find any further matches in the same line. */
1141    
1142      ONLY_MATCHING_RESTART:
1143    
1144      /* Run through all the patterns until one matches or there is an error other
1145      than NOMATCH. This code is in a subroutine so that it can be re-used for
1146      finding subsequent matches when colouring matched lines. */
1147    
1148      match = match_patterns(matchptr, length, offsets, &mrc);
1149    
1150      /* If it's a match or a not-match (as required), do what's wanted. */
1151    
1152      if (match != invert)
1153        {
1154        BOOL hyphenprinted = FALSE;
1155    
1156        /* We've failed if we want a file that doesn't have any matches. */
1157    
1158        if (filenames == FN_NOMATCH_ONLY) return 1;
1159    
1160        /* Just count if just counting is wanted. */
1161    
1162        if (count_only) count++;
1163    
1164        /* If all we want is a file name, there is no need to scan any more lines
1165        in the file. */
1166    
1167        else if (filenames == FN_MATCH_ONLY)
1168          {
1169          fprintf(stdout, "%s\n", printname);
1170          return 0;
1171          }
1172    
1173        /* Likewise, if all we want is a yes/no answer. */
1174    
1175        else if (quiet) return 0;
1176    
1177        /* The --only-matching option prints just the substring that matched, and
1178        the --file-offsets and --line-offsets options output offsets for the
1179        matching substring (they both force --only-matching). None of these options
1180        prints any context. Afterwards, adjust the start and length, and then jump
1181        back to look for further matches in the same line. If we are in invert
1182        mode, however, nothing is printed - this could be still useful because the
1183        return code is set. */
1184    
1185        else if (only_matching)
1186          {
1187          if (!invert)
1188            {
1189            if (printname != NULL) fprintf(stdout, "%s:", printname);
1190            if (number) fprintf(stdout, "%d:", linenumber);
1191            if (line_offsets)
1192              fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1193                offsets[1] - offsets[0]);
1194            else if (file_offsets)
1195              fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1196                offsets[1] - offsets[0]);
1197            else
1198              {
1199              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1200              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1201              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1202              }
1203            fprintf(stdout, "\n");
1204            matchptr += offsets[1];
1205            length -= offsets[1];
1206            match = FALSE;
1207            goto ONLY_MATCHING_RESTART;
1208            }
1209          }
1210    
1211        /* This is the default case when none of the above options is set. We print
1212        the matching line(s), possibly preceded and/or followed by other lines of
1213        context. */
1214    
1215        else
1216          {
1217          /* See if there is a requirement to print some "after" lines from a
1218          previous match. We never print any overlaps. */
1219    
1220          if (after_context > 0 && lastmatchnumber > 0)
1221            {
1222            int ellength;
1223            int linecount = 0;
1224            char *p = lastmatchrestart;
1225    
1226            while (p < ptr && linecount < after_context)
1227              {
1228              p = end_of_line(p, ptr, &ellength);
1229              linecount++;
1230              }
1231    
1232            /* It is important to advance lastmatchrestart during this printing so
1233            that it interacts correctly with any "before" printing below. Print
1234            each line's data using fwrite() in case there are binary zeroes. */
1235    
1236            while (lastmatchrestart < p)
1237              {
1238              char *pp = lastmatchrestart;
1239              if (printname != NULL) fprintf(stdout, "%s-", printname);
1240              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1241              pp = end_of_line(pp, endptr, &ellength);
1242              FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1243              lastmatchrestart = pp;
1244              }
1245            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1246            }
1247    
1248          /* If there were non-contiguous lines printed above, insert hyphens. */
1249    
1250          if (hyphenpending)
1251            {
1252            fprintf(stdout, "--\n");
1253            hyphenpending = FALSE;
1254            hyphenprinted = TRUE;
1255            }
1256    
1257          /* See if there is a requirement to print some "before" lines for this
1258          match. Again, don't print overlaps. */
1259    
1260          if (before_context > 0)
1261            {
1262            int linecount = 0;
1263            char *p = ptr;
1264    
1265            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1266                   linecount < before_context)
1267              {
1268              linecount++;
1269              p = previous_line(p, buffer);
1270              }
1271    
1272            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1273              fprintf(stdout, "--\n");
1274    
1275            while (p < ptr)
1276              {
1277              int ellength;
1278              char *pp = p;
1279              if (printname != NULL) fprintf(stdout, "%s-", printname);
1280              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1281              pp = end_of_line(pp, endptr, &ellength);
1282              FWRITE(p, 1, pp - p, stdout);
1283              p = pp;
1284              }
1285            }
1286    
1287          /* Now print the matching line(s); ensure we set hyphenpending at the end
1288          of the file if any context lines are being output. */
1289    
1290          if (after_context > 0 || before_context > 0)
1291            endhyphenpending = TRUE;
1292    
1293          if (printname != NULL) fprintf(stdout, "%s:", printname);
1294          if (number) fprintf(stdout, "%d:", linenumber);
1295    
1296          /* In multiline mode, we want to print to the end of the line in which
1297          the end of the matched string is found, so we adjust linelength and the
1298          line number appropriately, but only when there actually was a match
1299          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1300          the match will always be before the first newline sequence. */
1301    
1302          if (multiline)
1303            {
1304            int ellength;
1305            char *endmatch = ptr;
1306            if (!invert)
1307              {
1308              endmatch += offsets[1];
1309              t = ptr;
1310              while (t < endmatch)
1311                {
1312                t = end_of_line(t, endptr, &ellength);
1313                if (t <= endmatch) linenumber++; else break;
1314                }
1315              }
1316            endmatch = end_of_line(endmatch, endptr, &ellength);
1317            linelength = endmatch - ptr - ellength;
1318            }
1319    
1320          /*** NOTE: Use only fwrite() to output the data line, so that binary
1321          zeroes are treated as just another data character. */
1322    
1323          /* This extra option, for Jeffrey Friedl's debugging requirements,
1324          replaces the matched string, or a specific captured string if it exists,
1325          with X. When this happens, colouring is ignored. */
1326    
1327    #ifdef JFRIEDL_DEBUG
1328          if (S_arg >= 0 && S_arg < mrc)
1329            {
1330            int first = S_arg * 2;
1331            int last  = first + 1;
1332            FWRITE(ptr, 1, offsets[first], stdout);
1333            fprintf(stdout, "X");
1334            FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1335            }
1336          else
1337    #endif
1338    
1339          /* We have to split the line(s) up if colouring, and search for further
1340          matches. */
1341    
1342          if (do_colour)
1343            {
1344            int last_offset = 0;
1345            FWRITE(ptr, 1, offsets[0], stdout);
1346            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1347            FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1348            fprintf(stdout, "%c[00m", 0x1b);
1349            for (;;)
1350              {
1351              last_offset += offsets[1];
1352              matchptr += offsets[1];
1353              length -= offsets[1];
1354              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1355              FWRITE(matchptr, 1, offsets[0], stdout);
1356              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1357              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1358              fprintf(stdout, "%c[00m", 0x1b);
1359              }
1360            FWRITE(ptr + last_offset, 1,
1361              (linelength + endlinelength) - last_offset, stdout);
1362            }
1363    
1364          /* Not colouring; no need to search for further matches */
1365    
1366          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1367          }
1368    
1369        /* End of doing what has to be done for a match. If --line-buffered was
1370        given, flush the output. */
1371    
1372        if (line_buffered) fflush(stdout);
1373        rc = 0;    /* Had some success */
1374    
1375        /* Remember where the last match happened for after_context. We remember
1376        where we are about to restart, and that line's number. */
1377    
1378        lastmatchrestart = ptr + linelength + endlinelength;
1379        lastmatchnumber = linenumber + 1;
1380        }
1381    
1382      /* For a match in multiline inverted mode (which of course did not cause
1383      anything to be printed), we have to move on to the end of the match before
1384      proceeding. */
1385    
1386      if (multiline && invert && match)
1387        {
1388        int ellength;
1389        char *endmatch = ptr + offsets[1];
1390        t = ptr;
1391        while (t < endmatch)
1392          {
1393          t = end_of_line(t, endptr, &ellength);
1394          if (t <= endmatch) linenumber++; else break;
1395          }
1396        endmatch = end_of_line(endmatch, endptr, &ellength);
1397        linelength = endmatch - ptr - ellength;
1398        }
1399    
1400      /* Advance to after the newline and increment the line number. The file
1401      offset to the current line is maintained in filepos. */
1402    
1403      ptr += linelength + endlinelength;
1404      filepos += (int)(linelength + endlinelength);
1405      linenumber++;
1406    
1407      /* If input is line buffered, and the buffer is not yet full, read another
1408      line and add it into the buffer. */
1409    
1410      if (input_line_buffered && bufflength < sizeof(buffer))
1411        {
1412        int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1413        bufflength += add;
1414        endptr += add;
1415        }
1416    
1417      /* If we haven't yet reached the end of the file (the buffer is full), and
1418      the current point is in the top 1/3 of the buffer, slide the buffer down by
1419      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1420      about to be lost, print them. */
1421    
1422      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1423        {
1424        if (after_context > 0 &&
1425            lastmatchnumber > 0 &&
1426            lastmatchrestart < buffer + MBUFTHIRD)
1427          {
1428          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1429          lastmatchnumber = 0;
1430          }
1431    
1432        /* Now do the shuffle */
1433    
1434        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1435        ptr -= MBUFTHIRD;
1436    
1437    #ifdef SUPPORT_LIBZ
1438        if (frtype == FR_LIBZ)
1439          bufflength = 2*MBUFTHIRD +
1440            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1441        else
1442    #endif
1443    
1444    #ifdef SUPPORT_LIBBZ2
1445        if (frtype == FR_LIBBZ2)
1446          bufflength = 2*MBUFTHIRD +
1447            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1448        else
1449    #endif
1450    
1451        bufflength = 2*MBUFTHIRD +
1452          (input_line_buffered?
1453           read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1454           fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1455        endptr = buffer + bufflength;
1456    
1457        /* Adjust any last match point */
1458    
1459        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1460        }
1461      }     /* Loop through the whole file */
1462    
1463    /* End of file; print final "after" lines if wanted; do_after_lines sets
1464    hyphenpending if it prints something. */
1465    
1466    if (!only_matching && !count_only)
1467      {
1468      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1469      hyphenpending |= endhyphenpending;
1470      }
1471    
1472    /* Print the file name if we are looking for those without matches and there
1473    were none. If we found a match, we won't have got this far. */
1474    
1475    if (filenames == FN_NOMATCH_ONLY)
1476      {
1477      fprintf(stdout, "%s\n", printname);
1478      return 0;
1479      }
1480    
1481    /* Print the match count if wanted */
1482    
1483    if (count_only)
1484      {
1485      if (count > 0 || !omit_zero_count)
1486        {
1487        if (printname != NULL && filenames != FN_NONE)
1488          fprintf(stdout, "%s:", printname);
1489        fprintf(stdout, "%d\n", count);
1490        }
1491      }
1492    
1493    return rc;
1494    }
1495    
1496    
1497    
1498    /*************************************************
1499    *     Grep a file or recurse into a directory    *
1500    *************************************************/
1501    
1502    /* Given a path name, if it's a directory, scan all the files if we are
1503    recursing; if it's a file, grep it.
1504    
1505    Arguments:
1506      pathname          the path to investigate
1507      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1508      only_one_at_top   TRUE if the path is the only one at toplevel
1509    
1510    Returns:   0 if there was at least one match
1511               1 if there were no matches
1512               2 if there was some kind of error
1513    
1514    However, file opening failures are suppressed if "silent" is set.
1515    */
1516    
1517    static int
1518    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1519    {
1520    int rc = 1;
1521    int sep;
1522    int frtype;
1523    int pathlen;
1524    void *handle;
1525    FILE *in = NULL;           /* Ensure initialized */
1526    
1527    #ifdef SUPPORT_LIBZ
1528    gzFile ingz = NULL;
1529    #endif
1530    
1531    #ifdef SUPPORT_LIBBZ2
1532    BZFILE *inbz2 = NULL;
1533    #endif
1534    
1535    /* If the file name is "-" we scan stdin */
1536    
1537    if (strcmp(pathname, "-") == 0)
1538      {
1539      return pcregrep(stdin, FR_PLAIN,
1540        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1541          stdin_name : NULL);
1542      }
1543    
1544    /* If the file is a directory, skip it if skipping or, if we are recursing, scan
1545    each file and directory within it, subject to any include or exclude patterns
1546    that were set. The scanning code is localized so it can be made
1547    system-specific. */
1548    
1549    if ((sep = isdirectory(pathname)) != 0)
1550      {
1551      if (dee_action == dee_SKIP) return 1;
1552      if (dee_action == dee_RECURSE)
1553        {
1554        char buffer[1024];
1555        char *nextfile;
1556        directory_type *dir = opendirectory(pathname);
1557    
1558        if (dir == NULL)
1559          {
1560          if (!silent)
1561            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1562              strerror(errno));
1563          return 2;
1564          }
1565    
1566        while ((nextfile = readdirectory(dir)) != NULL)
1567          {
1568          int frc, nflen;
1569          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1570          nflen = (int)(strlen(nextfile));
1571    
1572          if (isdirectory(buffer))
1573            {
1574            if (exclude_dir_compiled != NULL &&
1575                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1576              continue;
1577    
1578            if (include_dir_compiled != NULL &&
1579                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1580              continue;
1581            }
1582          else
1583            {
1584            if (exclude_compiled != NULL &&
1585                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1586              continue;
1587    
1588            if (include_compiled != NULL &&
1589                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1590              continue;
1591            }
1592    
1593          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1594          if (frc > 1) rc = frc;
1595           else if (frc == 0 && rc == 1) rc = 0;
1596          }
1597    
1598        closedirectory(dir);
1599        return rc;
1600        }
1601      }
1602    
1603    /* If the file is not a directory and not a regular file, skip it if that's
1604    been requested. */
1605    
1606    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1607    
1608    /* Control reaches here if we have a regular file, or if we have a directory
1609    and recursion or skipping was not requested, or if we have anything else and
1610    skipping was not requested. The scan proceeds. If this is the first and only
1611    argument at top level, we don't show the file name, unless we are only showing
1612    the file name, or the filename was forced (-H). */
1613    
1614    pathlen = (int)(strlen(pathname));
1615    
1616    /* Open using zlib if it is supported and the file name ends with .gz. */
1617    
1618    #ifdef SUPPORT_LIBZ
1619    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1620      {
1621      ingz = gzopen(pathname, "rb");
1622      if (ingz == NULL)
1623        {
1624        if (!silent)
1625          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1626            strerror(errno));
1627        return 2;
1628        }
1629      handle = (void *)ingz;
1630      frtype = FR_LIBZ;
1631      }
1632    else
1633    #endif
1634    
1635    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1636    
1637    #ifdef SUPPORT_LIBBZ2
1638    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1639      {
1640      inbz2 = BZ2_bzopen(pathname, "rb");
1641      handle = (void *)inbz2;
1642      frtype = FR_LIBBZ2;
1643      }
1644    else
1645    #endif
1646    
1647    /* Otherwise use plain fopen(). The label is so that we can come back here if
1648    an attempt to read a .bz2 file indicates that it really is a plain file. */
1649    
1650    #ifdef SUPPORT_LIBBZ2
1651    PLAIN_FILE:
1652    #endif
1653      {
1654      in = fopen(pathname, "rb");
1655      handle = (void *)in;
1656      frtype = FR_PLAIN;
1657      }
1658    
1659    /* All the opening methods return errno when they fail. */
1660    
1661    if (handle == NULL)
1662      {
1663      if (!silent)
1664        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1665          strerror(errno));
1666      return 2;
1667      }
1668    
1669    /* Now grep the file */
1670    
1671    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1672      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1673    
1674    /* Close in an appropriate manner. */
1675    
1676    #ifdef SUPPORT_LIBZ
1677    if (frtype == FR_LIBZ)
1678      gzclose(ingz);
1679    else
1680    #endif
1681    
1682    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1683    read failed. If the error indicates that the file isn't in fact bzipped, try
1684    again as a normal file. */
1685    
1686    #ifdef SUPPORT_LIBBZ2
1687    if (frtype == FR_LIBBZ2)
1688      {
1689      if (rc == 2)
1690        {
1691        int errnum;
1692        const char *err = BZ2_bzerror(inbz2, &errnum);
1693        if (errnum == BZ_DATA_ERROR_MAGIC)
1694          {
1695          BZ2_bzclose(inbz2);
1696          goto PLAIN_FILE;
1697          }
1698        else if (!silent)
1699          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1700            pathname, err);
1701        }
1702      BZ2_bzclose(inbz2);
1703      }
1704    else
1705    #endif
1706    
1707    /* Normal file close */
1708    
1709    fclose(in);
1710    
1711    /* Pass back the yield from pcregrep(). */
1712    
1713    return rc;
1714    }
1715    
1716    
1717    
1718    
1719    /*************************************************
1720    *                Usage function                  *
1721    *************************************************/
1722    
1723    static int
1724    usage(int rc)
1725    {
1726    option_item *op;
1727    fprintf(stderr, "Usage: pcregrep [-");
1728    for (op = optionlist; op->one_char != 0; op++)
1729      {
1730      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1731      }
1732    fprintf(stderr, "] [long options] [pattern] [files]\n");
1733    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1734      "options.\n");
1735    return rc;
1736    }
1737    
1738    
1739    
1740    
1741    /*************************************************
1742    *                Help function                   *
1743    *************************************************/
1744    
1745    static void
1746    help(void)
1747    {
1748    option_item *op;
1749    
1750    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1751    printf("Search for PATTERN in each FILE or standard input.\n");
1752    printf("PATTERN must be present if neither -e nor -f is used.\n");
1753    printf("\"-\" can be used as a file name to mean STDIN.\n");
1754    
1755    #ifdef SUPPORT_LIBZ
1756    printf("Files whose names end in .gz are read using zlib.\n");
1757    #endif
1758    
1759    #ifdef SUPPORT_LIBBZ2
1760    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1761    #endif
1762    
1763    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1764    printf("Other files and the standard input are read as plain files.\n\n");
1765    #else
1766    printf("All files are read as plain files, without any interpretation.\n\n");
1767    #endif
1768    
1769    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1770    printf("Options:\n");
1771    
1772    for (op = optionlist; op->one_char != 0; op++)
1773      {
1774      int n;
1775      char s[4];
1776      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1777      n = 30 - printf("  %s --%s", s, op->long_name);
1778      if (n < 1) n = 1;
1779      printf("%.*s%s\n", n, "                    ", op->help_text);
1780      }
1781    
1782    printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1783    printf("trailing white space is removed and blank lines are ignored.\n");
1784    printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1785    
1786    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1787    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1788    }
1789    
1790    
1791    
1792    
1793    /*************************************************
1794    *    Handle a single-letter, no data option      *
1795    *************************************************/
1796    
1797    static int
1798    handle_option(int letter, int options)
1799    {
1800    switch(letter)
1801      {
1802      case N_FOFFSETS: file_offsets = TRUE; break;
1803      case N_HELP: help(); pcregrep_exit(0);
1804      case N_LOFFSETS: line_offsets = number = TRUE; break;
1805      case N_LBUFFER: line_buffered = TRUE; break;
1806      case 'c': count_only = TRUE; break;
1807      case 'F': process_options |= PO_FIXED_STRINGS; break;
1808      case 'H': filenames = FN_FORCE; break;
1809      case 'h': filenames = FN_NONE; break;
1810      case 'i': options |= PCRE_CASELESS; break;
1811      case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1812      case 'L': filenames = FN_NOMATCH_ONLY; break;
1813      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1814      case 'n': number = TRUE; break;
1815      case 'o': only_matching = TRUE; break;
1816      case 'q': quiet = TRUE; break;
1817      case 'r': dee_action = dee_RECURSE; break;
1818      case 's': silent = TRUE; break;
1819      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1820      case 'v': invert = TRUE; break;
1821      case 'w': process_options |= PO_WORD_MATCH; break;
1822      case 'x': process_options |= PO_LINE_MATCH; break;
1823    
1824      case 'V':
1825      fprintf(stderr, "pcregrep version %s\n", pcre_version());
1826      pcregrep_exit(0);
1827      break;
1828    
1829      default:
1830      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1831      pcregrep_exit(usage(2));
1832      }
1833    
1834    return options;
1835    }
1836    
1837    
1838    
1839    
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12, or 13 take "th" ("11th", "112th"), not the ending that the final
digit alone would suggest.

Argument:   n    the number
Returns:    pointer to a static buffer holding the text; the contents are
              overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[16];      /* Room for any 32-bit int plus the ending */
const char *ending = "th";
int last_two = n % 100;

/* For negative n both remainders are zero or negative, so "th" is kept. */

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1862    
1863    
1864    
1865    /*************************************************
1866    *          Compile a single pattern              *
1867    *************************************************/
1868    
1869    /* When the -F option has been used, this is called for each substring.
1870    Otherwise it's called for each supplied pattern.
1871    
1872    Arguments:
1873      pattern        the pattern string
1874      options        the PCRE options
1875      filename       the file name, or NULL for a command-line pattern
1876      count          0 if this is the only command line pattern, or
1877                     number of the command line pattern, or
1878                     linenumber for a pattern from a file
1879    
1880    Returns:         TRUE on success, FALSE after an error
1881    */
1882    
1883    static BOOL
1884    compile_single_pattern(char *pattern, int options, char *filename, int count)
1885    {
1886    char buffer[MBUFTHIRD + 16];
1887    const char *error;
1888    int errptr;
1889    
1890    if (pattern_count >= MAX_PATTERN_COUNT)
1891      {
1892      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1893        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1894      return FALSE;
1895      }
1896    
1897    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1898      suffix[process_options]);
1899    pattern_list[pattern_count] =
1900      pcre_compile(buffer, options, &error, &errptr, pcretables);
1901    if (pattern_list[pattern_count] != NULL)
1902      {
1903      pattern_count++;
1904      return TRUE;
1905      }
1906    
1907    /* Handle compile errors */
1908    
1909    errptr -= (int)strlen(prefix[process_options]);
1910    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1911    
1912    if (filename == NULL)
1913      {
1914      if (count == 0)
1915        fprintf(stderr, "pcregrep: Error in command-line regex "
1916          "at offset %d: %s\n", errptr, error);
1917      else
1918        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1919          "at offset %d: %s\n", ordin(count), errptr, error);
1920      }
1921    else
1922      {
1923      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1924        "at offset %d: %s\n", count, filename, errptr, error);
1925      }
1926    
1927    return FALSE;
1928    }
1929    
1930    
1931    
1932  /*************************************************  /*************************************************
1933  *                Handle an option                *  *           Compile one supplied pattern         *
1934  *************************************************/  *************************************************/
1935    
1936  static int  /* When the -F option has been used, each string may be a list of strings,
1937  handle_option(int letter, int options)  separated by line breaks. They will be matched literally.
 {  
 switch(letter)  
   {  
   case -1:  help(); exit(0);  
   case 'c': count_only = TRUE; break;  
   case 'h': filenames = FALSE; break;  
   case 'i': options |= PCRE_CASELESS; break;  
   case 'l': filenames_only = TRUE;  
   case 'n': number = TRUE; break;  
   case 'r': recurse = TRUE; break;  
   case 's': silent = TRUE; break;  
   case 'v': invert = TRUE; break;  
   case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;  
1938    
1939    case 'V':  Arguments:
1940    fprintf(stderr, "pcregrep version %s using ", VERSION);    pattern        the pattern string
1941    fprintf(stderr, "PCRE version %s\n", pcre_version());    options        the PCRE options
1942    exit(0);    filename       the file name, or NULL for a command-line pattern
1943    break;    count          0 if this is the only command line pattern, or
1944                     number of the command line pattern, or
1945                     linenumber for a pattern from a file
1946    
1947    default:  Returns:         TRUE on success, FALSE after an error
1948    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);  */
   exit(usage(2));  
   }  
1949    
1950  return options;  static BOOL
1951    compile_pattern(char *pattern, int options, char *filename, int count)
1952    {
1953    if ((process_options & PO_FIXED_STRINGS) != 0)
1954      {
1955      char *eop = pattern + strlen(pattern);
1956      char buffer[MBUFTHIRD];
1957      for(;;)
1958        {
1959        int ellength;
1960        char *p = end_of_line(pattern, eop, &ellength);
1961        if (ellength == 0)
1962          return compile_single_pattern(pattern, options, filename, count);
1963        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1964        pattern = p;
1965        if (!compile_single_pattern(buffer, options, filename, count))
1966          return FALSE;
1967        }
1968      }
1969    else return compile_single_pattern(pattern, options, filename, count);
1970  }  }
1971    
1972    
1973    
   
1974  /*************************************************  /*************************************************
1975  *                Main program                    *  *                Main program                    *
1976  *************************************************/  *************************************************/
1977    
1978    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1979    
1980  int  int
1981  main(int argc, char **argv)  main(int argc, char **argv)
1982  {  {
1983  int i, j;  int i, j;
1984  int rc = 1;  int rc = 1;
1985  int options = 0;  int pcre_options = 0;
1986    int cmd_pattern_count = 0;
1987    int hint_count = 0;
1988  int errptr;  int errptr;
 const char *error;  
1989  BOOL only_one_at_top;  BOOL only_one_at_top;
1990    char *patterns[MAX_PATTERN_COUNT];
1991    const char *locale_from = "--locale";
1992    const char *error;
1993    
1994    /* Set the default line ending value from the default in the PCRE library;
1995    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1996    Note that the return values from pcre_config(), though derived from the ASCII
1997    codes, are the same in EBCDIC environments, so we must use the actual values
1998    rather than escapes such as '\r'. */
1999    
2000    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2001    switch(i)
2002      {
2003      default:               newline = (char *)"lf"; break;
2004      case 13:               newline = (char *)"cr"; break;
2005      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2006      case -1:               newline = (char *)"any"; break;
2007      case -2:               newline = (char *)"anycrlf"; break;
2008      }
2009    
2010  /* Process the options */  /* Process the options */
2011    
2012  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
2013    {    {
2014      option_item *op = NULL;
2015      char *option_data = (char *)"";    /* default to keep compiler happy */
2016      BOOL longop;
2017      BOOL longopwasequals = FALSE;
2018    
2019    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
2020    
2021    /* Long name options */    /* If we hit an argument that is just "-", it may be a reference to STDIN,
2022      but only if we have previously had -e or -f to define the patterns. */
2023    
2024      if (argv[i][1] == 0)
2025        {
2026        if (pattern_filename != NULL || pattern_count > 0) break;
2027          else pcregrep_exit(usage(2));
2028        }
2029    
2030      /* Handle a long name option, or -- to terminate the options */
2031    
2032    if (argv[i][1] == '-')    if (argv[i][1] == '-')
2033      {      {
2034      option_item *op;      char *arg = argv[i] + 2;
2035        char *argequals = strchr(arg, '=');
2036    
2037      if (strncmp(argv[i]+2, "file=", 5) == 0)      if (*arg == 0)    /* -- terminates options */
2038        {        {
2039        pattern_filename = argv[i] + 7;        i++;
2040        continue;        break;                /* out of the options-handling loop */
2041        }        }
2042    
2043        longop = TRUE;
2044    
2045        /* Some long options have data that follows after =, for example file=name.
2046        Some options have variations in the long name spelling: specifically, we
2047        allow "regexp" because GNU grep allows it, though I personally go along
2048        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2049        These options are entered in the table as "regex(p)". Options can be in
2050        both these categories. */
2051    
2052      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2053        {        {
2054        if (strcmp(argv[i]+2, op->long_name) == 0)        char *opbra = strchr(op->long_name, '(');
2055          char *equals = strchr(op->long_name, '=');
2056    
2057          /* Handle options with only one spelling of the name */
2058    
2059          if (opbra == NULL)     /* Does not contain '(' */
2060          {          {
2061          options = handle_option(op->one_char, options);          if (equals == NULL)  /* Not thing=data case */
2062          break;            {
2063              if (strcmp(arg, op->long_name) == 0) break;
2064              }
2065            else                 /* Special case xxx=data */
2066              {
2067              int oplen = (int)(equals - op->long_name);
2068              int arglen = (argequals == NULL)?
2069                (int)strlen(arg) : (int)(argequals - arg);
2070              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2071                {
2072                option_data = arg + arglen;
2073                if (*option_data == '=')
2074                  {
2075                  option_data++;
2076                  longopwasequals = TRUE;
2077                  }
2078                break;
2079                }
2080              }
2081            }
2082    
2083          /* Handle options with an alternate spelling of the name */
2084    
2085          else
2086            {
2087            char buff1[24];
2088            char buff2[24];
2089    
2090            int baselen = (int)(opbra - op->long_name);
2091            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2092            int arglen = (argequals == NULL || equals == NULL)?
2093              (int)strlen(arg) : (int)(argequals - arg);
2094    
2095            sprintf(buff1, "%.*s", baselen, op->long_name);
2096            sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2097    
2098            if (strncmp(arg, buff1, arglen) == 0 ||
2099               strncmp(arg, buff2, arglen) == 0)
2100              {
2101              if (equals != NULL && argequals != NULL)
2102                {
2103                option_data = argequals;
2104                if (*option_data == '=')
2105                  {
2106                  option_data++;
2107                  longopwasequals = TRUE;
2108                  }
2109                }
2110              break;
2111              }
2112          }          }
2113        }        }
2114    
2115      if (op->one_char == 0)      if (op->one_char == 0)
2116        {        {
2117        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2118        exit(usage(2));        pcregrep_exit(usage(2));
2119        }        }
2120      }      }
2121    
2122    /* One-char options */    /* Jeffrey Friedl's debugging harness uses these additional options which
2123      are not in the right form for putting in the option table because they use
2124      only one hyphen, yet are more than one character long. By putting them
2125      separately here, they will not get displayed as part of the help() output,
2126      but I don't think Jeffrey will care about that. */
2127    
2128    #ifdef JFRIEDL_DEBUG
2129      else if (strcmp(argv[i], "-pre") == 0) {
2130              jfriedl_prefix = argv[++i];
2131              continue;
2132      } else if (strcmp(argv[i], "-post") == 0) {
2133              jfriedl_postfix = argv[++i];
2134              continue;
2135      } else if (strcmp(argv[i], "-XT") == 0) {
2136              sscanf(argv[++i], "%d", &jfriedl_XT);
2137              continue;
2138      } else if (strcmp(argv[i], "-XR") == 0) {
2139              sscanf(argv[++i], "%d", &jfriedl_XR);
2140              continue;
2141      }
2142    #endif
2143    
2144    
2145      /* One-char options; many that have no data may be in a single argument; we
2146      continue till we hit the last one or one that needs data. */
2147    
2148    else    else
2149      {      {
2150      char *s = argv[i] + 1;      char *s = argv[i] + 1;
2151        longop = FALSE;
2152      while (*s != 0)      while (*s != 0)
2153        {        {
2154        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
2155            { if (*s == op->one_char) break; }
2156          if (op->one_char == 0)
2157          {          {
2158          pattern_filename = s + 1;          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2159          if (pattern_filename[0] == 0)            *s, argv[i]);
2160            {          pcregrep_exit(usage(2));
2161            if (i >= argc - 1)          }
2162              {        if (op->type != OP_NODATA || s[1] == 0)
2163              fprintf(stderr, "pcregrep: File name missing after -f\n");          {
2164              exit(usage(2));          option_data = s+1;
             }  
           pattern_filename = argv[++i];  
           }  
2165          break;          break;
2166          }          }
2167        else options = handle_option(*s++, options);        pcre_options = handle_option(*s++, pcre_options);
2168        }        }
2169      }      }
   }  
2170    
2171  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));    /* At this point we should have op pointing to a matched option. If the type
2172  hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));    is NO_DATA, it means that there is no data, and the option might set
2173      something in the PCRE options. */
2174    
2175  if (pattern_list == NULL || hints_list == NULL)    if (op->type == OP_NODATA)
2176    {      {
2177    fprintf(stderr, "pcregrep: malloc failed\n");      pcre_options = handle_option(op->one_char, pcre_options);
2178    return 2;      continue;
2179    }      }
2180    
2181  /* Compile the regular expression(s). */    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2182      either has a value or defaults to something. It cannot have data in a
2183      separate item. At the moment, the only such options are "colo(u)r" and
2184      Jeffrey Friedl's special -S debugging option. */
2185    
2186  if (pattern_filename != NULL)    if (*option_data == 0 &&
2187    {        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
   FILE *f = fopen(pattern_filename, "r");  
   char buffer[BUFSIZ];  
   if (f == NULL)  
2188      {      {
2189      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      switch (op->one_char)
2190        strerror(errno));        {
2191      return 2;        case N_COLOUR:
2192          colour_option = (char *)"auto";
2193          break;
2194    #ifdef JFRIEDL_DEBUG
2195          case 'S':
2196          S_arg = 0;
2197          break;
2198    #endif
2199          }
2200        continue;
2201        }
2202    
2203      /* Otherwise, find the data string for the option. */
2204    
2205      if (*option_data == 0)
2206        {
2207        if (i >= argc - 1 || longopwasequals)
2208          {
2209          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2210          pcregrep_exit(usage(2));
2211          }
2212        option_data = argv[++i];
2213      }      }
2214    while (fgets(buffer, sizeof(buffer), f) != NULL)  
2215      /* If the option type is OP_PATLIST, it's the -e option, which can be called
2216      multiple times to create a list of patterns. */
2217    
2218      if (op->type == OP_PATLIST)
2219      {      {
2220      char *s = buffer + (int)strlen(buffer);      if (cmd_pattern_count >= MAX_PATTERN_COUNT)
     if (pattern_count >= MAX_PATTERN_COUNT)  
2221        {        {
2222        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2223          MAX_PATTERN_COUNT);          MAX_PATTERN_COUNT);
2224        return 2;        return 2;
2225        }        }
2226      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      patterns[cmd_pattern_count++] = option_data;
2227      if (s == buffer) continue;      }
2228      *s = 0;  
2229      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,    /* Otherwise, deal with single string or numeric data values. */
2230        &errptr, NULL);  
2231      if (pattern_list[pattern_count++] == NULL)    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2232        {
2233        *((char **)op->dataptr) = option_data;
2234        }
2235    
2236      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2237      only for unpicking arguments, so just keep it simple. */
2238    
2239      else
2240        {
2241        unsigned long int n = 0;
2242        char *endptr = option_data;
2243        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2244        while (isdigit((unsigned char)(*endptr)))
2245          n = n * 10 + (int)(*endptr++ - '0');
2246        if (*endptr != 0)
2247        {        {
2248        fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",        if (longop)
2249          pattern_count, errptr, error);          {
2250        return 2;          char *equals = strchr(op->long_name, '=');
2251            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2252              (int)(equals - op->long_name);
2253            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2254              option_data, nlen, op->long_name);
2255            }
2256          else
2257            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2258              option_data, op->one_char);
2259          pcregrep_exit(usage(2));
2260        }        }
2261        *((int *)op->dataptr) = n;
2262        }
2263      }
2264    
2265    /* Options have been decoded. If -C was used, its value is used as a default
2266    for -A and -B. */
2267    
2268    if (both_context > 0)
2269      {
2270      if (after_context == 0) after_context = both_context;
2271      if (before_context == 0) before_context = both_context;
2272      }
2273    
2274    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2275    However, the latter two set the only_matching flag. */
2276    
2277    if ((only_matching && (file_offsets || line_offsets)) ||
2278        (file_offsets && line_offsets))
2279      {
2280      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2281        "and/or --line-offsets\n");
2282      pcregrep_exit(usage(2));
2283      }
2284    
2285    if (file_offsets || line_offsets) only_matching = TRUE;
2286    
2287    /* If a locale has not been provided as an option, see if the LC_CTYPE or
2288    LC_ALL environment variable is set, and if so, use it. */
2289    
2290    if (locale == NULL)
2291      {
2292      locale = getenv("LC_ALL");
2293      locale_from = "LCC_ALL";
2294      }
2295    
2296    if (locale == NULL)
2297      {
2298      locale = getenv("LC_CTYPE");
2299      locale_from = "LC_CTYPE";
2300      }
2301    
2302    /* If a locale has been provided, set it, and generate the tables the PCRE
2303    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2304    
2305    if (locale != NULL)
2306      {
2307      if (setlocale(LC_CTYPE, locale) == NULL)
2308        {
2309        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2310          locale, locale_from);
2311        return 2;
2312        }
2313      pcretables = pcre_maketables();
2314      }
2315    
2316    /* Sort out colouring */
2317    
2318    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2319      {
2320      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2321      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2322      else
2323        {
2324        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2325          colour_option);
2326        return 2;
2327        }
2328      if (do_colour)
2329        {
2330        char *cs = getenv("PCREGREP_COLOUR");
2331        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2332        if (cs != NULL) colour_string = cs;
2333      }      }
   fclose(f);  
2334    }    }
2335    
2336  /* If no file name, a single regex must be given inline */  /* Interpret the newline type; the default settings are Unix-like. */
2337    
2338    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2339      {
2340      pcre_options |= PCRE_NEWLINE_CR;
2341      endlinetype = EL_CR;
2342      }
2343    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2344      {
2345      pcre_options |= PCRE_NEWLINE_LF;
2346      endlinetype = EL_LF;
2347      }
2348    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2349      {
2350      pcre_options |= PCRE_NEWLINE_CRLF;
2351      endlinetype = EL_CRLF;
2352      }
2353    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2354      {
2355      pcre_options |= PCRE_NEWLINE_ANY;
2356      endlinetype = EL_ANY;
2357      }
2358    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2359      {
2360      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2361      endlinetype = EL_ANYCRLF;
2362      }
2363  else  else
2364    {    {
2365    if (i >= argc) return usage(0);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2366    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);    return 2;
2367    if (pattern_list[0] == NULL)    }
2368    
2369    /* Interpret the text values for -d and -D */
2370    
2371    if (dee_option != NULL)
2372      {
2373      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2374      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2375      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2376      else
2377      {      {
2378      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
       error);  
2379      return 2;      return 2;
2380      }      }
   pattern_count++;  
2381    }    }
2382    
2383  /* Study the regular expressions, as we will be running them may times */  if (DEE_option != NULL)
2384      {
2385      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2386      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2387      else
2388        {
2389        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2390        return 2;
2391        }
2392      }
2393    
2394    /* Check the values for Jeffrey Friedl's debugging options. */
2395    
2396    #ifdef JFRIEDL_DEBUG
2397    if (S_arg > 9)
2398      {
2399      fprintf(stderr, "pcregrep: bad value for -S option\n");
2400      return 2;
2401      }
2402    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2403      {
2404      if (jfriedl_XT == 0) jfriedl_XT = 1;
2405      if (jfriedl_XR == 0) jfriedl_XR = 1;
2406      }
2407    #endif
2408    
2409    /* Get memory to store the pattern and hints lists. */
2410    
2411    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2412    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2413    
2414    if (pattern_list == NULL || hints_list == NULL)
2415      {
2416      fprintf(stderr, "pcregrep: malloc failed\n");
2417      goto EXIT2;
2418      }
2419    
2420    /* If no patterns were provided by -e, and there is no file provided by -f,
2421    the first argument is the one and only pattern, and it must exist. */
2422    
2423    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2424      {
2425      if (i >= argc) return usage(2);
2426      patterns[cmd_pattern_count++] = argv[i++];
2427      }
2428    
2429    /* Compile the patterns that were provided on the command line, either by
2430    multiple uses of -e or as a single unkeyed pattern. */
2431    
2432    for (j = 0; j < cmd_pattern_count; j++)
2433      {
2434      if (!compile_pattern(patterns[j], pcre_options, NULL,
2435           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2436        goto EXIT2;
2437      }
2438    
2439    /* Compile the regular expressions that are provided in a file. */
2440    
2441    if (pattern_filename != NULL)
2442      {
2443      int linenumber = 0;
2444      FILE *f;
2445      char *filename;
2446      char buffer[MBUFTHIRD];
2447    
2448      if (strcmp(pattern_filename, "-") == 0)
2449        {
2450        f = stdin;
2451        filename = stdin_name;
2452        }
2453      else
2454        {
2455        f = fopen(pattern_filename, "r");
2456        if (f == NULL)
2457          {
2458          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2459            strerror(errno));
2460          goto EXIT2;
2461          }
2462        filename = pattern_filename;
2463        }
2464    
2465      while (fgets(buffer, MBUFTHIRD, f) != NULL)
2466        {
2467        char *s = buffer + (int)strlen(buffer);
2468        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2469        *s = 0;
2470        linenumber++;
2471        if (buffer[0] == 0) continue;   /* Skip blank lines */
2472        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2473          goto EXIT2;
2474        }
2475    
2476      if (f != stdin) fclose(f);
2477      }
2478    
2479    /* Study the regular expressions, as we will be running them many times */
2480    
2481  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
2482    {    {
# Line 513  for (j = 0; j < pattern_count; j++) Line 2486  for (j = 0; j < pattern_count; j++)
2486      char s[16];      char s[16];
2487      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2488      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2489      return 2;      goto EXIT2;
2490        }
2491      hint_count++;
2492      }
2493    
2494    /* If --match-limit or --recursion-limit was set, put the value(s) into the
2495    pcre_extra block for each pattern. */
2496    
2497    if (match_limit > 0 || match_limit_recursion > 0)
2498      {
2499      for (j = 0; j < pattern_count; j++)
2500        {
2501        if (hints_list[j] == NULL)
2502          {
2503          hints_list[j] = malloc(sizeof(pcre_extra));
2504          if (hints_list[j] == NULL)
2505            {
2506            fprintf(stderr, "pcregrep: malloc failed\n");
2507            pcregrep_exit(2);
2508            }
2509          }
2510        if (match_limit > 0)
2511          {
2512          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2513          hints_list[j]->match_limit = match_limit;
2514          }
2515        if (match_limit_recursion > 0)
2516          {
2517          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2518          hints_list[j]->match_limit_recursion = match_limit_recursion;
2519          }
2520        }
2521      }
2522    
2523    /* If there are include or exclude patterns, compile them. */
2524    
2525    if (exclude_pattern != NULL)
2526      {
2527      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2528        pcretables);
2529      if (exclude_compiled == NULL)
2530        {
2531        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2532          errptr, error);
2533        goto EXIT2;
2534        }
2535      }
2536    
2537    if (include_pattern != NULL)
2538      {
2539      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2540        pcretables);
2541      if (include_compiled == NULL)
2542        {
2543        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2544          errptr, error);
2545        goto EXIT2;
2546        }
2547      }
2548    
2549    if (exclude_dir_pattern != NULL)
2550      {
2551      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2552        pcretables);
2553      if (exclude_dir_compiled == NULL)
2554        {
2555        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2556          errptr, error);
2557        goto EXIT2;
2558        }
2559      }
2560    
2561    if (include_dir_pattern != NULL)
2562      {
2563      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2564        pcretables);
2565      if (include_dir_compiled == NULL)
2566        {
2567        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2568          errptr, error);
2569        goto EXIT2;
2570      }      }
2571    }    }
2572    
2573  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
2574    
2575  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc)
2576      {
2577      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2578      goto EXIT;
2579      }
2580    
2581  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2582  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
2583  the file name if the argument is not a directory. */  the file name if the argument is not a directory and filenames are not
2584    otherwise forced. */
2585    
2586  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
 if (filenames_only) filenames = TRUE;  
2587    
2588  for (; i < argc; i++)  for (; i < argc; i++)
2589    {    {
2590    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2591    if (frc == 0 && rc == 1) rc = 0;      only_one_at_top);
2592      if (frc > 1) rc = frc;
2593        else if (frc == 0 && rc == 1) rc = 0;
2594    }    }
2595    
2596  return rc;  EXIT:
2597    if (pattern_list != NULL)
2598      {
2599      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2600      free(pattern_list);
2601      }
2602    if (hints_list != NULL)
2603      {
2604      for (i = 0; i < hint_count; i++)
2605        {
2606        if (hints_list[i] != NULL) free(hints_list[i]);
2607        }
2608      free(hints_list);
2609      }
2610    pcregrep_exit(rc);
2611    
2612    EXIT2:
2613    rc = 2;
2614    goto EXIT;
2615  }  }
2616    
2617  /* End */  /* End of pcregrep */

Legend:
Removed from v.53  
changed lines
  Added in v.561

  ViewVC Help
Powered by ViewVC 1.1.5