/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC | revision 667 by ph10, Mon Aug 22 14:57:32 2011 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix system it can recurse into directories. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2011 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55    #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 18  its pattern matching. On a Unix system i Line 70  its pattern matching. On a Unix system i
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "2.0 01-Aug-2001"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76    #if BUFSIZ > 8192
77    #define PATBUFSIZE BUFSIZ
78    #else
79    #define PATBUFSIZE 8192
80    #endif
81    
82    /* Values for the "filenames" variable, which specifies options for file name
83    output. The order is important; it is assumed that a file name is wanted for
84    all values greater than FN_DEFAULT. */
85    
86    enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92    /* Actions for the -d and -D options */
93    
94    enum { dee_READ, dee_SKIP, dee_RECURSE };
95    enum { DEE_READ, DEE_SKIP };
96    
97    /* Actions for special processing options (flag bits) */
98    
99    #define PO_WORD_MATCH     0x0001
100    #define PO_LINE_MATCH     0x0002
101    #define PO_FIXED_STRINGS  0x0004
102    
103    /* Line ending types */
104    
105    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
113    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
114    
115    
116    
117  /*************************************************  /*************************************************
118  *               Global variables                 *  *               Global variables                 *
119  *************************************************/  *************************************************/
120    
121    /* Jeffrey Friedl has some debugging requirements that are not part of the
122    regular code. */
123    
124    #ifdef JFRIEDL_DEBUG
125    static int S_arg = -1;
126    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128    static const char *jfriedl_prefix = "";
129    static const char *jfriedl_postfix = "";
130    #endif
131    
132    static int  endlinetype;
133    
134    static char *colour_string = (char *)"1;31";
135    static char *colour_option = NULL;
136    static char *dee_option = NULL;
137    static char *DEE_option = NULL;
138    static char *main_buffer = NULL;
139    static char *newline = NULL;
140  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
141    static char *stdin_name = (char *)"(standard input)";
142    static char *locale = NULL;
143    
144    static const unsigned char *pcretables = NULL;
145    
146  static int  pattern_count = 0;  static int  pattern_count = 0;
147  static pcre **pattern_list;  static pcre **pattern_list = NULL;
148  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
149    
150    static char *include_pattern = NULL;
151    static char *exclude_pattern = NULL;
152    static char *include_dir_pattern = NULL;
153    static char *exclude_dir_pattern = NULL;
154    
155    static pcre *include_compiled = NULL;
156    static pcre *exclude_compiled = NULL;
157    static pcre *include_dir_compiled = NULL;
158    static pcre *exclude_dir_compiled = NULL;
159    
160    static int after_context = 0;
161    static int before_context = 0;
162    static int both_context = 0;
163    static int bufthird = PCREGREP_BUFSIZE;
164    static int bufsize = 3*PCREGREP_BUFSIZE;
165    static int dee_action = dee_READ;
166    static int DEE_action = DEE_READ;
167    static int error_count = 0;
168    static int filenames = FN_DEFAULT;
169    static int only_matching = -1;
170    static int process_options = 0;
171    static int study_options = 0;
172    
173    static unsigned long int match_limit = 0;
174    static unsigned long int match_limit_recursion = 0;
175    
176  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
177  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
178  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
179    static BOOL hyphenpending = FALSE;
180  static BOOL invert = FALSE;  static BOOL invert = FALSE;
181    static BOOL line_buffered = FALSE;
182    static BOOL line_offsets = FALSE;
183    static BOOL multiline = FALSE;
184  static BOOL number = FALSE;  static BOOL number = FALSE;
185  static BOOL recurse = FALSE;  static BOOL omit_zero_count = FALSE;
186    static BOOL resource_error = FALSE;
187    static BOOL quiet = FALSE;
188  static BOOL silent = FALSE;  static BOOL silent = FALSE;
189  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
190    
191  /* Structure for options and list of them */  /* Structure for options and list of them */
192    
193    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
194           OP_OP_NUMBER, OP_PATLIST };
195    
196  typedef struct option_item {  typedef struct option_item {
197      int type;
198    int one_char;    int one_char;
199    char *long_name;    void *dataptr;
200    char *help_text;    const char *long_name;
201      const char *help_text;
202  } option_item;  } option_item;
203    
204    /* Options without a single-letter equivalent get a negative value. This can be
205    used to identify them. */
206    
207    #define N_COLOUR       (-1)
208    #define N_EXCLUDE      (-2)
209    #define N_EXCLUDE_DIR  (-3)
210    #define N_HELP         (-4)
211    #define N_INCLUDE      (-5)
212    #define N_INCLUDE_DIR  (-6)
213    #define N_LABEL        (-7)
214    #define N_LOCALE       (-8)
215    #define N_NULL         (-9)
216    #define N_LOFFSETS     (-10)
217    #define N_FOFFSETS     (-11)
218    #define N_LBUFFER      (-12)
219    #define N_M_LIMIT      (-13)
220    #define N_M_LIMIT_REC  (-14)
221    #define N_BUFSIZE      (-15)
222    
223  static option_item optionlist[] = {  static option_item optionlist[] = {
224    { -1,  "help",         "display this help and exit" },    { OP_NODATA,     N_NULL,   NULL,              "",              "  terminate options" },
225    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
226    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
227    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
228    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
229    { 'n', "line-number",  "print line number with output lines" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
230    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
231    { 's', "no-messages",  "suppress error messages" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
232    { 'V', "version",      "print version information and exit" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
233    { 'v', "invert-match", "select non-matching lines" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
234    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
235    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_PATLIST,    'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
236    { 0,    NULL,           NULL }    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
237      { OP_STRING,     'f',      &pattern_filename, "file=path",     "read patterns from file" },
238      { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
239      { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
240      { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
241      { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
242      { OP_NODATA,     'j',      NULL,              "jit",           "use JIT compiler if available" },
243      { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
244      { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
245      { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
246      { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
247      { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
248      { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
249      { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
250      { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
251      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
252      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
253      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
254      { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
255      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
256      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
257      { OP_STRING,     N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
258      { OP_STRING,     N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
259      { OP_STRING,     N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
260      { OP_STRING,     N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
261    
262      /* These two were accidentally implemented with underscores instead of
263      hyphens in the option names. As this was not discovered for several releases,
264      the incorrect versions are left in the table for compatibility. However, the
265      --help function misses out any option that has an underscore in its name. */
266    
267      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
268      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
269    
270    #ifdef JFRIEDL_DEBUG
271      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
272    #endif
273      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
274      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
275      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
276      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
277      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
278      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
279      { OP_NODATA,    0,        NULL,               NULL,            NULL }
280  };  };
281    
282    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
283    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
284    that the combination of -w and -x has the same effect as -x on its own, so we
285    can treat them as the same. */
286    
287    static const char *prefix[] = {
288      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
289    
290    static const char *suffix[] = {
291      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
292    
293    /* UTF-8 tables - used only when the newline setting is "any". */
294    
295    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
296    
297    const char utf8_table4[] = {
298      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
299      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
300      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
301      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
302    
303    
304    
305  /*************************************************  /*************************************************
306  *       Functions for directory scanning         *  *         Exit from the program                  *
307    *************************************************/
308    
309    /* If there has been a resource error, give a suitable message.
310    
311    Argument:  the return code
312    Returns:   does not return
313    */
314    
315    static void
316    pcregrep_exit(int rc)
317    {
318    if (resource_error)
319      {
320      fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
321        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
322      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
323      }
324    
325    exit(rc);
326    }
327    
328    
329    /*************************************************
330    *            OS-specific functions               *
331  *************************************************/  *************************************************/
332    
333  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
334  although at present the only ones are for Unix, and for "no directory recursion  although at present the only ones are for Unix, Win32, and for "no support". */
 support". */  
335    
336    
337  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
338    
339  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
340  #include <sys/types.h>  #include <sys/types.h>
341  #include <sys/stat.h>  #include <sys/stat.h>
342  #include <dirent.h>  #include <dirent.h>
343    
344  typedef DIR directory_type;  typedef DIR directory_type;
345    
346  int  static int
347  isdirectory(char *filename)  isdirectory(char *filename)
348  {  {
349  struct stat statbuf;  struct stat statbuf;
# Line 92  if (stat(filename, &statbuf) < 0) Line 352  if (stat(filename, &statbuf) < 0)
352  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
353  }  }
354    
355  directory_type *  static directory_type *
356  opendirectory(char *filename)  opendirectory(char *filename)
357  {  {
358  return opendir(filename);  return opendir(filename);
359  }  }
360    
361  char *  static char *
362  readdirectory(directory_type *dir)  readdirectory(directory_type *dir)
363  {  {
364  for (;;)  for (;;)
# Line 108  for (;;) Line 368  for (;;)
368    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
369      return dent->d_name;      return dent->d_name;
370    }    }
371    /* Control never reaches here */
372    }
373    
374    static void
375    closedirectory(directory_type *dir)
376    {
377    closedir(dir);
378    }
379    
380    
381    /************* Test for regular file in Unix **********/
382    
383    static int
384    isregfile(char *filename)
385    {
386    struct stat statbuf;
387    if (stat(filename, &statbuf) < 0)
388      return 1;        /* In the expectation that opening as a file will fail */
389    return (statbuf.st_mode & S_IFMT) == S_IFREG;
390    }
391    
392    
393    /************* Test for a terminal in Unix **********/
394    
395    static BOOL
396    is_stdout_tty(void)
397    {
398    return isatty(fileno(stdout));
399    }
400    
401    static BOOL
402    is_file_tty(FILE *f)
403    {
404    return isatty(fileno(f));
405    }
406    
407    
408    /************* Directory scanning in Win32 ***********/
409    
410    /* I (Philip Hazel) have no means of testing this code. It was contributed by
411    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
412    when it did not exist. David Byron added a patch that moved the #include of
413    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
414    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
415    undefined when it is indeed undefined. */
416    
417    #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
418    
419    #ifndef STRICT
420    # define STRICT
421    #endif
422    #ifndef WIN32_LEAN_AND_MEAN
423    # define WIN32_LEAN_AND_MEAN
424    #endif
425    
426    #include <windows.h>
427    
428    #ifndef INVALID_FILE_ATTRIBUTES
429    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
430    #endif
431    
432    typedef struct directory_type
433    {
434    HANDLE handle;
435    BOOL first;
436    WIN32_FIND_DATA data;
437    } directory_type;
438    
439    int
440    isdirectory(char *filename)
441    {
442    DWORD attr = GetFileAttributes(filename);
443    if (attr == INVALID_FILE_ATTRIBUTES)
444      return 0;
445    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
446    }
447    
448    directory_type *
449    opendirectory(char *filename)
450    {
451    size_t len;
452    char *pattern;
453    directory_type *dir;
454    DWORD err;
455    len = strlen(filename);
456    pattern = (char *) malloc(len + 3);
457    dir = (directory_type *) malloc(sizeof(*dir));
458    if ((pattern == NULL) || (dir == NULL))
459      {
460      fprintf(stderr, "pcregrep: malloc failed\n");
461      pcregrep_exit(2);
462      }
463    memcpy(pattern, filename, len);
464    memcpy(&(pattern[len]), "\\*", 3);
465    dir->handle = FindFirstFile(pattern, &(dir->data));
466    if (dir->handle != INVALID_HANDLE_VALUE)
467      {
468      free(pattern);
469      dir->first = TRUE;
470      return dir;
471      }
472    err = GetLastError();
473    free(pattern);
474    free(dir);
475    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
476    return NULL;
477    }
478    
479    char *
480    readdirectory(directory_type *dir)
481    {
482    for (;;)
483      {
484      if (!dir->first)
485        {
486        if (!FindNextFile(dir->handle, &(dir->data)))
487          return NULL;
488        }
489      else
490        {
491        dir->first = FALSE;
492        }
493      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
494        return dir->data.cFileName;
495      }
496    #ifndef _MSC_VER
497  return NULL;   /* Keep compiler happy; never executed */  return NULL;   /* Keep compiler happy; never executed */
498    #endif
499  }  }
500    
501  void  void
502  closedirectory(directory_type *dir)  closedirectory(directory_type *dir)
503  {  {
504  closedir(dir);  FindClose(dir->handle);
505    free(dir);
506  }  }
507    
508    
509  #else  /************* Test for regular file in Win32 **********/
510    
511    /* I don't know how to do this, or if it can be done; assume all paths are
512    regular if they are not directories. */
513    
514    int isregfile(char *filename)
515    {
516    return !isdirectory(filename);
517    }
518    
519    
520    /************* Test for a terminal in Win32 **********/
521    
522    /* I don't know how to do this; assume never */
523    
524    static BOOL
525    is_stdout_tty(void)
526    {
527    return FALSE;
528    }
529    
530    static BOOL
531    is_file_tty(FILE *f)
532    {
533    return FALSE;
534    }
535    
536    
537  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
538    
539  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
540    
541    #else
542    
543  typedef void directory_type;  typedef void directory_type;
544    
545  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
546  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
547  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
548  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
549    
550    
551    /************* Test for regular when we can't do it **********/
552    
553    /* Assume all files are regular. */
554    
555    int isregfile(char *filename) { return 1; }
556    
557    
558    /************* Test for a terminal when we can't do it **********/
559    
560    static BOOL
561    is_stdout_tty(void)
562    {
563    return FALSE;
564    }
565    
566    static BOOL
567    is_file_tty(FILE *f)
568    {
569    return FALSE;
570    }
571    
572  #endif  #endif
573    
574    
575    
576  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
577  /*************************************************  /*************************************************
578  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
579  *************************************************/  *************************************************/
# Line 159  return sys_errlist[n]; Line 596  return sys_errlist[n];
596    
597    
598  /*************************************************  /*************************************************
599  *              Grep an individual file           *  *            Read one line of input              *
600  *************************************************/  *************************************************/
601    
602    /* Normally, input is read using fread() into a large buffer, so many lines may
603    be read at once. However, doing this for tty input means that no output appears
604    until a lot of input has been typed. Instead, tty input is handled line by
605    line. We cannot use fgets() for this, because it does not stop at a binary
606    zero, and therefore there is no way of telling how many characters it has read,
607    because there may be binary zeros embedded in the data.
608    
609    Arguments:
610      buffer     the buffer to read into
611      length     the maximum number of characters to read
612      f          the file
613    
614    Returns:     the number of characters read, zero at end of file
615    */
616    
617  static int  static int
618  pcregrep(FILE *in, char *name)  read_one_line(char *buffer, int length, FILE *f)
619  {  {
620  int rc = 1;  int c;
621  int linenumber = 0;  int yield = 0;
622  int count = 0;  while ((c = fgetc(f)) != EOF)
623  int offsets[99];    {
624  char buffer[BUFSIZ];    buffer[yield++] = c;
625      if (c == '\n' || yield >= length) break;
626      }
627    return yield;
628    }
629    
630    
631    
632    /*************************************************
633    *             Find end of line                   *
634    *************************************************/
635    
636    /* The length of the endline sequence that is found is set via lenptr. This may
637    be zero at the very end of the file if there is no line-ending sequence there.
638    
639  while (fgets(buffer, sizeof(buffer), in) != NULL)  Arguments:
640      p         current position in line
641      endptr    end of available data
642      lenptr    where to put the length of the eol sequence
643    
644    Returns:    pointer after the last byte of the line,
645                including the newline byte(s)
646    */
647    
648    static char *
649    end_of_line(char *p, char *endptr, int *lenptr)
650    {
651    switch(endlinetype)
652    {    {
653    BOOL match = FALSE;    default:      /* Just in case */
654    int i;    case EL_LF:
655    int length = (int)strlen(buffer);    while (p < endptr && *p != '\n') p++;
656    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    if (p < endptr)
657    linenumber++;      {
658        *lenptr = 1;
659        return p + 1;
660        }
661      *lenptr = 0;
662      return endptr;
663    
664    for (i = 0; !match && i < pattern_count; i++)    case EL_CR:
665      while (p < endptr && *p != '\r') p++;
666      if (p < endptr)
667      {      {
668      match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,      *lenptr = 1;
669        offsets, 99) >= 0;      return p + 1;
     if (match && whole_lines && offsets[1] != length) match = FALSE;  
670      }      }
671      *lenptr = 0;
672      return endptr;
673    
674    if (match != invert)    case EL_CRLF:
675      for (;;)
676      {      {
677      if (count_only) count++;      while (p < endptr && *p != '\r') p++;
678        if (++p >= endptr)
679          {
680          *lenptr = 0;
681          return endptr;
682          }
683        if (*p == '\n')
684          {
685          *lenptr = 2;
686          return p + 1;
687          }
688        }
689      break;
690    
691      case EL_ANYCRLF:
692      while (p < endptr)
693        {
694        int extra = 0;
695        register int c = *((unsigned char *)p);
696    
697      else if (filenames_only)      if (utf8 && c >= 0xc0)
698        {        {
699        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        int gcii, gcss;
700        return 0;        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
701          gcss = 6*extra;
702          c = (c & utf8_table3[extra]) << gcss;
703          for (gcii = 1; gcii <= extra; gcii++)
704            {
705            gcss -= 6;
706            c |= (p[gcii] & 0x3f) << gcss;
707            }
708        }        }
709    
710      else if (silent) return 0;      p += 1 + extra;
711    
712      else      switch (c)
713        {        {
714        if (name != NULL) fprintf(stdout, "%s:", name);        case 0x0a:    /* LF */
715        if (number) fprintf(stdout, "%d:", linenumber);        *lenptr = 1;
716        fprintf(stdout, "%s\n", buffer);        return p;
717    
718          case 0x0d:    /* CR */
719          if (p < endptr && *p == 0x0a)
720            {
721            *lenptr = 2;
722            p++;
723            }
724          else *lenptr = 1;
725          return p;
726    
727          default:
728          break;
729        }        }
730        }   /* End of loop for ANYCRLF case */
731    
732      rc = 0;    *lenptr = 0;  /* Must have hit the end */
733      }    return endptr;
   }  
734    
735  if (count_only)    case EL_ANY:
736    {    while (p < endptr)
737    if (name != NULL) fprintf(stdout, "%s:", name);      {
738    fprintf(stdout, "%d\n", count);      int extra = 0;
739    }      register int c = *((unsigned char *)p);
740    
741  return rc;      if (utf8 && c >= 0xc0)
742  }        {
743          int gcii, gcss;
744          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
745          gcss = 6*extra;
746          c = (c & utf8_table3[extra]) << gcss;
747          for (gcii = 1; gcii <= extra; gcii++)
748            {
749            gcss -= 6;
750            c |= (p[gcii] & 0x3f) << gcss;
751            }
752          }
753    
754        p += 1 + extra;
755    
756        switch (c)
757          {
758          case 0x0a:    /* LF */
759          case 0x0b:    /* VT */
760          case 0x0c:    /* FF */
761          *lenptr = 1;
762          return p;
763    
764          case 0x0d:    /* CR */
765          if (p < endptr && *p == 0x0a)
766            {
767            *lenptr = 2;
768            p++;
769            }
770          else *lenptr = 1;
771          return p;
772    
773          case 0x85:    /* NEL */
774          *lenptr = utf8? 2 : 1;
775          return p;
776    
777          case 0x2028:  /* LS */
778          case 0x2029:  /* PS */
779          *lenptr = 3;
780          return p;
781    
782          default:
783          break;
784          }
785        }   /* End of loop for ANY case */
786    
787      *lenptr = 0;  /* Must have hit the end */
788      return endptr;
789      }     /* End of overall switch */
790    }
791    
792    
793    
794  /*************************************************  /*************************************************
795  *     Grep a file or recurse into a directory    *  *         Find start of previous line            *
796  *************************************************/  *************************************************/
797    
798  static int  /* This is called when looking back for before lines to print.
 grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,  
   BOOL only_one_at_top)  
 {  
 int rc = 1;  
 int sep;  
 FILE *in;  
799    
800  /* If the file is a directory and we are recursing, scan each file within it.  Arguments:
801  The scanning code is localized so it can be made system-specific. */    p         start of the subsequent line
802      startptr  start of available data
803    
804  if ((sep = isdirectory(filename)) != 0 && recurse)  Returns:    pointer to the start of the previous line
805    {  */
   char buffer[1024];  
   char *nextfile;  
   directory_type *dir = opendirectory(filename);  
806    
807    if (dir == NULL)  static char *
808      {  previous_line(char *p, char *startptr)
809      fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,  {
810        strerror(errno));  switch(endlinetype)
811      return 2;    {
812      }    default:      /* Just in case */
813      case EL_LF:
814      p--;
815      while (p > startptr && p[-1] != '\n') p--;
816      return p;
817    
818      case EL_CR:
819      p--;
820      while (p > startptr && p[-1] != '\n') p--;
821      return p;
822    
823    while ((nextfile = readdirectory(dir)) != NULL)    case EL_CRLF:
824      for (;;)
825      {      {
826      int frc;      p -= 2;
827      sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);      while (p > startptr && p[-1] != '\n') p--;
828      frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);      if (p <= startptr + 1 || p[-2] == '\r') return p;
     if (frc == 0 && rc == 1) rc = 0;  
829      }      }
830      return p;   /* But control should never get here */
831    
832    closedirectory(dir);    case EL_ANY:
833    return rc;    case EL_ANYCRLF:
834    }    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
835      if (utf8) while ((*p & 0xc0) == 0x80) p--;
836    
837  /* If the file is not a directory, or we are not recursing, scan it. If this is    while (p > startptr)
838  the first and only argument at top level, we don't show the file name.      {
839  Otherwise, control is via the show_filenames variable. */      register int c;
840        char *pp = p - 1;
841    
842  in = fopen(filename, "r");      if (utf8)
843  if (in == NULL)        {
844    {        int extra = 0;
845    fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));        while ((*pp & 0xc0) == 0x80) pp--;
846    return 2;        c = *((unsigned char *)pp);
847    }        if (c >= 0xc0)
848            {
849            int gcii, gcss;
850            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
851            gcss = 6*extra;
852            c = (c & utf8_table3[extra]) << gcss;
853            for (gcii = 1; gcii <= extra; gcii++)
854              {
855              gcss -= 6;
856              c |= (pp[gcii] & 0x3f) << gcss;
857              }
858            }
859          }
860        else c = *((unsigned char *)pp);
861    
862  rc = pcregrep(in, (show_filenames && !only_one_at_top)? filename : NULL);      if (endlinetype == EL_ANYCRLF) switch (c)
863  fclose(in);        {
864  return rc;        case 0x0a:    /* LF */
865  }        case 0x0d:    /* CR */
866          return p;
867    
868          default:
869          break;
870          }
871    
872        else switch (c)
873          {
874          case 0x0a:    /* LF */
875          case 0x0b:    /* VT */
876          case 0x0c:    /* FF */
877          case 0x0d:    /* CR */
878          case 0x85:    /* NEL */
879          case 0x2028:  /* LS */
880          case 0x2029:  /* PS */
881          return p;
882    
883          default:
884          break;
885          }
886    
887  /*************************************************      p = pp;  /* Back one character */
888  *                Usage function                  *      }        /* End of loop for ANY case */
 *************************************************/  
889    
890  static int    return startptr;  /* Hit start of data */
891  usage(int rc)    }     /* End of overall switch */
 {  
 fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n");  
 fprintf(stderr, "Type `pcregrep --help' for more information.\n");  
 return rc;  
892  }  }
893    
894    
895    
896    
897    
898  /*************************************************  /*************************************************
899  *                Help function                   *  *       Print the previous "after" lines         *
900  *************************************************/  *************************************************/
901    
902  static void  /* This is called if we are about to lose said lines because of buffer filling,
903    and at the end of the file. The data in the line is written using fwrite() so
904    that a binary zero does not terminate it.
905    
906    Arguments:
907      lastmatchnumber   the number of the last matching line, plus one
908      lastmatchrestart  where we restarted after the last match
909      endptr            end of available data
910      printname         filename for printing
911    
912    Returns:            nothing
913    */
914    
915    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
916      char *endptr, char *printname)
917    {
918    if (after_context > 0 && lastmatchnumber > 0)
919      {
920      int count = 0;
921      while (lastmatchrestart < endptr && count++ < after_context)
922        {
923        int ellength;
924        char *pp = lastmatchrestart;
925        if (printname != NULL) fprintf(stdout, "%s-", printname);
926        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
927        pp = end_of_line(pp, endptr, &ellength);
928        FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
929        lastmatchrestart = pp;
930        }
931      hyphenpending = TRUE;
932      }
933    }
934    
935    
936    
937    /*************************************************
938    *   Apply patterns to subject till one matches   *
939    *************************************************/
940    
941    /* This function is called to run through all patterns, looking for a match. It
942    is used multiple times for the same subject when colouring is enabled, in order
943    to find all possible matches.
944    
945    Arguments:
946      matchptr     the start of the subject
947      length       the length of the subject to match
948      startoffset  where to start matching
949      offsets      the offets vector to fill in
950      mrc          address of where to put the result of pcre_exec()
951    
952    Returns:      TRUE if there was a match
953                  FALSE if there was no match
954                  invert if there was a non-fatal error
955    */
956    
957    static BOOL
958    match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
959      int *mrc)
960    {
961    int i;
962    size_t slen = length;
963    const char *msg = "this text:\n\n";
964    if (slen > 200)
965      {
966      slen = 200;
967      msg = "text that starts:\n\n";
968      }
969    for (i = 0; i < pattern_count; i++)
970      {
971      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
972        startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
973      if (*mrc >= 0) return TRUE;
974      if (*mrc == PCRE_ERROR_NOMATCH) continue;
975      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
976      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
977      fprintf(stderr, "%s", msg);
978      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
979      fprintf(stderr, "\n\n");
980      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
981        resource_error = TRUE;
982      if (error_count++ > 20)
983        {
984        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
985        pcregrep_exit(2);
986        }
987      return invert;    /* No more matching; don't show the line again */
988      }
989    
990    return FALSE;  /* No match, no errors */
991    }
992    
993    
994    
995    /*************************************************
996    *            Grep an individual file             *
997    *************************************************/
998    
999    /* This is called from grep_or_recurse() below. It uses a buffer that is three
1000    times the value of bufthird. The matching point is never allowed to stray into
1001    the top third of the buffer, thus keeping more of the file available for
1002    context printing or for multiline scanning. For large files, the pointer will
1003    be in the middle third most of the time, so the bottom third is available for
1004    "before" context printing.
1005    
1006    Arguments:
1007      handle       the fopened FILE stream for a normal file
1008                   the gzFile pointer when reading is via libz
1009                   the BZFILE pointer when reading is via libbz2
1010      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1011      filename     the file name or NULL (for errors)
1012      printname    the file name if it is to be printed for each match
1013                   or NULL if the file name is not to be printed
1014                   it cannot be NULL if filenames[_nomatch]_only is set
1015    
1016    Returns:       0 if there was at least one match
1017                   1 otherwise (no matches)
1018                   2 if an overlong line is encountered
1019                   3 if there is a read error on a .bz2 file
1020    */
1021    
1022    static int
1023    pcregrep(void *handle, int frtype, char *filename, char *printname)
1024    {
1025    int rc = 1;
1026    int linenumber = 1;
1027    int lastmatchnumber = 0;
1028    int count = 0;
1029    int filepos = 0;
1030    int offsets[OFFSET_SIZE];
1031    char *lastmatchrestart = NULL;
1032    char *ptr = main_buffer;
1033    char *endptr;
1034    size_t bufflength;
1035    BOOL endhyphenpending = FALSE;
1036    BOOL input_line_buffered = line_buffered;
1037    FILE *in = NULL;                    /* Ensure initialized */
1038    
1039    #ifdef SUPPORT_LIBZ
1040    gzFile ingz = NULL;
1041    #endif
1042    
1043    #ifdef SUPPORT_LIBBZ2
1044    BZFILE *inbz2 = NULL;
1045    #endif
1046    
1047    
1048    /* Do the first read into the start of the buffer and set up the pointer to end
1049    of what we have. In the case of libz, a non-zipped .gz file will be read as a
1050    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1051    fail. */
1052    
1053    #ifdef SUPPORT_LIBZ
1054    if (frtype == FR_LIBZ)
1055      {
1056      ingz = (gzFile)handle;
1057      bufflength = gzread (ingz, main_buffer, bufsize);
1058      }
1059    else
1060    #endif
1061    
1062    #ifdef SUPPORT_LIBBZ2
1063    if (frtype == FR_LIBBZ2)
1064      {
1065      inbz2 = (BZFILE *)handle;
1066      bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1067      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1068      }                                    /* without the cast it is unsigned. */
1069    else
1070    #endif
1071    
1072      {
1073      in = (FILE *)handle;
1074      if (is_file_tty(in)) input_line_buffered = TRUE;
1075      bufflength = input_line_buffered?
1076        read_one_line(main_buffer, bufsize, in) :
1077        fread(main_buffer, 1, bufsize, in);
1078      }
1079    
1080    endptr = main_buffer + bufflength;
1081    
1082    /* Loop while the current pointer is not at the end of the file. For large
1083    files, endptr will be at the end of the buffer when we are in the middle of the
1084    file, but ptr will never get there, because as soon as it gets over 2/3 of the
1085    way, the buffer is shifted left and re-filled. */
1086    
1087    while (ptr < endptr)
1088      {
1089      int endlinelength;
1090      int mrc = 0;
1091      int startoffset = 0;
1092      BOOL match;
1093      char *matchptr = ptr;
1094      char *t = ptr;
1095      size_t length, linelength;
1096    
1097      /* At this point, ptr is at the start of a line. We need to find the length
1098      of the subject string to pass to pcre_exec(). In multiline mode, it is the
1099      length remainder of the data in the buffer. Otherwise, it is the length of
1100      the next line, excluding the terminating newline. After matching, we always
1101      advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1102      option is used for compiling, so that any match is constrained to be in the
1103      first line. */
1104    
1105      t = end_of_line(t, endptr, &endlinelength);
1106      linelength = t - ptr - endlinelength;
1107      length = multiline? (size_t)(endptr - ptr) : linelength;
1108    
1109      /* Check to see if the line we are looking at extends right to the very end
1110      of the buffer without a line terminator. This means the line is too long to
1111      handle. */
1112    
1113      if (endlinelength == 0 && t == main_buffer + bufsize)
1114        {
1115        fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1116                        "pcregrep: check the --buffer-size option\n",
1117                        linenumber,
1118                        (filename == NULL)? "" : " of file ",
1119                        (filename == NULL)? "" : filename);
1120        return 2;
1121        }
1122    
1123      /* Extra processing for Jeffrey Friedl's debugging. */
1124    
1125    #ifdef JFRIEDL_DEBUG
1126      if (jfriedl_XT || jfriedl_XR)
1127      {
1128          #include <sys/time.h>
1129          #include <time.h>
1130          struct timeval start_time, end_time;
1131          struct timezone dummy;
1132          int i;
1133    
1134          if (jfriedl_XT)
1135          {
1136              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1137              const char *orig = ptr;
1138              ptr = malloc(newlen + 1);
1139              if (!ptr) {
1140                      printf("out of memory");
1141                      pcregrep_exit(2);
1142              }
1143              endptr = ptr;
1144              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1145              for (i = 0; i < jfriedl_XT; i++) {
1146                      strncpy(endptr, orig,  length);
1147                      endptr += length;
1148              }
1149              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1150              length = newlen;
1151          }
1152    
1153          if (gettimeofday(&start_time, &dummy) != 0)
1154                  perror("bad gettimeofday");
1155    
1156    
1157          for (i = 0; i < jfriedl_XR; i++)
1158              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1159                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1160    
1161          if (gettimeofday(&end_time, &dummy) != 0)
1162                  perror("bad gettimeofday");
1163    
1164          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1165                          -
1166                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1167    
1168          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1169          return 0;
1170      }
1171    #endif
1172    
1173      /* We come back here after a match when the -o option (only_matching) is set,
1174      in order to find any further matches in the same line. */
1175    
1176      ONLY_MATCHING_RESTART:
1177    
1178      /* Run through all the patterns until one matches or there is an error other
1179      than NOMATCH. This code is in a subroutine so that it can be re-used for
1180      finding subsequent matches when colouring matched lines. */
1181    
1182      match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1183    
1184      /* If it's a match or a not-match (as required), do what's wanted. */
1185    
1186      if (match != invert)
1187        {
1188        BOOL hyphenprinted = FALSE;
1189    
1190        /* We've failed if we want a file that doesn't have any matches. */
1191    
1192        if (filenames == FN_NOMATCH_ONLY) return 1;
1193    
1194        /* Just count if just counting is wanted. */
1195    
1196        if (count_only) count++;
1197    
1198        /* If all we want is a file name, there is no need to scan any more lines
1199        in the file. */
1200    
1201        else if (filenames == FN_MATCH_ONLY)
1202          {
1203          fprintf(stdout, "%s\n", printname);
1204          return 0;
1205          }
1206    
1207        /* Likewise, if all we want is a yes/no answer. */
1208    
1209        else if (quiet) return 0;
1210    
1211        /* The --only-matching option prints just the substring that matched, or a
1212        captured portion of it, as long as this string is not empty, and the
1213        --file-offsets and --line-offsets options output offsets for the matching
1214        substring (they both force --only-matching = 0). None of these options
1215        prints any context. Afterwards, adjust the start and then jump back to look
1216        for further matches in the same line. If we are in invert mode, however,
1217        nothing is printed and we do not restart - this could still be useful
1218        because the return code is set. */
1219    
1220        else if (only_matching >= 0)
1221          {
1222          if (!invert)
1223            {
1224            if (printname != NULL) fprintf(stdout, "%s:", printname);
1225            if (number) fprintf(stdout, "%d:", linenumber);
1226            if (line_offsets)
1227              fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1228                offsets[1] - offsets[0]);
1229            else if (file_offsets)
1230              fprintf(stdout, "%d,%d\n",
1231                (int)(filepos + matchptr + offsets[0] - ptr),
1232                offsets[1] - offsets[0]);
1233            else if (only_matching < mrc)
1234              {
1235              int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1236              if (plen > 0)
1237                {
1238                if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1239                FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1240                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1241                fprintf(stdout, "\n");
1242                }
1243              }
1244            else if (printname != NULL || number) fprintf(stdout, "\n");
1245            match = FALSE;
1246            if (line_buffered) fflush(stdout);
1247            rc = 0;                      /* Had some success */
1248            startoffset = offsets[1];    /* Restart after the match */
1249            goto ONLY_MATCHING_RESTART;
1250            }
1251          }
1252    
1253        /* This is the default case when none of the above options is set. We print
1254        the matching lines(s), possibly preceded and/or followed by other lines of
1255        context. */
1256    
1257        else
1258          {
1259          /* See if there is a requirement to print some "after" lines from a
1260          previous match. We never print any overlaps. */
1261    
1262          if (after_context > 0 && lastmatchnumber > 0)
1263            {
1264            int ellength;
1265            int linecount = 0;
1266            char *p = lastmatchrestart;
1267    
1268            while (p < ptr && linecount < after_context)
1269              {
1270              p = end_of_line(p, ptr, &ellength);
1271              linecount++;
1272              }
1273    
1274            /* It is important to advance lastmatchrestart during this printing so
1275            that it interacts correctly with any "before" printing below. Print
1276            each line's data using fwrite() in case there are binary zeroes. */
1277    
1278            while (lastmatchrestart < p)
1279              {
1280              char *pp = lastmatchrestart;
1281              if (printname != NULL) fprintf(stdout, "%s-", printname);
1282              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1283              pp = end_of_line(pp, endptr, &ellength);
1284              FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1285              lastmatchrestart = pp;
1286              }
1287            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1288            }
1289    
1290          /* If there were non-contiguous lines printed above, insert hyphens. */
1291    
1292          if (hyphenpending)
1293            {
1294            fprintf(stdout, "--\n");
1295            hyphenpending = FALSE;
1296            hyphenprinted = TRUE;
1297            }
1298    
1299          /* See if there is a requirement to print some "before" lines for this
1300          match. Again, don't print overlaps. */
1301    
1302          if (before_context > 0)
1303            {
1304            int linecount = 0;
1305            char *p = ptr;
1306    
1307            while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1308                   linecount < before_context)
1309              {
1310              linecount++;
1311              p = previous_line(p, main_buffer);
1312              }
1313    
1314            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1315              fprintf(stdout, "--\n");
1316    
1317            while (p < ptr)
1318              {
1319              int ellength;
1320              char *pp = p;
1321              if (printname != NULL) fprintf(stdout, "%s-", printname);
1322              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1323              pp = end_of_line(pp, endptr, &ellength);
1324              FWRITE(p, 1, pp - p, stdout);
1325              p = pp;
1326              }
1327            }
1328    
1329          /* Now print the matching line(s); ensure we set hyphenpending at the end
1330          of the file if any context lines are being output. */
1331    
1332          if (after_context > 0 || before_context > 0)
1333            endhyphenpending = TRUE;
1334    
1335          if (printname != NULL) fprintf(stdout, "%s:", printname);
1336          if (number) fprintf(stdout, "%d:", linenumber);
1337    
1338          /* In multiline mode, we want to print to the end of the line in which
1339          the end of the matched string is found, so we adjust linelength and the
1340          line number appropriately, but only when there actually was a match
1341          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1342          the match will always be before the first newline sequence. */
1343    
1344          if (multiline & !invert)
1345            {
1346            char *endmatch = ptr + offsets[1];
1347            t = ptr;
1348            while (t < endmatch)
1349              {
1350              t = end_of_line(t, endptr, &endlinelength);
1351              if (t < endmatch) linenumber++; else break;
1352              }
1353            linelength = t - ptr - endlinelength;
1354            }
1355    
1356          /*** NOTE: Use only fwrite() to output the data line, so that binary
1357          zeroes are treated as just another data character. */
1358    
1359          /* This extra option, for Jeffrey Friedl's debugging requirements,
1360          replaces the matched string, or a specific captured string if it exists,
1361          with X. When this happens, colouring is ignored. */
1362    
1363    #ifdef JFRIEDL_DEBUG
1364          if (S_arg >= 0 && S_arg < mrc)
1365            {
1366            int first = S_arg * 2;
1367            int last  = first + 1;
1368            FWRITE(ptr, 1, offsets[first], stdout);
1369            fprintf(stdout, "X");
1370            FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1371            }
1372          else
1373    #endif
1374    
1375          /* We have to split the line(s) up if colouring, and search for further
1376          matches, but not of course if the line is a non-match. */
1377    
1378          if (do_colour && !invert)
1379            {
1380            int plength;
1381            FWRITE(ptr, 1, offsets[0], stdout);
1382            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1383            FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1384            fprintf(stdout, "%c[00m", 0x1b);
1385            for (;;)
1386              {
1387              startoffset = offsets[1];
1388              if (startoffset >= linelength + endlinelength ||
1389                  !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1390                break;
1391              FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1392              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1393              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1394              fprintf(stdout, "%c[00m", 0x1b);
1395              }
1396    
1397            /* In multiline mode, we may have already printed the complete line
1398            and its line-ending characters (if they matched the pattern), so there
1399            may be no more to print. */
1400    
1401            plength = (linelength + endlinelength) - startoffset;
1402            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1403            }
1404    
1405          /* Not colouring; no need to search for further matches */
1406    
1407          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1408          }
1409    
1410        /* End of doing what has to be done for a match. If --line-buffered was
1411        given, flush the output. */
1412    
1413        if (line_buffered) fflush(stdout);
1414        rc = 0;    /* Had some success */
1415    
1416        /* Remember where the last match happened for after_context. We remember
1417        where we are about to restart, and that line's number. */
1418    
1419        lastmatchrestart = ptr + linelength + endlinelength;
1420        lastmatchnumber = linenumber + 1;
1421        }
1422    
1423      /* For a match in multiline inverted mode (which of course did not cause
1424      anything to be printed), we have to move on to the end of the match before
1425      proceeding. */
1426    
1427      if (multiline && invert && match)
1428        {
1429        int ellength;
1430        char *endmatch = ptr + offsets[1];
1431        t = ptr;
1432        while (t < endmatch)
1433          {
1434          t = end_of_line(t, endptr, &ellength);
1435          if (t <= endmatch) linenumber++; else break;
1436          }
1437        endmatch = end_of_line(endmatch, endptr, &ellength);
1438        linelength = endmatch - ptr - ellength;
1439        }
1440    
1441      /* Advance to after the newline and increment the line number. The file
1442      offset to the current line is maintained in filepos. */
1443    
1444      ptr += linelength + endlinelength;
1445      filepos += (int)(linelength + endlinelength);
1446      linenumber++;
1447    
1448      /* If input is line buffered, and the buffer is not yet full, read another
1449      line and add it into the buffer. */
1450    
1451      if (input_line_buffered && bufflength < bufsize)
1452        {
1453        int add = read_one_line(ptr, bufsize - (ptr - main_buffer), in);
1454        bufflength += add;
1455        endptr += add;
1456        }
1457    
1458      /* If we haven't yet reached the end of the file (the buffer is full), and
1459      the current point is in the top 1/3 of the buffer, slide the buffer down by
1460      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1461      about to be lost, print them. */
1462    
1463      if (bufflength >= bufsize && ptr > main_buffer + 2*bufthird)
1464        {
1465        if (after_context > 0 &&
1466            lastmatchnumber > 0 &&
1467            lastmatchrestart < main_buffer + bufthird)
1468          {
1469          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1470          lastmatchnumber = 0;
1471          }
1472    
1473        /* Now do the shuffle */
1474    
1475        memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1476        ptr -= bufthird;
1477    
1478    #ifdef SUPPORT_LIBZ
1479        if (frtype == FR_LIBZ)
1480          bufflength = 2*bufthird +
1481            gzread (ingz, main_buffer + 2*bufthird, bufthird);
1482        else
1483    #endif
1484    
1485    #ifdef SUPPORT_LIBBZ2
1486        if (frtype == FR_LIBBZ2)
1487          bufflength = 2*bufthird +
1488            BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1489        else
1490    #endif
1491    
1492        bufflength = 2*bufthird +
1493          (input_line_buffered?
1494           read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1495           fread(main_buffer + 2*bufthird, 1, bufthird, in));
1496        endptr = main_buffer + bufflength;
1497    
1498        /* Adjust any last match point */
1499    
1500        if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1501        }
1502      }     /* Loop through the whole file */
1503    
1504    /* End of file; print final "after" lines if wanted; do_after_lines sets
1505    hyphenpending if it prints something. */
1506    
1507    if (only_matching < 0 && !count_only)
1508      {
1509      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1510      hyphenpending |= endhyphenpending;
1511      }
1512    
1513    /* Print the file name if we are looking for those without matches and there
1514    were none. If we found a match, we won't have got this far. */
1515    
1516    if (filenames == FN_NOMATCH_ONLY)
1517      {
1518      fprintf(stdout, "%s\n", printname);
1519      return 0;
1520      }
1521    
1522    /* Print the match count if wanted */
1523    
1524    if (count_only)
1525      {
1526      if (count > 0 || !omit_zero_count)
1527        {
1528        if (printname != NULL && filenames != FN_NONE)
1529          fprintf(stdout, "%s:", printname);
1530        fprintf(stdout, "%d\n", count);
1531        }
1532      }
1533    
1534    return rc;
1535    }
1536    
1537    
1538    
1539    /*************************************************
1540    *     Grep a file or recurse into a directory    *
1541    *************************************************/
1542    
1543    /* Given a path name, if it's a directory, scan all the files if we are
1544    recursing; if it's a file, grep it.
1545    
1546    Arguments:
1547      pathname          the path to investigate
1548      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1549      only_one_at_top   TRUE if the path is the only one at toplevel
1550    
1551    Returns:   0 if there was at least one match
1552               1 if there were no matches
1553               2 there was some kind of error
1554    
1555    However, file opening failures are suppressed if "silent" is set.
1556    */
1557    
1558    static int
1559    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1560    {
1561    int rc = 1;
1562    int sep;
1563    int frtype;
1564    int pathlen;
1565    void *handle;
1566    FILE *in = NULL;           /* Ensure initialized */
1567    
1568    #ifdef SUPPORT_LIBZ
1569    gzFile ingz = NULL;
1570    #endif
1571    
1572    #ifdef SUPPORT_LIBBZ2
1573    BZFILE *inbz2 = NULL;
1574    #endif
1575    
1576    /* If the file name is "-" we scan stdin */
1577    
1578    if (strcmp(pathname, "-") == 0)
1579      {
1580      return pcregrep(stdin, FR_PLAIN, stdin_name,
1581        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1582          stdin_name : NULL);
1583      }
1584    
1585    /* If the file is a directory, skip if skipping or if we are recursing, scan
1586    each file and directory within it, subject to any include or exclude patterns
1587    that were set. The scanning code is localized so it can be made
1588    system-specific. */
1589    
1590    if ((sep = isdirectory(pathname)) != 0)
1591      {
1592      if (dee_action == dee_SKIP) return 1;
1593      if (dee_action == dee_RECURSE)
1594        {
1595        char buffer[1024];
1596        char *nextfile;
1597        directory_type *dir = opendirectory(pathname);
1598    
1599        if (dir == NULL)
1600          {
1601          if (!silent)
1602            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1603              strerror(errno));
1604          return 2;
1605          }
1606    
1607        while ((nextfile = readdirectory(dir)) != NULL)
1608          {
1609          int frc, nflen;
1610          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1611          nflen = (int)(strlen(nextfile));
1612    
1613          if (isdirectory(buffer))
1614            {
1615            if (exclude_dir_compiled != NULL &&
1616                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1617              continue;
1618    
1619            if (include_dir_compiled != NULL &&
1620                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1621              continue;
1622            }
1623          else
1624            {
1625            if (exclude_compiled != NULL &&
1626                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1627              continue;
1628    
1629            if (include_compiled != NULL &&
1630                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1631              continue;
1632            }
1633    
1634          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1635          if (frc > 1) rc = frc;
1636           else if (frc == 0 && rc == 1) rc = 0;
1637          }
1638    
1639        closedirectory(dir);
1640        return rc;
1641        }
1642      }
1643    
1644    /* If the file is not a directory and not a regular file, skip it if that's
1645    been requested. */
1646    
1647    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1648    
1649    /* Control reaches here if we have a regular file, or if we have a directory
1650    and recursion or skipping was not requested, or if we have anything else and
1651    skipping was not requested. The scan proceeds. If this is the first and only
1652    argument at top level, we don't show the file name, unless we are only showing
1653    the file name, or the filename was forced (-H). */
1654    
1655    pathlen = (int)(strlen(pathname));
1656    
1657    /* Open using zlib if it is supported and the file name ends with .gz. */
1658    
1659    #ifdef SUPPORT_LIBZ
1660    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1661      {
1662      ingz = gzopen(pathname, "rb");
1663      if (ingz == NULL)
1664        {
1665        if (!silent)
1666          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1667            strerror(errno));
1668        return 2;
1669        }
1670      handle = (void *)ingz;
1671      frtype = FR_LIBZ;
1672      }
1673    else
1674    #endif
1675    
1676    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1677    
1678    #ifdef SUPPORT_LIBBZ2
1679    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1680      {
1681      inbz2 = BZ2_bzopen(pathname, "rb");
1682      handle = (void *)inbz2;
1683      frtype = FR_LIBBZ2;
1684      }
1685    else
1686    #endif
1687    
1688    /* Otherwise use plain fopen(). The label is so that we can come back here if
1689    an attempt to read a .bz2 file indicates that it really is a plain file. */
1690    
1691    #ifdef SUPPORT_LIBBZ2
1692    PLAIN_FILE:
1693    #endif
1694      {
1695      in = fopen(pathname, "rb");
1696      handle = (void *)in;
1697      frtype = FR_PLAIN;
1698      }
1699    
1700    /* All the opening methods return errno when they fail. */
1701    
1702    if (handle == NULL)
1703      {
1704      if (!silent)
1705        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1706          strerror(errno));
1707      return 2;
1708      }
1709    
1710    /* Now grep the file */
1711    
1712    rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1713      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1714    
1715    /* Close in an appropriate manner. */
1716    
1717    #ifdef SUPPORT_LIBZ
1718    if (frtype == FR_LIBZ)
1719      gzclose(ingz);
1720    else
1721    #endif
1722    
1723    /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1724    read failed. If the error indicates that the file isn't in fact bzipped, try
1725    again as a normal file. */
1726    
1727    #ifdef SUPPORT_LIBBZ2
1728    if (frtype == FR_LIBBZ2)
1729      {
1730      if (rc == 3)
1731        {
1732        int errnum;
1733        const char *err = BZ2_bzerror(inbz2, &errnum);
1734        if (errnum == BZ_DATA_ERROR_MAGIC)
1735          {
1736          BZ2_bzclose(inbz2);
1737          goto PLAIN_FILE;
1738          }
1739        else if (!silent)
1740          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1741            pathname, err);
1742        rc = 2;    /* The normal "something went wrong" code */
1743        }
1744      BZ2_bzclose(inbz2);
1745      }
1746    else
1747    #endif
1748    
1749    /* Normal file close */
1750    
1751    fclose(in);
1752    
1753    /* Pass back the yield from pcregrep(). */
1754    
1755    return rc;
1756    }
1757    
1758    
1759    
1760    
1761    /*************************************************
1762    *                Usage function                  *
1763    *************************************************/
1764    
1765    static int
1766    usage(int rc)
1767    {
1768    option_item *op;
1769    fprintf(stderr, "Usage: pcregrep [-");
1770    for (op = optionlist; op->one_char != 0; op++)
1771      {
1772      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1773      }
1774    fprintf(stderr, "] [long options] [pattern] [files]\n");
1775    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1776      "options.\n");
1777    return rc;
1778    }
1779    
1780    
1781    
1782    
1783    /*************************************************
1784    *                Help function                   *
1785    *************************************************/
1786    
1787    static void
1788  help(void)  help(void)
1789  {  {
1790  option_item *op;  option_item *op;
1791    
1792  printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1793  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1794  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1795    printf("\"-\" can be used as a file name to mean STDIN.\n");
1796    
1797    #ifdef SUPPORT_LIBZ
1798    printf("Files whose names end in .gz are read using zlib.\n");
1799    #endif
1800    
1801    #ifdef SUPPORT_LIBBZ2
1802    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1803    #endif
1804    
1805    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1806    printf("Other files and the standard input are read as plain files.\n\n");
1807    #else
1808    printf("All files are read as plain files, without any interpretation.\n\n");
1809    #endif
1810    
1811    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1812  printf("Options:\n");  printf("Options:\n");
1813    
1814  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
1815    {    {
1816    int n;    int n;
1817    char s[4];    char s[4];
1818    
1819      /* Two options were accidentally implemented and documented with underscores
1820      instead of hyphens in their names, something that was not noticed for quite a
1821      few releases. When fixing this, I left the underscored versions in the list
1822      in case people were using them. However, we don't want to display them in the
1823      help data. There are no other options that contain underscores, and we do not
1824      expect ever to implement such options. Therefore, just omit any option that
1825      contains an underscore. */
1826    
1827      if (strchr(op->long_name, '_') != NULL) continue;
1828    
1829    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1830    printf("  %s --%s%n", s, op->long_name, &n);    n = 31 - printf("  %s --%s", s, op->long_name);
   n = 30 - n;  
1831    if (n < 1) n = 1;    if (n < 1) n = 1;
1832    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                     ", op->help_text);
1833    }    }
1834    
1835  printf("\n  -f<filename>  or  --file=<filename>\n");  printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
1836  printf("    Read patterns from <filename> instead of using a command line option.\n");  printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
1837  printf("    Trailing white space is removed; blanks lines are ignored.\n");  printf("When reading patterns from a file instead of using a command line option,\n");
1838  printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  printf("trailing white space is removed and blank lines are ignored.\n");
1839    printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
1840      MAX_PATTERN_COUNT, PATBUFSIZE);
1841    
1842  printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1843  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1844  }  }
1845    
# Line 334  printf("Exit status is 0 if any matches, Line 1847  printf("Exit status is 0 if any matches,
1847    
1848    
1849  /*************************************************  /*************************************************
1850  *                Handle an option                *  *    Handle a single-letter, no data option      *
1851  *************************************************/  *************************************************/
1852    
1853  static int  static int
# Line 342  handle_option(int letter, int options) Line 1855  handle_option(int letter, int options)
1855  {  {
1856  switch(letter)  switch(letter)
1857    {    {
1858    case -1:  help(); exit(0);    case N_FOFFSETS: file_offsets = TRUE; break;
1859      case N_HELP: help(); pcregrep_exit(0);
1860      case N_LOFFSETS: line_offsets = number = TRUE; break;
1861      case N_LBUFFER: line_buffered = TRUE; break;
1862    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1863    case 'h': filenames = FALSE; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1864      case 'H': filenames = FN_FORCE; break;
1865      case 'h': filenames = FN_NONE; break;
1866    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1867    case 'l': filenames_only = TRUE;    case 'j': study_options |= PCRE_STUDY_JIT_COMPILE; break;
1868      case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1869      case 'L': filenames = FN_NOMATCH_ONLY; break;
1870      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1871    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1872    case 'r': recurse = TRUE; break;    case 'o': only_matching = 0; break;
1873      case 'q': quiet = TRUE; break;
1874      case 'r': dee_action = dee_RECURSE; break;
1875    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1876      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1877    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1878    case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1879      case 'x': process_options |= PO_LINE_MATCH; break;
1880    
1881    case 'V':    case 'V':
1882    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
1883    fprintf(stderr, "PCRE version %s\n", pcre_version());    pcregrep_exit(0);
   exit(0);  
1884    break;    break;
1885    
1886    default:    default:
1887    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1888    exit(usage(2));    pcregrep_exit(usage(2));
1889      }
1890    
1891    return options;
1892    }
1893    
1894    
1895    
1896    
1897    /*************************************************
1898    *          Construct printed ordinal             *
1899    *************************************************/
1900    
/* This turns a number into "1st", "2nd", "3rd", "4th", etc. A number whose
last two digits are 11, 12, or 13 takes "th" ("11th", "112th"), as does every
number that does not end in 1, 2, or 3. The yield is a pointer to a static
buffer, which is overwritten by the next call. */

static char *
ordin(int n)
{
static char buffer[24];    /* Ample room for the digits of an int + suffix */
char *p = buffer;
int lasttwo = n%100;

/* The end of the digits is found by scanning rather than by using the yield
of sprintf(). */

sprintf(p, "%d", n);
while (*p != 0) p++;

if (lasttwo >= 11 && lasttwo <= 13) strcpy(p, "th");
else switch (n%10)
  {
  case 1: strcpy(p, "st"); break;
  case 2: strcpy(p, "nd"); break;
  case 3: strcpy(p, "rd"); break;
  default: strcpy(p, "th"); break;
  }
return buffer;
}
1919    
1920    
1921    
1922    /*************************************************
1923    *          Compile a single pattern              *
1924    *************************************************/
1925    
1926    /* When the -F option has been used, this is called for each substring.
1927    Otherwise it's called for each supplied pattern.
1928    
1929    Arguments:
1930      pattern        the pattern string
1931      options        the PCRE options
1932      filename       the file name, or NULL for a command-line pattern
1933      count          0 if this is the only command line pattern, or
1934                     number of the command line pattern, or
1935                     linenumber for a pattern from a file
1936    
1937    Returns:         TRUE on success, FALSE after an error
1938    */
1939    
1940    static BOOL
1941    compile_single_pattern(char *pattern, int options, char *filename, int count)
1942    {
1943    char buffer[PATBUFSIZE];
1944    const char *error;
1945    int errptr;
1946    
1947    if (pattern_count >= MAX_PATTERN_COUNT)
1948      {
1949      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1950        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1951      return FALSE;
1952      }
1953    
1954    sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
1955      suffix[process_options]);
1956    pattern_list[pattern_count] =
1957      pcre_compile(buffer, options, &error, &errptr, pcretables);
1958    if (pattern_list[pattern_count] != NULL)
1959      {
1960      pattern_count++;
1961      return TRUE;
1962      }
1963    
1964    /* Handle compile errors */
1965    
1966    errptr -= (int)strlen(prefix[process_options]);
1967    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1968    
1969    if (filename == NULL)
1970      {
1971      if (count == 0)
1972        fprintf(stderr, "pcregrep: Error in command-line regex "
1973          "at offset %d: %s\n", errptr, error);
1974      else
1975        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1976          "at offset %d: %s\n", ordin(count), errptr, error);
1977      }
1978    else
1979      {
1980      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1981        "at offset %d: %s\n", count, filename, errptr, error);
1982    }    }
1983    
1984  return options;  return FALSE;
1985  }  }
1986    
1987    
1988    
1989    /*************************************************
1990    *           Compile one supplied pattern         *
1991    *************************************************/
1992    
1993    /* When the -F option has been used, each string may be a list of strings,
1994    separated by line breaks. They will be matched literally.
1995    
1996    Arguments:
1997      pattern        the pattern string
1998      options        the PCRE options
1999      filename       the file name, or NULL for a command-line pattern
2000      count          0 if this is the only command line pattern, or
2001                     number of the command line pattern, or
2002                     linenumber for a pattern from a file
2003    
2004    Returns:         TRUE on success, FALSE after an error
2005    */
2006    
2007    static BOOL
2008    compile_pattern(char *pattern, int options, char *filename, int count)
2009    {
2010    if ((process_options & PO_FIXED_STRINGS) != 0)
2011      {
2012      char *eop = pattern + strlen(pattern);
2013      char buffer[PATBUFSIZE];
2014      for(;;)
2015        {
2016        int ellength;
2017        char *p = end_of_line(pattern, eop, &ellength);
2018        if (ellength == 0)
2019          return compile_single_pattern(pattern, options, filename, count);
2020        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
2021        pattern = p;
2022        if (!compile_single_pattern(buffer, options, filename, count))
2023          return FALSE;
2024        }
2025      }
2026    else return compile_single_pattern(pattern, options, filename, count);
2027    }
2028    
2029    
2030    
2031  /*************************************************  /*************************************************
2032  *                Main program                    *  *                Main program                    *
2033  *************************************************/  *************************************************/
2034    
2035    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2036    
2037  int  int
2038  main(int argc, char **argv)  main(int argc, char **argv)
2039  {  {
2040  int i, j;  int i, j;
2041  int rc = 1;  int rc = 1;
2042  int options = 0;  int pcre_options = 0;
2043    int cmd_pattern_count = 0;
2044    int hint_count = 0;
2045  int errptr;  int errptr;
 const char *error;  
2046  BOOL only_one_at_top;  BOOL only_one_at_top;
2047    char *patterns[MAX_PATTERN_COUNT];
2048    const char *locale_from = "--locale";
2049    const char *error;
2050    
2051    /* Set the default line ending value from the default in the PCRE library;
2052    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2053    Note that the return values from pcre_config(), though derived from the ASCII
2054    codes, are the same in EBCDIC environments, so we must use the actual values
2055    rather than escapes such as '\r'. */
2056    
2057    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2058    switch(i)
2059      {
2060      default:               newline = (char *)"lf"; break;
2061      case 13:               newline = (char *)"cr"; break;
2062      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2063      case -1:               newline = (char *)"any"; break;
2064      case -2:               newline = (char *)"anycrlf"; break;
2065      }
2066    
2067  /* Process the options */  /* Process the options */
2068    
2069  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
2070    {    {
2071      option_item *op = NULL;
2072      char *option_data = (char *)"";    /* default to keep compiler happy */
2073      BOOL longop;
2074      BOOL longopwasequals = FALSE;
2075    
2076    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
2077    
2078    /* Long name options */    /* If we hit an argument that is just "-", it may be a reference to STDIN,
2079      but only if we have previously had -e or -f to define the patterns. */
2080    
2081      if (argv[i][1] == 0)
2082        {
2083        if (pattern_filename != NULL || pattern_count > 0) break;
2084          else pcregrep_exit(usage(2));
2085        }
2086    
2087      /* Handle a long name option, or -- to terminate the options */
2088    
2089    if (argv[i][1] == '-')    if (argv[i][1] == '-')
2090      {      {
2091      option_item *op;      char *arg = argv[i] + 2;
2092        char *argequals = strchr(arg, '=');
2093    
2094      if (strncmp(argv[i]+2, "file=", 5) == 0)      if (*arg == 0)    /* -- terminates options */
2095        {        {
2096        pattern_filename = argv[i] + 7;        i++;
2097        continue;        break;                /* out of the options-handling loop */
2098        }        }
2099    
2100        longop = TRUE;
2101    
2102        /* Some long options have data that follows after =, for example file=name.
2103        Some options have variations in the long name spelling: specifically, we
2104        allow "regexp" because GNU grep allows it, though I personally go along
2105        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2106        These options are entered in the table as "regex(p)". Options can be in
2107        both these categories. */
2108    
2109      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2110        {        {
2111        if (strcmp(argv[i]+2, op->long_name) == 0)        char *opbra = strchr(op->long_name, '(');
2112          char *equals = strchr(op->long_name, '=');
2113    
2114          /* Handle options with only one spelling of the name */
2115    
2116          if (opbra == NULL)     /* Does not contain '(' */
2117            {
2118            if (equals == NULL)  /* Not thing=data case */
2119              {
2120              if (strcmp(arg, op->long_name) == 0) break;
2121              }
2122            else                 /* Special case xxx=data */
2123              {
2124              int oplen = (int)(equals - op->long_name);
2125              int arglen = (argequals == NULL)?
2126                (int)strlen(arg) : (int)(argequals - arg);
2127              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2128                {
2129                option_data = arg + arglen;
2130                if (*option_data == '=')
2131                  {
2132                  option_data++;
2133                  longopwasequals = TRUE;
2134                  }
2135                break;
2136                }
2137              }
2138            }
2139    
2140          /* Handle options with an alternate spelling of the name */
2141    
2142          else
2143          {          {
2144          options = handle_option(op->one_char, options);          char buff1[24];
2145          break;          char buff2[24];
2146    
2147            int baselen = (int)(opbra - op->long_name);
2148            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2149            int arglen = (argequals == NULL || equals == NULL)?
2150              (int)strlen(arg) : (int)(argequals - arg);
2151    
2152            sprintf(buff1, "%.*s", baselen, op->long_name);
2153            sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2154    
2155            if (strncmp(arg, buff1, arglen) == 0 ||
2156               strncmp(arg, buff2, arglen) == 0)
2157              {
2158              if (equals != NULL && argequals != NULL)
2159                {
2160                option_data = argequals;
2161                if (*option_data == '=')
2162                  {
2163                  option_data++;
2164                  longopwasequals = TRUE;
2165                  }
2166                }
2167              break;
2168              }
2169          }          }
2170        }        }
2171    
2172      if (op->one_char == 0)      if (op->one_char == 0)
2173        {        {
2174        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2175        exit(usage(2));        pcregrep_exit(usage(2));
2176        }        }
2177      }      }
2178    
2179    /* One-char options */    /* Jeffrey Friedl's debugging harness uses these additional options which
2180      are not in the right form for putting in the option table because they use
2181      only one hyphen, yet are more than one character long. By putting them
2182      separately here, they will not get displayed as part of the help() output,
2183      but I don't think Jeffrey will care about that. */
2184    
2185    #ifdef JFRIEDL_DEBUG
2186      else if (strcmp(argv[i], "-pre") == 0) {
2187              jfriedl_prefix = argv[++i];
2188              continue;
2189      } else if (strcmp(argv[i], "-post") == 0) {
2190              jfriedl_postfix = argv[++i];
2191              continue;
2192      } else if (strcmp(argv[i], "-XT") == 0) {
2193              sscanf(argv[++i], "%d", &jfriedl_XT);
2194              continue;
2195      } else if (strcmp(argv[i], "-XR") == 0) {
2196              sscanf(argv[++i], "%d", &jfriedl_XR);
2197              continue;
2198      }
2199    #endif
2200    
2201    
2202      /* One-char options; many that have no data may be in a single argument; we
2203      continue till we hit the last one or one that needs data. */
2204    
2205    else    else
2206      {      {
2207      char *s = argv[i] + 1;      char *s = argv[i] + 1;
2208        longop = FALSE;
2209      while (*s != 0)      while (*s != 0)
2210        {        {
2211        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
2212          {          {
2213          pattern_filename = s + 1;          if (*s == op->one_char) break;
2214          if (pattern_filename[0] == 0)          }
2215            {        if (op->one_char == 0)
2216            if (i >= argc - 1)          {
2217              {          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2218              fprintf(stderr, "pcregrep: File name missing after -f\n");            *s, argv[i]);
2219              exit(usage(2));          pcregrep_exit(usage(2));
2220              }          }
2221            pattern_filename = argv[++i];  
2222            }        /* Check for a single-character option that has data: OP_OP_NUMBER
2223          break;        is used for one that either has a numerical number or defaults, i.e. the
2224          data is optional. If a digit follows, there is data; if not, carry on
2225          with other single-character options in the same string. */
2226    
2227          option_data = s+1;
2228          if (op->type == OP_OP_NUMBER)
2229            {
2230            if (isdigit((unsigned char)s[1])) break;
2231            }
2232          else   /* Check for end or a dataless option */
2233            {
2234            if (op->type != OP_NODATA || s[1] == 0) break;
2235          }          }
2236        else options = handle_option(*s++, options);  
2237          /* Handle a single-character option with no data, then loop for the
2238          next character in the string. */
2239    
2240          pcre_options = handle_option(*s++, pcre_options);
2241        }        }
2242      }      }
   }  
2243    
2244  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));    /* At this point we should have op pointing to a matched option. If the type
2245  hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));    is NO_DATA, it means that there is no data, and the option might set
2246      something in the PCRE options. */
2247    
2248  if (pattern_list == NULL || hints_list == NULL)    if (op->type == OP_NODATA)
2249    {      {
2250    fprintf(stderr, "pcregrep: malloc failed\n");      pcre_options = handle_option(op->one_char, pcre_options);
2251    return 2;      continue;
2252    }      }
2253    
2254  /* Compile the regular expression(s). */    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2255      either has a value or defaults to something. It cannot have data in a
2256      separate item. At the moment, the only such options are "colo(u)r",
2257      "only-matching", and Jeffrey Friedl's special -S debugging option. */
2258    
2259  if (pattern_filename != NULL)    if (*option_data == 0 &&
2260    {        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
   FILE *f = fopen(pattern_filename, "r");  
   char buffer[BUFSIZ];  
   if (f == NULL)  
2261      {      {
2262      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      switch (op->one_char)
2263        strerror(errno));        {
2264      return 2;        case N_COLOUR:
2265          colour_option = (char *)"auto";
2266          break;
2267    
2268          case 'o':
2269          only_matching = 0;
2270          break;
2271    
2272    #ifdef JFRIEDL_DEBUG
2273          case 'S':
2274          S_arg = 0;
2275          break;
2276    #endif
2277          }
2278        continue;
2279      }      }
2280    while (fgets(buffer, sizeof(buffer), f) != NULL)  
2281      /* Otherwise, find the data string for the option. */
2282    
2283      if (*option_data == 0)
2284      {      {
2285      char *s = buffer + (int)strlen(buffer);      if (i >= argc - 1 || longopwasequals)
2286      if (pattern_count >= MAX_PATTERN_COUNT)        {
2287          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2288          pcregrep_exit(usage(2));
2289          }
2290        option_data = argv[++i];
2291        }
2292    
2293      /* If the option type is OP_PATLIST, it's the -e option, which can be called
2294      multiple times to create a list of patterns. */
2295    
2296      if (op->type == OP_PATLIST)
2297        {
2298        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2299        {        {
2300        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2301          MAX_PATTERN_COUNT);          MAX_PATTERN_COUNT);
2302        return 2;        return 2;
2303        }        }
2304      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      patterns[cmd_pattern_count++] = option_data;
2305      if (s == buffer) continue;      }
2306      *s = 0;  
2307      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,    /* Otherwise, deal with single string or numeric data values. */
2308        &errptr, NULL);  
2309      if (pattern_list[pattern_count++] == NULL)    else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2310               op->type != OP_OP_NUMBER)
2311        {
2312        *((char **)op->dataptr) = option_data;
2313        }
2314    
2315      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2316      only for unpicking arguments, so just keep it simple. */
2317    
2318      else
2319        {
2320        unsigned long int n = 0;
2321        char *endptr = option_data;
2322        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2323        while (isdigit((unsigned char)(*endptr)))
2324          n = n * 10 + (int)(*endptr++ - '0');
2325        if (toupper(*endptr) == 'K')
2326        {        {
2327        fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",        n *= 1024;
2328          pattern_count, errptr, error);        endptr++;
       return 2;  
2329        }        }
2330        else if (toupper(*endptr) == 'M')
2331          {
2332          n *= 1024*1024;
2333          endptr++;
2334          }
2335        if (*endptr != 0)
2336          {
2337          if (longop)
2338            {
2339            char *equals = strchr(op->long_name, '=');
2340            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2341              (int)(equals - op->long_name);
2342            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2343              option_data, nlen, op->long_name);
2344            }
2345          else
2346            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2347              option_data, op->one_char);
2348          pcregrep_exit(usage(2));
2349          }
2350        if (op->type == OP_LONGNUMBER)
2351            *((unsigned long int *)op->dataptr) = n;
2352        else
2353            *((int *)op->dataptr) = n;
2354        }
2355      }
2356    
2357    /* Options have been decoded. If -C was used, its value is used as a default
2358    for -A and -B. */
2359    
2360    if (both_context > 0)
2361      {
2362      if (after_context == 0) after_context = both_context;
2363      if (before_context == 0) before_context = both_context;
2364      }
2365    
2366    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2367    However, the latter two set only_matching. */
2368    
2369    if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2370        (file_offsets && line_offsets))
2371      {
2372      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2373        "and/or --line-offsets\n");
2374      pcregrep_exit(usage(2));
2375      }
2376    
2377    if (file_offsets || line_offsets) only_matching = 0;
2378    
2379    /* If a locale has not been provided as an option, see if the LC_CTYPE or
2380    LC_ALL environment variable is set, and if so, use it. */
2381    
2382    if (locale == NULL)
2383      {
2384      locale = getenv("LC_ALL");
2385      locale_from = "LCC_ALL";
2386      }
2387    
2388    if (locale == NULL)
2389      {
2390      locale = getenv("LC_CTYPE");
2391      locale_from = "LC_CTYPE";
2392      }
2393    
2394    /* If a locale has been provided, set it, and generate the tables the PCRE
2395    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2396    
2397    if (locale != NULL)
2398      {
2399      if (setlocale(LC_CTYPE, locale) == NULL)
2400        {
2401        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2402          locale, locale_from);
2403        return 2;
2404        }
2405      pcretables = pcre_maketables();
2406      }
2407    
2408    /* Sort out colouring */
2409    
2410    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2411      {
2412      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2413      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2414      else
2415        {
2416        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2417          colour_option);
2418        return 2;
2419        }
2420      if (do_colour)
2421        {
2422        char *cs = getenv("PCREGREP_COLOUR");
2423        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2424        if (cs != NULL) colour_string = cs;
2425      }      }
   fclose(f);  
2426    }    }
2427    
2428  /* If no file name, a single regex must be given inline */  /* Interpret the newline type; the default settings are Unix-like. */
2429    
2430    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2431      {
2432      pcre_options |= PCRE_NEWLINE_CR;
2433      endlinetype = EL_CR;
2434      }
2435    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2436      {
2437      pcre_options |= PCRE_NEWLINE_LF;
2438      endlinetype = EL_LF;
2439      }
2440    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2441      {
2442      pcre_options |= PCRE_NEWLINE_CRLF;
2443      endlinetype = EL_CRLF;
2444      }
2445    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2446      {
2447      pcre_options |= PCRE_NEWLINE_ANY;
2448      endlinetype = EL_ANY;
2449      }
2450    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2451      {
2452      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2453      endlinetype = EL_ANYCRLF;
2454      }
2455  else  else
2456    {    {
2457    if (i >= argc) return usage(0);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2458    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);    return 2;
2459    if (pattern_list[0] == NULL)    }
2460    
2461    /* Interpret the text values for -d and -D */
2462    
2463    if (dee_option != NULL)
2464      {
2465      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2466      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2467      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2468      else
2469        {
2470        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2471        return 2;
2472        }
2473      }
2474    
2475    if (DEE_option != NULL)
2476      {
2477      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2478      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2479      else
2480      {      {
2481      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
       error);  
2482      return 2;      return 2;
2483      }      }
   pattern_count++;  
2484    }    }
2485    
2486  /* Study the regular expressions, as we will be running them may times */  /* Check the values for Jeffrey Friedl's debugging options. */
2487    
2488    #ifdef JFRIEDL_DEBUG
2489    if (S_arg > 9)
2490      {
2491      fprintf(stderr, "pcregrep: bad value for -S option\n");
2492      return 2;
2493      }
2494    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2495      {
2496      if (jfriedl_XT == 0) jfriedl_XT = 1;
2497      if (jfriedl_XR == 0) jfriedl_XR = 1;
2498      }
2499    #endif
2500    
2501    /* Get memory for the main buffer, and to store the pattern and hints lists. */
2502    
2503    bufsize = 3*bufthird;
2504    main_buffer = (char *)malloc(bufsize);
2505    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2506    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2507    
2508    if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
2509      {
2510      fprintf(stderr, "pcregrep: malloc failed\n");
2511      goto EXIT2;
2512      }
2513    
2514    /* If no patterns were provided by -e, and there is no file provided by -f,
2515    the first argument is the one and only pattern, and it must exist. */
2516    
2517    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2518      {
2519      if (i >= argc) return usage(2);
2520      patterns[cmd_pattern_count++] = argv[i++];
2521      }
2522    
2523    /* Compile the patterns that were provided on the command line, either by
2524    multiple uses of -e or as a single unkeyed pattern. */
2525    
2526    for (j = 0; j < cmd_pattern_count; j++)
2527      {
2528      if (!compile_pattern(patterns[j], pcre_options, NULL,
2529           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2530        goto EXIT2;
2531      }
2532    
2533    /* Compile the regular expressions that are provided in a file. */
2534    
2535    if (pattern_filename != NULL)
2536      {
2537      int linenumber = 0;
2538      FILE *f;
2539      char *filename;
2540      char buffer[PATBUFSIZE];
2541    
2542      if (strcmp(pattern_filename, "-") == 0)
2543        {
2544        f = stdin;
2545        filename = stdin_name;
2546        }
2547      else
2548        {
2549        f = fopen(pattern_filename, "r");
2550        if (f == NULL)
2551          {
2552          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2553            strerror(errno));
2554          goto EXIT2;
2555          }
2556        filename = pattern_filename;
2557        }
2558    
2559      while (fgets(buffer, PATBUFSIZE, f) != NULL)
2560        {
2561        char *s = buffer + (int)strlen(buffer);
2562        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2563        *s = 0;
2564        linenumber++;
2565        if (buffer[0] == 0) continue;   /* Skip blank lines */
2566        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2567          goto EXIT2;
2568        }
2569    
2570      if (f != stdin) fclose(f);
2571      }
2572    
2573    /* Study the regular expressions, as we will be running them many times */
2574    
2575  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
2576    {    {
2577    hints_list[j] = pcre_study(pattern_list[j], 0, &error);    hints_list[j] = pcre_study(pattern_list[j], study_options, &error);
2578    if (error != NULL)    if (error != NULL)
2579      {      {
2580      char s[16];      char s[16];
2581      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2582      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2583      return 2;      goto EXIT2;
2584        }
2585      hint_count++;
2586      }
2587    
2588    /* If --match-limit or --recursion-limit was set, put the value(s) into the
2589    pcre_extra block for each pattern. */
2590    
2591    if (match_limit > 0 || match_limit_recursion > 0)
2592      {
2593      for (j = 0; j < pattern_count; j++)
2594        {
2595        if (hints_list[j] == NULL)
2596          {
2597          hints_list[j] = malloc(sizeof(pcre_extra));
2598          if (hints_list[j] == NULL)
2599            {
2600            fprintf(stderr, "pcregrep: malloc failed\n");
2601            pcregrep_exit(2);
2602            }
2603          }
2604        if (match_limit > 0)
2605          {
2606          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2607          hints_list[j]->match_limit = match_limit;
2608          }
2609        if (match_limit_recursion > 0)
2610          {
2611          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2612          hints_list[j]->match_limit_recursion = match_limit_recursion;
2613          }
2614        }
2615      }
2616    
2617    /* If there are include or exclude patterns, compile them. */
2618    
2619    if (exclude_pattern != NULL)
2620      {
2621      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2622        pcretables);
2623      if (exclude_compiled == NULL)
2624        {
2625        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2626          errptr, error);
2627        goto EXIT2;
2628        }
2629      }
2630    
2631    if (include_pattern != NULL)
2632      {
2633      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2634        pcretables);
2635      if (include_compiled == NULL)
2636        {
2637        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2638          errptr, error);
2639        goto EXIT2;
2640        }
2641      }
2642    
2643    if (exclude_dir_pattern != NULL)
2644      {
2645      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2646        pcretables);
2647      if (exclude_dir_compiled == NULL)
2648        {
2649        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2650          errptr, error);
2651        goto EXIT2;
2652        }
2653      }
2654    
2655    if (include_dir_pattern != NULL)
2656      {
2657      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2658        pcretables);
2659      if (include_dir_compiled == NULL)
2660        {
2661        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2662          errptr, error);
2663        goto EXIT2;
2664      }      }
2665    }    }
2666    
2667  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
2668    
2669  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc)
2670      {
2671      rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2672        (filenames > FN_DEFAULT)? stdin_name : NULL);
2673      goto EXIT;
2674      }
2675    
2676  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2677  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
2678  the file name if the argument is not a directory. */  the file name if the argument is not a directory and filenames are not
2679    otherwise forced. */
2680    
2681  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
 if (filenames_only) filenames = TRUE;  
2682    
2683  for (; i < argc; i++)  for (; i < argc; i++)
2684    {    {
2685    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2686    if (frc == 0 && rc == 1) rc = 0;      only_one_at_top);
2687      if (frc > 1) rc = frc;
2688        else if (frc == 0 && rc == 1) rc = 0;
2689    }    }
2690    
2691  return rc;  EXIT:
2692    if (main_buffer != NULL) free(main_buffer);
2693    if (pattern_list != NULL)
2694      {
2695      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2696      free(pattern_list);
2697      }
2698    if (hints_list != NULL)
2699      {
2700      for (i = 0; i < hint_count; i++)
2701        {
2702        if (hints_list[i] != NULL) pcre_free_study(hints_list[i]);
2703        }
2704      free(hints_list);
2705      }
2706    pcregrep_exit(rc);
2707    
2708    EXIT2:
2709    rc = 2;
2710    goto EXIT;
2711  }  }
2712    
2713  /* End */  /* End of pcregrep */

Legend:
Removed from v.53  
changed lines
  Added in v.667

  ViewVC Help
Powered by ViewVC 1.1.5