/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC revision 589 by ph10, Sat Jan 15 11:31:39 2011 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix system it can recurse into directories. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2011 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55    #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 18  its pattern matching. On a Unix system i Line 70  its pattern matching. On a Unix system i
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "2.0 01-Aug-2001"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76    #if BUFSIZ > 8192
77    #define MBUFTHIRD BUFSIZ
78    #else
79    #define MBUFTHIRD 8192
80    #endif
81    
82    /* Values for the "filenames" variable, which specifies options for file name
83    output. The order is important; it is assumed that a file name is wanted for
84    all values greater than FN_DEFAULT. */
85    
86    enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92    /* Actions for the -d and -D options */
93    
94    enum { dee_READ, dee_SKIP, dee_RECURSE };
95    enum { DEE_READ, DEE_SKIP };
96    
97    /* Actions for special processing options (flag bits) */
98    
99    #define PO_WORD_MATCH     0x0001
100    #define PO_LINE_MATCH     0x0002
101    #define PO_FIXED_STRINGS  0x0004
102    
103    /* Line ending types */
104    
105    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
113    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
114    
115    
116    
117  /*************************************************  /*************************************************
118  *               Global variables                 *  *               Global variables                 *
119  *************************************************/  *************************************************/
120    
121    /* Jeffrey Friedl has some debugging requirements that are not part of the
122    regular code. */
123    
124    #ifdef JFRIEDL_DEBUG
125    static int S_arg = -1;
126    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128    static const char *jfriedl_prefix = "";
129    static const char *jfriedl_postfix = "";
130    #endif
131    
132    static int  endlinetype;
133    
134    static char *colour_string = (char *)"1;31";
135    static char *colour_option = NULL;
136    static char *dee_option = NULL;
137    static char *DEE_option = NULL;
138    static char *newline = NULL;
139  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
140    static char *stdin_name = (char *)"(standard input)";
141    static char *locale = NULL;
142    
143    static const unsigned char *pcretables = NULL;
144    
145  static int  pattern_count = 0;  static int  pattern_count = 0;
146  static pcre **pattern_list;  static pcre **pattern_list = NULL;
147  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
148    
149    static char *include_pattern = NULL;
150    static char *exclude_pattern = NULL;
151    static char *include_dir_pattern = NULL;
152    static char *exclude_dir_pattern = NULL;
153    
154    static pcre *include_compiled = NULL;
155    static pcre *exclude_compiled = NULL;
156    static pcre *include_dir_compiled = NULL;
157    static pcre *exclude_dir_compiled = NULL;
158    
159    static int after_context = 0;
160    static int before_context = 0;
161    static int both_context = 0;
162    static int dee_action = dee_READ;
163    static int DEE_action = DEE_READ;
164    static int error_count = 0;
165    static int filenames = FN_DEFAULT;
166    static int only_matching = -1;
167    static int process_options = 0;
168    
169    static unsigned long int match_limit = 0;
170    static unsigned long int match_limit_recursion = 0;
171    
172  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
173  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
174  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
175    static BOOL hyphenpending = FALSE;
176  static BOOL invert = FALSE;  static BOOL invert = FALSE;
177    static BOOL line_buffered = FALSE;
178    static BOOL line_offsets = FALSE;
179    static BOOL multiline = FALSE;
180  static BOOL number = FALSE;  static BOOL number = FALSE;
181  static BOOL recurse = FALSE;  static BOOL omit_zero_count = FALSE;
182    static BOOL resource_error = FALSE;
183    static BOOL quiet = FALSE;
184  static BOOL silent = FALSE;  static BOOL silent = FALSE;
185  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
186    
187  /* Structure for options and list of them */  /* Structure for options and list of them */
188    
189    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
190           OP_OP_NUMBER, OP_PATLIST };
191    
192  typedef struct option_item {  typedef struct option_item {
193      int type;
194    int one_char;    int one_char;
195    char *long_name;    void *dataptr;
196    char *help_text;    const char *long_name;
197      const char *help_text;
198  } option_item;  } option_item;
199    
200    /* Options without a single-letter equivalent get a negative value. This can be
201    used to identify them. */
202    
203    #define N_COLOUR       (-1)
204    #define N_EXCLUDE      (-2)
205    #define N_EXCLUDE_DIR  (-3)
206    #define N_HELP         (-4)
207    #define N_INCLUDE      (-5)
208    #define N_INCLUDE_DIR  (-6)
209    #define N_LABEL        (-7)
210    #define N_LOCALE       (-8)
211    #define N_NULL         (-9)
212    #define N_LOFFSETS     (-10)
213    #define N_FOFFSETS     (-11)
214    #define N_LBUFFER      (-12)
215    #define N_M_LIMIT      (-13)
216    #define N_M_LIMIT_REC  (-14)
217    
218  static option_item optionlist[] = {  static option_item optionlist[] = {
219    { -1,  "help",         "display this help and exit" },    { OP_NODATA,     N_NULL,   NULL,              "",              "  terminate options" },
220    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
221    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
222    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
223    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
224    { 'n', "line-number",  "print line number with output lines" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
225    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
226    { 's', "no-messages",  "suppress error messages" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
227    { 'V', "version",      "print version information and exit" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
228    { 'v', "invert-match", "select non-matching lines" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
229    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_PATLIST,    'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
230    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
231    { 0,    NULL,           NULL }    { OP_STRING,     'f',      &pattern_filename, "file=path",     "read patterns from file" },
232      { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
233      { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
234      { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
235      { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
236      { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
237      { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
238      { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
239      { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
240      { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
241      { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
242      { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
243      { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
245      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
247      { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
248      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
249      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
250      { OP_STRING,     N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
251      { OP_STRING,     N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
252      { OP_STRING,     N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
253      { OP_STRING,     N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
254    
255      /* These two were accidentally implemented with underscores instead of
256      hyphens in the option names. As this was not discovered for several releases,
257      the incorrect versions are left in the table for compatibility. However, the
258      --help function misses out any option that has an underscore in its name. */
259    
260      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
261      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
262    
263    #ifdef JFRIEDL_DEBUG
264      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
265    #endif
266      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
267      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
268      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
269      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
270      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
271      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
272      { OP_NODATA,    0,        NULL,               NULL,            NULL }
273  };  };
274    
275    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
276    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
277    that the combination of -w and -x has the same effect as -x on its own, so we
278    can treat them as the same. */
279    
280    static const char *prefix[] = {
281      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
282    
283    static const char *suffix[] = {
284      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
285    
286    /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
287    
288    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
289    
290    const char utf8_table4[] = {
291      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
292      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
293      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
294      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
295    
296    
297    
298  /*************************************************  /*************************************************
299  *       Functions for directory scanning         *  *         Exit from the program                  *
300    *************************************************/
301    
302    /* If there has been a resource error, give a suitable message.
303    
304    Argument:  the return code
305    Returns:   does not return
306    */
307    
308    static void
309    pcregrep_exit(int rc)
310    {
311    if (resource_error)
312      {
313      fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
314        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
315      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
316      }
317    
318    exit(rc);
319    }
320    
321    
322    /*************************************************
323    *            OS-specific functions               *
324  *************************************************/  *************************************************/
325    
326  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
327  although at present the only ones are for Unix, and for "no directory recursion  although at present the only ones are for Unix, Win32, and for "no support". */
 support". */  
328    
329    
330  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
331    
332  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
333  #include <sys/types.h>  #include <sys/types.h>
334  #include <sys/stat.h>  #include <sys/stat.h>
335  #include <dirent.h>  #include <dirent.h>
336    
337  typedef DIR directory_type;  typedef DIR directory_type;
338    
339  int  static int
340  isdirectory(char *filename)  isdirectory(char *filename)
341  {  {
342  struct stat statbuf;  struct stat statbuf;
# Line 92  if (stat(filename, &statbuf) < 0) Line 345  if (stat(filename, &statbuf) < 0)
345  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
346  }  }
347    
348  directory_type *  static directory_type *
349  opendirectory(char *filename)  opendirectory(char *filename)
350  {  {
351  return opendir(filename);  return opendir(filename);
352  }  }
353    
354  char *  static char *
355  readdirectory(directory_type *dir)  readdirectory(directory_type *dir)
356  {  {
357  for (;;)  for (;;)
# Line 108  for (;;) Line 361  for (;;)
361    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
362      return dent->d_name;      return dent->d_name;
363    }    }
364    /* Control never reaches here */
365    }
366    
367    static void
368    closedirectory(directory_type *dir)
369    {
370    closedir(dir);
371    }
372    
373    
374    /************* Test for regular file in Unix **********/
375    
376    static int
377    isregfile(char *filename)
378    {
379    struct stat statbuf;
380    if (stat(filename, &statbuf) < 0)
381      return 1;        /* In the expectation that opening as a file will fail */
382    return (statbuf.st_mode & S_IFMT) == S_IFREG;
383    }
384    
385    
386    /************* Test for a terminal in Unix **********/
387    
388    static BOOL
389    is_stdout_tty(void)
390    {
391    return isatty(fileno(stdout));
392    }
393    
394    static BOOL
395    is_file_tty(FILE *f)
396    {
397    return isatty(fileno(f));
398    }
399    
400    
401    /************* Directory scanning in Win32 ***********/
402    
403    /* I (Philip Hazel) have no means of testing this code. It was contributed by
404    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
405    when it did not exist. David Byron added a patch that moved the #include of
406    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
407    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
408    undefined when it is indeed undefined. */
409    
410    #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
411    
412    #ifndef STRICT
413    # define STRICT
414    #endif
415    #ifndef WIN32_LEAN_AND_MEAN
416    # define WIN32_LEAN_AND_MEAN
417    #endif
418    
419    #include <windows.h>
420    
421    #ifndef INVALID_FILE_ATTRIBUTES
422    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
423    #endif
424    
425    typedef struct directory_type
426    {
427    HANDLE handle;
428    BOOL first;
429    WIN32_FIND_DATA data;
430    } directory_type;
431    
432    int
433    isdirectory(char *filename)
434    {
435    DWORD attr = GetFileAttributes(filename);
436    if (attr == INVALID_FILE_ATTRIBUTES)
437      return 0;
438    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
439    }
440    
441    directory_type *
442    opendirectory(char *filename)
443    {
444    size_t len;
445    char *pattern;
446    directory_type *dir;
447    DWORD err;
448    len = strlen(filename);
449    pattern = (char *) malloc(len + 3);
450    dir = (directory_type *) malloc(sizeof(*dir));
451    if ((pattern == NULL) || (dir == NULL))
452      {
453      fprintf(stderr, "pcregrep: malloc failed\n");
454      pcregrep_exit(2);
455      }
456    memcpy(pattern, filename, len);
457    memcpy(&(pattern[len]), "\\*", 3);
458    dir->handle = FindFirstFile(pattern, &(dir->data));
459    if (dir->handle != INVALID_HANDLE_VALUE)
460      {
461      free(pattern);
462      dir->first = TRUE;
463      return dir;
464      }
465    err = GetLastError();
466    free(pattern);
467    free(dir);
468    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
469    return NULL;
470    }
471    
472    char *
473    readdirectory(directory_type *dir)
474    {
475    for (;;)
476      {
477      if (!dir->first)
478        {
479        if (!FindNextFile(dir->handle, &(dir->data)))
480          return NULL;
481        }
482      else
483        {
484        dir->first = FALSE;
485        }
486      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
487        return dir->data.cFileName;
488      }
489    #ifndef _MSC_VER
490  return NULL;   /* Keep compiler happy; never executed */  return NULL;   /* Keep compiler happy; never executed */
491    #endif
492  }  }
493    
494  void  void
495  closedirectory(directory_type *dir)  closedirectory(directory_type *dir)
496  {  {
497  closedir(dir);  FindClose(dir->handle);
498    free(dir);
499  }  }
500    
501    
502  #else  /************* Test for regular file in Win32 **********/
503    
504    /* I don't know how to do this, or if it can be done; assume all paths are
505    regular if they are not directories. */
506    
507    int isregfile(char *filename)
508    {
509    return !isdirectory(filename);
510    }
511    
512    
513    /************* Test for a terminal in Win32 **********/
514    
515    /* I don't know how to do this; assume never */
516    
517    static BOOL
518    is_stdout_tty(void)
519    {
520    return FALSE;
521    }
522    
523    static BOOL
524    is_file_tty(FILE *f)
525    {
526    return FALSE;
527    }
528    
529    
530  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
531    
532  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
533    
534    #else
535    
536  typedef void directory_type;  typedef void directory_type;
537    
538  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
539  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
540  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
541  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
542    
543    
544    /************* Test for regular when we can't do it **********/
545    
546    /* Assume all files are regular. */
547    
548    int isregfile(char *filename) { return 1; }
549    
550    
551    /************* Test for a terminal when we can't do it **********/
552    
553    static BOOL
554    is_stdout_tty(void)
555    {
556    return FALSE;
557    }
558    
559    static BOOL
560    is_file_tty(FILE *f)
561    {
562    return FALSE;
563    }
564    
565  #endif  #endif
566    
567    
568    
569  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
570  /*************************************************  /*************************************************
571  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
572  *************************************************/  *************************************************/
# Line 159  return sys_errlist[n]; Line 589  return sys_errlist[n];
589    
590    
591  /*************************************************  /*************************************************
592  *              Grep an individual file           *  *            Read one line of input              *
593  *************************************************/  *************************************************/
594    
595    /* Normally, input is read using fread() into a large buffer, so many lines may
596    be read at once. However, doing this for tty input means that no output appears
597    until a lot of input has been typed. Instead, tty input is handled line by
598    line. We cannot use fgets() for this: it reads straight past binary zeros,
599    and since the data may contain embedded binary zeros, there would be no way
600    of telling how many characters it has actually read.
601    
602    Arguments:
603      buffer     the buffer to read into
604      length     the maximum number of characters to read
605      f          the file
606    
607    Returns:     the number of characters read, zero at end of file
608    */
609    
610  static int  static int
611  pcregrep(FILE *in, char *name)  read_one_line(char *buffer, int length, FILE *f)
612  {  {
613  int rc = 1;  int c;
614  int linenumber = 0;  int yield = 0;
615  int count = 0;  while ((c = fgetc(f)) != EOF)
616  int offsets[99];    {
617  char buffer[BUFSIZ];    buffer[yield++] = c;
618      if (c == '\n' || yield >= length) break;
619      }
620    return yield;
621    }
622    
623    
624    
625    /*************************************************
626    *             Find end of line                   *
627    *************************************************/
628    
629    /* The length of the endline sequence that is found is set via lenptr. This may
630    be zero at the very end of the file if there is no line-ending sequence there.
631    
632    Arguments:
633      p         current position in line
634      endptr    end of available data
635      lenptr    where to put the length of the eol sequence
636    
637    Returns:    pointer to the byte after the end of the line, including the newline byte(s)
638    */
639    
640  while (fgets(buffer, sizeof(buffer), in) != NULL)  static char *
641    end_of_line(char *p, char *endptr, int *lenptr)
642    {
643    switch(endlinetype)
644    {    {
645    BOOL match = FALSE;    default:      /* Just in case */
646    int i;    case EL_LF:
647    int length = (int)strlen(buffer);    while (p < endptr && *p != '\n') p++;
648    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    if (p < endptr)
649    linenumber++;      {
650        *lenptr = 1;
651        return p + 1;
652        }
653      *lenptr = 0;
654      return endptr;
655    
656    for (i = 0; !match && i < pattern_count; i++)    case EL_CR:
657      while (p < endptr && *p != '\r') p++;
658      if (p < endptr)
659      {      {
660      match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,      *lenptr = 1;
661        offsets, 99) >= 0;      return p + 1;
     if (match && whole_lines && offsets[1] != length) match = FALSE;  
662      }      }
663      *lenptr = 0;
664      return endptr;
665    
666    if (match != invert)    case EL_CRLF:
667      for (;;)
668      {      {
669      if (count_only) count++;      while (p < endptr && *p != '\r') p++;
670        if (++p >= endptr)
671          {
672          *lenptr = 0;
673          return endptr;
674          }
675        if (*p == '\n')
676          {
677          *lenptr = 2;
678          return p + 1;
679          }
680        }
681      break;
682    
683      else if (filenames_only)    case EL_ANYCRLF:
684      while (p < endptr)
685        {
686        int extra = 0;
687        register int c = *((unsigned char *)p);
688    
689        if (utf8 && c >= 0xc0)
690        {        {
691        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        int gcii, gcss;
692        return 0;        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
693          gcss = 6*extra;
694          c = (c & utf8_table3[extra]) << gcss;
695          for (gcii = 1; gcii <= extra; gcii++)
696            {
697            gcss -= 6;
698            c |= (p[gcii] & 0x3f) << gcss;
699            }
700        }        }
701    
702      else if (silent) return 0;      p += 1 + extra;
703    
704      else      switch (c)
705        {        {
706        if (name != NULL) fprintf(stdout, "%s:", name);        case 0x0a:    /* LF */
707        if (number) fprintf(stdout, "%d:", linenumber);        *lenptr = 1;
708        fprintf(stdout, "%s\n", buffer);        return p;
709    
710          case 0x0d:    /* CR */
711          if (p < endptr && *p == 0x0a)
712            {
713            *lenptr = 2;
714            p++;
715            }
716          else *lenptr = 1;
717          return p;
718    
719          default:
720          break;
721        }        }
722        }   /* End of loop for ANYCRLF case */
723    
724      rc = 0;    *lenptr = 0;  /* Must have hit the end */
725      }    return endptr;
   }  
726    
727  if (count_only)    case EL_ANY:
728    {    while (p < endptr)
729    if (name != NULL) fprintf(stdout, "%s:", name);      {
730    fprintf(stdout, "%d\n", count);      int extra = 0;
731    }      register int c = *((unsigned char *)p);
732    
733  return rc;      if (utf8 && c >= 0xc0)
734  }        {
735          int gcii, gcss;
736          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
737          gcss = 6*extra;
738          c = (c & utf8_table3[extra]) << gcss;
739          for (gcii = 1; gcii <= extra; gcii++)
740            {
741            gcss -= 6;
742            c |= (p[gcii] & 0x3f) << gcss;
743            }
744          }
745    
746        p += 1 + extra;
747    
748        switch (c)
749          {
750          case 0x0a:    /* LF */
751          case 0x0b:    /* VT */
752          case 0x0c:    /* FF */
753          *lenptr = 1;
754          return p;
755    
756          case 0x0d:    /* CR */
757          if (p < endptr && *p == 0x0a)
758            {
759            *lenptr = 2;
760            p++;
761            }
762          else *lenptr = 1;
763          return p;
764    
765          case 0x85:    /* NEL */
766          *lenptr = utf8? 2 : 1;
767          return p;
768    
769          case 0x2028:  /* LS */
770          case 0x2029:  /* PS */
771          *lenptr = 3;
772          return p;
773    
774          default:
775          break;
776          }
777        }   /* End of loop for ANY case */
778    
779      *lenptr = 0;  /* Must have hit the end */
780      return endptr;
781      }     /* End of overall switch */
782    }
783    
784    
785    
786  /*************************************************  /*************************************************
787  *     Grep a file or recurse into a directory    *  *         Find start of previous line            *
788  *************************************************/  *************************************************/
789    
790  static int  /* This is called when looking back for before lines to print.
 grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,  
   BOOL only_one_at_top)  
 {  
 int rc = 1;  
 int sep;  
 FILE *in;  
791    
792  /* If the file is a directory and we are recursing, scan each file within it.  Arguments:
793  The scanning code is localized so it can be made system-specific. */    p         start of the subsequent line
794      startptr  start of available data
795    
796  if ((sep = isdirectory(filename)) != 0 && recurse)  Returns:    pointer to the start of the previous line
797    {  */
   char buffer[1024];  
   char *nextfile;  
   directory_type *dir = opendirectory(filename);  
798    
799    if (dir == NULL)  static char *
800      {  previous_line(char *p, char *startptr)
801      fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,  {
802        strerror(errno));  switch(endlinetype)
803      return 2;    {
804      }    default:      /* Just in case */
805      case EL_LF:
806      p--;
807      while (p > startptr && p[-1] != '\n') p--;
808      return p;
809    
810      case EL_CR:
811      p--;
812      while (p > startptr && p[-1] != '\n') p--;
813      return p;
814    
815    while ((nextfile = readdirectory(dir)) != NULL)    case EL_CRLF:
816      for (;;)
817      {      {
818      int frc;      p -= 2;
819      sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);      while (p > startptr && p[-1] != '\n') p--;
820      frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);      if (p <= startptr + 1 || p[-2] == '\r') return p;
     if (frc == 0 && rc == 1) rc = 0;  
821      }      }
822      return p;   /* But control should never get here */
823    
824    closedirectory(dir);    case EL_ANY:
825    return rc;    case EL_ANYCRLF:
826    }    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
827      if (utf8) while ((*p & 0xc0) == 0x80) p--;
828    
829  /* If the file is not a directory, or we are not recursing, scan it. If this is    while (p > startptr)
830  the first and only argument at top level, we don't show the file name.      {
831  Otherwise, control is via the show_filenames variable. */      register int c;
832        char *pp = p - 1;
833    
834  in = fopen(filename, "r");      if (utf8)
835  if (in == NULL)        {
836    {        int extra = 0;
837    fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));        while ((*pp & 0xc0) == 0x80) pp--;
838    return 2;        c = *((unsigned char *)pp);
839    }        if (c >= 0xc0)
840            {
841            int gcii, gcss;
842            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
843            gcss = 6*extra;
844            c = (c & utf8_table3[extra]) << gcss;
845            for (gcii = 1; gcii <= extra; gcii++)
846              {
847              gcss -= 6;
848              c |= (pp[gcii] & 0x3f) << gcss;
849              }
850            }
851          }
852        else c = *((unsigned char *)pp);
853    
854  rc = pcregrep(in, (show_filenames && !only_one_at_top)? filename : NULL);      if (endlinetype == EL_ANYCRLF) switch (c)
855  fclose(in);        {
856  return rc;        case 0x0a:    /* LF */
857  }        case 0x0d:    /* CR */
858          return p;
859    
860          default:
861          break;
862          }
863    
864        else switch (c)
865          {
866          case 0x0a:    /* LF */
867          case 0x0b:    /* VT */
868          case 0x0c:    /* FF */
869          case 0x0d:    /* CR */
870          case 0x85:    /* NEL */
871          case 0x2028:  /* LS */
872          case 0x2029:  /* PS */
873          return p;
874    
875          default:
876          break;
877          }
878    
879  /*************************************************      p = pp;  /* Back one character */
880  *                Usage function                  *      }        /* End of loop for ANY case */
 *************************************************/  
881    
882  static int    return startptr;  /* Hit start of data */
883  usage(int rc)    }     /* End of overall switch */
 {  
 fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n");  
 fprintf(stderr, "Type `pcregrep --help' for more information.\n");  
 return rc;  
884  }  }
885    
886    
887    
888    
889    
890  /*************************************************  /*************************************************
891  *                Help function                   *  *       Print the previous "after" lines         *
892    *************************************************/
893    
894    /* This is called if we are about to lose said lines because of buffer filling,
895    and at the end of the file. The data in the line is written using fwrite() so
896    that a binary zero does not terminate it.
897    
898    Arguments:
899      lastmatchnumber   the number of the last matching line, plus one
900      lastmatchrestart  where we restarted after the last match
901      endptr            end of available data
902      printname         filename for printing
903    
904    Returns:            nothing
905    */
906    
907    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
908      char *endptr, char *printname)
909    {
910    if (after_context > 0 && lastmatchnumber > 0)
911      {
912      int count = 0;
913      while (lastmatchrestart < endptr && count++ < after_context)
914        {
915        int ellength;
916        char *pp = lastmatchrestart;
917        if (printname != NULL) fprintf(stdout, "%s-", printname);
918        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
919        pp = end_of_line(pp, endptr, &ellength);
920        FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
921        lastmatchrestart = pp;
922        }
923      hyphenpending = TRUE;
924      }
925    }
926    
927    
928    
929    /*************************************************
930    *   Apply patterns to subject till one matches   *
931    *************************************************/
932    
933    /* This function is called to run through all patterns, looking for a match. It
934    is used multiple times for the same subject when colouring is enabled, in order
935    to find all possible matches.
936    
937    Arguments:
938      matchptr    the start of the subject
939      length      the length of the subject to match
940      offsets     the offsets vector to fill in
941      mrc         address of where to put the result of pcre_exec()
942    
943    Returns:      TRUE if there was a match
944                  FALSE if there was no match
945                  invert if there was a non-fatal error
946    */
947    
948    static BOOL
949    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
950    {
951    int i;
952    size_t slen = length;
953    const char *msg = "this text:\n\n";
954    if (slen > 200)
955      {
956      slen = 200;
957      msg = "text that starts:\n\n";
958      }
959    for (i = 0; i < pattern_count; i++)
960      {
961      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
962        PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
963      if (*mrc >= 0) return TRUE;
964      if (*mrc == PCRE_ERROR_NOMATCH) continue;
965      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
966      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
967      fprintf(stderr, "%s", msg);
968      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
969      fprintf(stderr, "\n\n");
970      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
971        resource_error = TRUE;
972      if (error_count++ > 20)
973        {
974        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
975        pcregrep_exit(2);
976        }
977      return invert;    /* No more matching; don't show the line again */
978      }
979    
980    return FALSE;  /* No match, no errors */
981    }
982    
983    
984    
985    /*************************************************
986    *            Grep an individual file             *
987    *************************************************/
988    
989    /* This is called from grep_or_recurse() below. It uses a buffer that is three
990    times the value of MBUFTHIRD. The matching point is never allowed to stray into
991    the top third of the buffer, thus keeping more of the file available for
992    context printing or for multiline scanning. For large files, the pointer will
993    be in the middle third most of the time, so the bottom third is available for
994    "before" context printing.
995    
996    Arguments:
997      handle       the fopened FILE stream for a normal file
998                   the gzFile pointer when reading is via libz
999                   the BZFILE pointer when reading is via libbz2
1000      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1001      printname    the file name if it is to be printed for each match
1002                   or NULL if the file name is not to be printed
1003                   it cannot be NULL if filenames[_nomatch]_only is set
1004    
1005    Returns:       0 if there was at least one match
1006                   1 otherwise (no matches)
1007                   2 if there is a read error on a .bz2 file
1008    */
1009    
1010    static int
1011    pcregrep(void *handle, int frtype, char *printname)
1012    {
1013    int rc = 1;
1014    int linenumber = 1;
1015    int lastmatchnumber = 0;
1016    int count = 0;
1017    int filepos = 0;
1018    int offsets[OFFSET_SIZE];
1019    char *lastmatchrestart = NULL;
1020    char buffer[3*MBUFTHIRD];
1021    char *ptr = buffer;
1022    char *endptr;
1023    size_t bufflength;
1024    BOOL endhyphenpending = FALSE;
1025    BOOL input_line_buffered = line_buffered;
1026    FILE *in = NULL;                    /* Ensure initialized */
1027    
1028    #ifdef SUPPORT_LIBZ
1029    gzFile ingz = NULL;
1030    #endif
1031    
1032    #ifdef SUPPORT_LIBBZ2
1033    BZFILE *inbz2 = NULL;
1034    #endif
1035    
1036    
1037    /* Do the first read into the start of the buffer and set up the pointer to end
1038    of what we have. In the case of libz, a non-zipped .gz file will be read as a
1039    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1040    fail. */
1041    
1042    #ifdef SUPPORT_LIBZ
1043    if (frtype == FR_LIBZ)
1044      {
1045      ingz = (gzFile)handle;
1046      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1047      }
1048    else
1049    #endif
1050    
1051    #ifdef SUPPORT_LIBBZ2
1052    if (frtype == FR_LIBBZ2)
1053      {
1054      inbz2 = (BZFILE *)handle;
1055      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1056      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1057      }                                    /* without the cast it is unsigned. */
1058    else
1059    #endif
1060    
1061      {
1062      in = (FILE *)handle;
1063      if (is_file_tty(in)) input_line_buffered = TRUE;
1064      bufflength = input_line_buffered?
1065        read_one_line(buffer, 3*MBUFTHIRD, in) :
1066        fread(buffer, 1, 3*MBUFTHIRD, in);
1067      }
1068    
1069    endptr = buffer + bufflength;
1070    
1071    /* Loop while the current pointer is not at the end of the file. For large
1072    files, endptr will be at the end of the buffer when we are in the middle of the
1073    file, but ptr will never get there, because as soon as it gets over 2/3 of the
1074    way, the buffer is shifted left and re-filled. */
1075    
1076    while (ptr < endptr)
1077      {
1078      int endlinelength;
1079      int mrc = 0;
1080      BOOL match;
1081      char *matchptr = ptr;
1082      char *t = ptr;
1083      size_t length, linelength;
1084    
1085      /* At this point, ptr is at the start of a line. We need to find the length
1086      of the subject string to pass to pcre_exec(). In multiline mode, it is the
1087      length remainder of the data in the buffer. Otherwise, it is the length of
1088      the next line, excluding the terminating newline. After matching, we always
1089      advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1090      option is used for compiling, so that any match is constrained to be in the
1091      first line. */
1092    
1093      t = end_of_line(t, endptr, &endlinelength);
1094      linelength = t - ptr - endlinelength;
1095      length = multiline? (size_t)(endptr - ptr) : linelength;
1096    
1097      /* Extra processing for Jeffrey Friedl's debugging. */
1098    
1099    #ifdef JFRIEDL_DEBUG
1100      if (jfriedl_XT || jfriedl_XR)
1101      {
1102          #include <sys/time.h>
1103          #include <time.h>
1104          struct timeval start_time, end_time;
1105          struct timezone dummy;
1106          int i;
1107    
1108          if (jfriedl_XT)
1109          {
1110              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1111              const char *orig = ptr;
1112              ptr = malloc(newlen + 1);
1113              if (!ptr) {
1114                      printf("out of memory");
1115                      pcregrep_exit(2);
1116              }
1117              endptr = ptr;
1118              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1119              for (i = 0; i < jfriedl_XT; i++) {
1120                      strncpy(endptr, orig,  length);
1121                      endptr += length;
1122              }
1123              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1124              length = newlen;
1125          }
1126    
1127          if (gettimeofday(&start_time, &dummy) != 0)
1128                  perror("bad gettimeofday");
1129    
1130    
1131          for (i = 0; i < jfriedl_XR; i++)
1132              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1133                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1134    
1135          if (gettimeofday(&end_time, &dummy) != 0)
1136                  perror("bad gettimeofday");
1137    
1138          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1139                          -
1140                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1141    
1142          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1143          return 0;
1144      }
1145    #endif
1146    
1147      /* We come back here after a match when the -o option (only_matching) is set,
1148      in order to find any further matches in the same line. */
1149    
1150      ONLY_MATCHING_RESTART:
1151    
1152      /* Run through all the patterns until one matches or there is an error other
1153      than NOMATCH. This code is in a subroutine so that it can be re-used for
1154      finding subsequent matches when colouring matched lines. */
1155    
1156      match = match_patterns(matchptr, length, offsets, &mrc);
1157    
1158      /* If it's a match or a not-match (as required), do what's wanted. */
1159    
1160      if (match != invert)
1161        {
1162        BOOL hyphenprinted = FALSE;
1163    
1164        /* We've failed if we want a file that doesn't have any matches. */
1165    
1166        if (filenames == FN_NOMATCH_ONLY) return 1;
1167    
1168        /* Just count if just counting is wanted. */
1169    
1170        if (count_only) count++;
1171    
1172        /* If all we want is a file name, there is no need to scan any more lines
1173        in the file. */
1174    
1175        else if (filenames == FN_MATCH_ONLY)
1176          {
1177          fprintf(stdout, "%s\n", printname);
1178          return 0;
1179          }
1180    
1181        /* Likewise, if all we want is a yes/no answer. */
1182    
1183        else if (quiet) return 0;
1184    
1185        /* The --only-matching option prints just the substring that matched, or a
1186        captured portion of it, as long as this string is not empty, and the
1187        --file-offsets and --line-offsets options output offsets for the matching
1188        substring (they both force --only-matching = 0). None of these options
1189        prints any context. Afterwards, adjust the start and length, and then jump
1190        back to look for further matches in the same line. If we are in invert
1191        mode, however, nothing is printed and we do not restart - this could still
1192        be useful because the return code is set. */
1193    
1194        else if (only_matching >= 0)
1195          {
1196          if (!invert)
1197            {
1198            if (printname != NULL) fprintf(stdout, "%s:", printname);
1199            if (number) fprintf(stdout, "%d:", linenumber);
1200            if (line_offsets)
1201              fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1202                offsets[1] - offsets[0]);
1203            else if (file_offsets)
1204              fprintf(stdout, "%d,%d\n",
1205                (int)(filepos + matchptr + offsets[0] - ptr),
1206                offsets[1] - offsets[0]);
1207            else if (only_matching < mrc)
1208              {
1209              int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1210              if (plen > 0)
1211                {
1212                if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1213                FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1214                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1215                fprintf(stdout, "\n");
1216                }
1217              }
1218            else if (printname != NULL || number) fprintf(stdout, "\n");
1219            matchptr += offsets[1];
1220            length -= offsets[1];
1221            match = FALSE;
1222            if (line_buffered) fflush(stdout);
1223            rc = 0;    /* Had some success */
1224            goto ONLY_MATCHING_RESTART;
1225            }
1226          }
1227    
1228        /* This is the default case when none of the above options is set. We print
1229        the matching lines(s), possibly preceded and/or followed by other lines of
1230        context. */
1231    
1232        else
1233          {
1234          /* See if there is a requirement to print some "after" lines from a
1235          previous match. We never print any overlaps. */
1236    
1237          if (after_context > 0 && lastmatchnumber > 0)
1238            {
1239            int ellength;
1240            int linecount = 0;
1241            char *p = lastmatchrestart;
1242    
1243            while (p < ptr && linecount < after_context)
1244              {
1245              p = end_of_line(p, ptr, &ellength);
1246              linecount++;
1247              }
1248    
1249            /* It is important to advance lastmatchrestart during this printing so
1250            that it interacts correctly with any "before" printing below. Print
1251            each line's data using fwrite() in case there are binary zeroes. */
1252    
1253            while (lastmatchrestart < p)
1254              {
1255              char *pp = lastmatchrestart;
1256              if (printname != NULL) fprintf(stdout, "%s-", printname);
1257              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1258              pp = end_of_line(pp, endptr, &ellength);
1259              FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1260              lastmatchrestart = pp;
1261              }
1262            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1263            }
1264    
1265          /* If there were non-contiguous lines printed above, insert hyphens. */
1266    
1267          if (hyphenpending)
1268            {
1269            fprintf(stdout, "--\n");
1270            hyphenpending = FALSE;
1271            hyphenprinted = TRUE;
1272            }
1273    
1274          /* See if there is a requirement to print some "before" lines for this
1275          match. Again, don't print overlaps. */
1276    
1277          if (before_context > 0)
1278            {
1279            int linecount = 0;
1280            char *p = ptr;
1281    
1282            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1283                   linecount < before_context)
1284              {
1285              linecount++;
1286              p = previous_line(p, buffer);
1287              }
1288    
1289            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1290              fprintf(stdout, "--\n");
1291    
1292            while (p < ptr)
1293              {
1294              int ellength;
1295              char *pp = p;
1296              if (printname != NULL) fprintf(stdout, "%s-", printname);
1297              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1298              pp = end_of_line(pp, endptr, &ellength);
1299              FWRITE(p, 1, pp - p, stdout);
1300              p = pp;
1301              }
1302            }
1303    
1304          /* Now print the matching line(s); ensure we set hyphenpending at the end
1305          of the file if any context lines are being output. */
1306    
1307          if (after_context > 0 || before_context > 0)
1308            endhyphenpending = TRUE;
1309    
1310          if (printname != NULL) fprintf(stdout, "%s:", printname);
1311          if (number) fprintf(stdout, "%d:", linenumber);
1312    
1313          /* In multiline mode, we want to print to the end of the line in which
1314          the end of the matched string is found, so we adjust linelength and the
1315          line number appropriately, but only when there actually was a match
1316          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1317          the match will always be before the first newline sequence. */
1318    
1319          if (multiline & !invert)
1320            {
1321            char *endmatch = ptr + offsets[1];
1322            t = ptr;
1323            while (t < endmatch)
1324              {
1325              t = end_of_line(t, endptr, &endlinelength);
1326              if (t < endmatch) linenumber++; else break;
1327              }
1328            linelength = t - ptr - endlinelength;
1329            }
1330    
1331          /*** NOTE: Use only fwrite() to output the data line, so that binary
1332          zeroes are treated as just another data character. */
1333    
1334          /* This extra option, for Jeffrey Friedl's debugging requirements,
1335          replaces the matched string, or a specific captured string if it exists,
1336          with X. When this happens, colouring is ignored. */
1337    
1338    #ifdef JFRIEDL_DEBUG
1339          if (S_arg >= 0 && S_arg < mrc)
1340            {
1341            int first = S_arg * 2;
1342            int last  = first + 1;
1343            FWRITE(ptr, 1, offsets[first], stdout);
1344            fprintf(stdout, "X");
1345            FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1346            }
1347          else
1348    #endif
1349    
1350          /* We have to split the line(s) up if colouring, and search for further
1351          matches, but not of course if the line is a non-match. */
1352    
1353          if (do_colour && !invert)
1354            {
1355            int plength;
1356            int last_offset = 0;
1357            FWRITE(ptr, 1, offsets[0], stdout);
1358            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1359            FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1360            fprintf(stdout, "%c[00m", 0x1b);
1361            for (;;)
1362              {
1363              last_offset += offsets[1];
1364              matchptr += offsets[1];
1365              length -= offsets[1];
1366              if (last_offset >= linelength + endlinelength ||
1367                  !match_patterns(matchptr, length, offsets, &mrc)) break;
1368              FWRITE(matchptr, 1, offsets[0], stdout);
1369              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1370              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1371              fprintf(stdout, "%c[00m", 0x1b);
1372              }
1373    
1374            /* In multiline mode, we may have already printed the complete line
1375            and its line-ending characters (if they matched the pattern), so there
1376            may be no more to print. */
1377    
1378            plength = (linelength + endlinelength) - last_offset;
1379            if (plength > 0)
1380              FWRITE(ptr + last_offset, 1, plength, stdout);
1381            }
1382    
1383          /* Not colouring; no need to search for further matches */
1384    
1385          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1386          }
1387    
1388        /* End of doing what has to be done for a match. If --line-buffered was
1389        given, flush the output. */
1390    
1391        if (line_buffered) fflush(stdout);
1392        rc = 0;    /* Had some success */
1393    
1394        /* Remember where the last match happened for after_context. We remember
1395        where we are about to restart, and that line's number. */
1396    
1397        lastmatchrestart = ptr + linelength + endlinelength;
1398        lastmatchnumber = linenumber + 1;
1399        }
1400    
1401      /* For a match in multiline inverted mode (which of course did not cause
1402      anything to be printed), we have to move on to the end of the match before
1403      proceeding. */
1404    
1405      if (multiline && invert && match)
1406        {
1407        int ellength;
1408        char *endmatch = ptr + offsets[1];
1409        t = ptr;
1410        while (t < endmatch)
1411          {
1412          t = end_of_line(t, endptr, &ellength);
1413          if (t <= endmatch) linenumber++; else break;
1414          }
1415        endmatch = end_of_line(endmatch, endptr, &ellength);
1416        linelength = endmatch - ptr - ellength;
1417        }
1418    
1419      /* Advance to after the newline and increment the line number. The file
1420      offset to the current line is maintained in filepos. */
1421    
1422      ptr += linelength + endlinelength;
1423      filepos += (int)(linelength + endlinelength);
1424      linenumber++;
1425    
1426      /* If input is line buffered, and the buffer is not yet full, read another
1427      line and add it into the buffer. */
1428    
1429      if (input_line_buffered && bufflength < sizeof(buffer))
1430        {
1431        int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1432        bufflength += add;
1433        endptr += add;
1434        }
1435    
1436      /* If we haven't yet reached the end of the file (the buffer is full), and
1437      the current point is in the top 1/3 of the buffer, slide the buffer down by
1438      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1439      about to be lost, print them. */
1440    
1441      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1442        {
1443        if (after_context > 0 &&
1444            lastmatchnumber > 0 &&
1445            lastmatchrestart < buffer + MBUFTHIRD)
1446          {
1447          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1448          lastmatchnumber = 0;
1449          }
1450    
1451        /* Now do the shuffle */
1452    
1453        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1454        ptr -= MBUFTHIRD;
1455    
1456    #ifdef SUPPORT_LIBZ
1457        if (frtype == FR_LIBZ)
1458          bufflength = 2*MBUFTHIRD +
1459            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1460        else
1461    #endif
1462    
1463    #ifdef SUPPORT_LIBBZ2
1464        if (frtype == FR_LIBBZ2)
1465          bufflength = 2*MBUFTHIRD +
1466            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1467        else
1468    #endif
1469    
1470        bufflength = 2*MBUFTHIRD +
1471          (input_line_buffered?
1472           read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1473           fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1474        endptr = buffer + bufflength;
1475    
1476        /* Adjust any last match point */
1477    
1478        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1479        }
1480      }     /* Loop through the whole file */
1481    
1482    /* End of file; print final "after" lines if wanted; do_after_lines sets
1483    hyphenpending if it prints something. */
1484    
1485    if (only_matching < 0 && !count_only)
1486      {
1487      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1488      hyphenpending |= endhyphenpending;
1489      }
1490    
1491    /* Print the file name if we are looking for those without matches and there
1492    were none. If we found a match, we won't have got this far. */
1493    
1494    if (filenames == FN_NOMATCH_ONLY)
1495      {
1496      fprintf(stdout, "%s\n", printname);
1497      return 0;
1498      }
1499    
1500    /* Print the match count if wanted */
1501    
1502    if (count_only)
1503      {
1504      if (count > 0 || !omit_zero_count)
1505        {
1506        if (printname != NULL && filenames != FN_NONE)
1507          fprintf(stdout, "%s:", printname);
1508        fprintf(stdout, "%d\n", count);
1509        }
1510      }
1511    
1512    return rc;
1513    }
1514    
1515    
1516    
1517    /*************************************************
1518    *     Grep a file or recurse into a directory    *
1519    *************************************************/
1520    
1521    /* Given a path name, if it's a directory, scan all the files if we are
1522    recursing; if it's a file, grep it.
1523    
1524    Arguments:
1525      pathname          the path to investigate
1526      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1527      only_one_at_top   TRUE if the path is the only one at toplevel
1528    
1529    Returns:   0 if there was at least one match
1530               1 if there were no matches
1531               2 there was some kind of error
1532    
1533    However, file opening failures are suppressed if "silent" is set.
1534    */
1535    
1536    static int
1537    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1538    {
1539    int rc = 1;
1540    int sep;
1541    int frtype;
1542    int pathlen;
1543    void *handle;
1544    FILE *in = NULL;           /* Ensure initialized */
1545    
1546    #ifdef SUPPORT_LIBZ
1547    gzFile ingz = NULL;
1548    #endif
1549    
1550    #ifdef SUPPORT_LIBBZ2
1551    BZFILE *inbz2 = NULL;
1552    #endif
1553    
1554    /* If the file name is "-" we scan stdin */
1555    
1556    if (strcmp(pathname, "-") == 0)
1557      {
1558      return pcregrep(stdin, FR_PLAIN,
1559        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1560          stdin_name : NULL);
1561      }
1562    
1563    /* If the file is a directory, skip if skipping or if we are recursing, scan
1564    each file and directory within it, subject to any include or exclude patterns
1565    that were set. The scanning code is localized so it can be made
1566    system-specific. */
1567    
1568    if ((sep = isdirectory(pathname)) != 0)
1569      {
1570      if (dee_action == dee_SKIP) return 1;
1571      if (dee_action == dee_RECURSE)
1572        {
1573        char buffer[1024];
1574        char *nextfile;
1575        directory_type *dir = opendirectory(pathname);
1576    
1577        if (dir == NULL)
1578          {
1579          if (!silent)
1580            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1581              strerror(errno));
1582          return 2;
1583          }
1584    
1585        while ((nextfile = readdirectory(dir)) != NULL)
1586          {
1587          int frc, nflen;
1588          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1589          nflen = (int)(strlen(nextfile));
1590    
1591          if (isdirectory(buffer))
1592            {
1593            if (exclude_dir_compiled != NULL &&
1594                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1595              continue;
1596    
1597            if (include_dir_compiled != NULL &&
1598                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1599              continue;
1600            }
1601          else
1602            {
1603            if (exclude_compiled != NULL &&
1604                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1605              continue;
1606    
1607            if (include_compiled != NULL &&
1608                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1609              continue;
1610            }
1611    
1612          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1613          if (frc > 1) rc = frc;
1614           else if (frc == 0 && rc == 1) rc = 0;
1615          }
1616    
1617        closedirectory(dir);
1618        return rc;
1619        }
1620      }
1621    
1622    /* If the file is not a directory and not a regular file, skip it if that's
1623    been requested. */
1624    
1625    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1626    
1627    /* Control reaches here if we have a regular file, or if we have a directory
1628    and recursion or skipping was not requested, or if we have anything else and
1629    skipping was not requested. The scan proceeds. If this is the first and only
1630    argument at top level, we don't show the file name, unless we are only showing
1631    the file name, or the filename was forced (-H). */
1632    
1633    pathlen = (int)(strlen(pathname));
1634    
1635    /* Open using zlib if it is supported and the file name ends with .gz. */
1636    
1637    #ifdef SUPPORT_LIBZ
1638    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1639      {
1640      ingz = gzopen(pathname, "rb");
1641      if (ingz == NULL)
1642        {
1643        if (!silent)
1644          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1645            strerror(errno));
1646        return 2;
1647        }
1648      handle = (void *)ingz;
1649      frtype = FR_LIBZ;
1650      }
1651    else
1652    #endif
1653    
1654    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1655    
1656    #ifdef SUPPORT_LIBBZ2
1657    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1658      {
1659      inbz2 = BZ2_bzopen(pathname, "rb");
1660      handle = (void *)inbz2;
1661      frtype = FR_LIBBZ2;
1662      }
1663    else
1664    #endif
1665    
1666    /* Otherwise use plain fopen(). The label is so that we can come back here if
1667    an attempt to read a .bz2 file indicates that it really is a plain file. */
1668    
1669    #ifdef SUPPORT_LIBBZ2
1670    PLAIN_FILE:
1671    #endif
1672      {
1673      in = fopen(pathname, "rb");
1674      handle = (void *)in;
1675      frtype = FR_PLAIN;
1676      }
1677    
1678    /* All the opening methods return errno when they fail. */
1679    
1680    if (handle == NULL)
1681      {
1682      if (!silent)
1683        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1684          strerror(errno));
1685      return 2;
1686      }
1687    
1688    /* Now grep the file */
1689    
1690    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1691      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1692    
1693    /* Close in an appropriate manner. */
1694    
1695    #ifdef SUPPORT_LIBZ
1696    if (frtype == FR_LIBZ)
1697      gzclose(ingz);
1698    else
1699    #endif
1700    
1701    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1702    read failed. If the error indicates that the file isn't in fact bzipped, try
1703    again as a normal file. */
1704    
1705    #ifdef SUPPORT_LIBBZ2
1706    if (frtype == FR_LIBBZ2)
1707      {
1708      if (rc == 2)
1709        {
1710        int errnum;
1711        const char *err = BZ2_bzerror(inbz2, &errnum);
1712        if (errnum == BZ_DATA_ERROR_MAGIC)
1713          {
1714          BZ2_bzclose(inbz2);
1715          goto PLAIN_FILE;
1716          }
1717        else if (!silent)
1718          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1719            pathname, err);
1720        }
1721      BZ2_bzclose(inbz2);
1722      }
1723    else
1724    #endif
1725    
1726    /* Normal file close */
1727    
1728    fclose(in);
1729    
1730    /* Pass back the yield from pcregrep(). */
1731    
1732    return rc;
1733    }
1734    
1735    
1736    
1737    
1738    /*************************************************
1739    *                Usage function                  *
1740    *************************************************/
1741    
1742    static int
1743    usage(int rc)
1744    {
1745    option_item *op;
1746    fprintf(stderr, "Usage: pcregrep [-");
1747    for (op = optionlist; op->one_char != 0; op++)
1748      {
1749      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1750      }
1751    fprintf(stderr, "] [long options] [pattern] [files]\n");
1752    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1753      "options.\n");
1754    return rc;
1755    }
1756    
1757    
1758    
1759    
1760    /*************************************************
1761    *                Help function                   *
1762  *************************************************/  *************************************************/
1763    
1764  static void  static void
# Line 304  help(void) Line 1766  help(void)
1766  {  {
1767  option_item *op;  option_item *op;
1768    
1769  printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1770  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1771  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1772    printf("\"-\" can be used as a file name to mean STDIN.\n");
1773    
1774    #ifdef SUPPORT_LIBZ
1775    printf("Files whose names end in .gz are read using zlib.\n");
1776    #endif
1777    
1778    #ifdef SUPPORT_LIBBZ2
1779    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1780    #endif
1781    
1782    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1783    printf("Other files and the standard input are read as plain files.\n\n");
1784    #else
1785    printf("All files are read as plain files, without any interpretation.\n\n");
1786    #endif
1787    
1788    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1789  printf("Options:\n");  printf("Options:\n");
1790    
1791  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
1792    {    {
1793    int n;    int n;
1794    char s[4];    char s[4];
1795    
1796      /* Two options were accidentally implemented and documented with underscores
1797      instead of hyphens in their names, something that was not noticed for quite a
1798      few releases. When fixing this, I left the underscored versions in the list
1799      in case people were using them. However, we don't want to display them in the
1800      help data. There are no other options that contain underscores, and we do not
1801      expect ever to implement such options. Therefore, just omit any option that
1802      contains an underscore. */
1803    
1804      if (strchr(op->long_name, '_') != NULL) continue;
1805    
1806    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1807    printf("  %s --%s%n", s, op->long_name, &n);    n = 31 - printf("  %s --%s", s, op->long_name);
   n = 30 - n;  
1808    if (n < 1) n = 1;    if (n < 1) n = 1;
1809    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                     ", op->help_text);
1810    }    }
1811    
1812  printf("\n  -f<filename>  or  --file=<filename>\n");  printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1813  printf("    Read patterns from <filename> instead of using a command line option.\n");  printf("trailing white space is removed and blank lines are ignored.\n");
1814  printf("    Trailing white space is removed; blanks lines are ignored.\n");  printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
 printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
1815    
1816  printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1817  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1818  }  }
1819    
# Line 334  printf("Exit status is 0 if any matches, Line 1821  printf("Exit status is 0 if any matches,
1821    
1822    
1823  /*************************************************  /*************************************************
1824  *                Handle an option                *  *    Handle a single-letter, no data option      *
1825  *************************************************/  *************************************************/
1826    
1827  static int  static int
1828  handle_option(int letter, int options)  handle_option(int letter, int options)
1829    {
1830    switch(letter)
1831      {
1832      case N_FOFFSETS: file_offsets = TRUE; break;
1833      case N_HELP: help(); pcregrep_exit(0);
1834      case N_LOFFSETS: line_offsets = number = TRUE; break;
1835      case N_LBUFFER: line_buffered = TRUE; break;
1836      case 'c': count_only = TRUE; break;
1837      case 'F': process_options |= PO_FIXED_STRINGS; break;
1838      case 'H': filenames = FN_FORCE; break;
1839      case 'h': filenames = FN_NONE; break;
1840      case 'i': options |= PCRE_CASELESS; break;
1841      case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1842      case 'L': filenames = FN_NOMATCH_ONLY; break;
1843      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1844      case 'n': number = TRUE; break;
1845      case 'o': only_matching = 0; break;
1846      case 'q': quiet = TRUE; break;
1847      case 'r': dee_action = dee_RECURSE; break;
1848      case 's': silent = TRUE; break;
1849      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1850      case 'v': invert = TRUE; break;
1851      case 'w': process_options |= PO_WORD_MATCH; break;
1852      case 'x': process_options |= PO_LINE_MATCH; break;
1853    
1854      case 'V':
1855      fprintf(stderr, "pcregrep version %s\n", pcre_version());
1856      pcregrep_exit(0);
1857      break;
1858    
1859      default:
1860      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1861      pcregrep_exit(usage(2));
1862      }
1863    
1864    return options;
1865    }
1866    
1867    
1868    
1869    
1870    /*************************************************
1871    *          Construct printed ordinal             *
1872    *************************************************/
1873    
/* This turns a number into an English ordinal such as "1st", "2nd", "3rd",
"4th", "11th" or "21st", for use in messages.

Argument:   n   the number
Returns:    a pointer to a static buffer, overwritten by the next call

The suffix follows the last digit, except that numbers ending in 11, 12 or 13
take "th". The buffer allows three characters for each byte of an int, which
is enough for its digits and a sign, plus the suffix and the terminating zero.
*/

static char *
ordin(int n)
{
static char buffer[3 * sizeof(int) + 4];
char *p = buffer;
int last_two = n % 100;
sprintf(p, "%d", n);
while (*p != 0) p++;
if (last_two >= 11 && last_two <= 13)
  {
  strcpy(p, "th");               /* 11th, 12th, 13th, 111th, ... */
  return buffer;
  }
switch (n%10)
  {
  case 1: strcpy(p, "st"); break;
  case 2: strcpy(p, "nd"); break;
  case 3: strcpy(p, "rd"); break;
  default: strcpy(p, "th"); break;
  }
return buffer;
}
1892    
1893    
1894    
1895    /*************************************************
1896    *          Compile a single pattern              *
1897    *************************************************/
1898    
1899    /* When the -F option has been used, this is called for each substring.
1900    Otherwise it's called for each supplied pattern.
1901    
1902    Arguments:
1903      pattern        the pattern string
1904      options        the PCRE options
1905      filename       the file name, or NULL for a command-line pattern
1906      count          0 if this is the only command line pattern, or
1907                     number of the command line pattern, or
1908                     linenumber for a pattern from a file
1909    
1910    Returns:         TRUE on success, FALSE after an error
1911    */
1912    
1913    static BOOL
1914    compile_single_pattern(char *pattern, int options, char *filename, int count)
1915    {
1916    char buffer[MBUFTHIRD + 16];
1917    const char *error;
1918    int errptr;
1919    
1920    if (pattern_count >= MAX_PATTERN_COUNT)
1921      {
1922      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1923        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1924      return FALSE;
1925      }
1926    
1927    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1928      suffix[process_options]);
1929    pattern_list[pattern_count] =
1930      pcre_compile(buffer, options, &error, &errptr, pcretables);
1931    if (pattern_list[pattern_count] != NULL)
1932      {
1933      pattern_count++;
1934      return TRUE;
1935      }
1936    
1937    /* Handle compile errors */
1938    
1939    errptr -= (int)strlen(prefix[process_options]);
1940    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1941    
1942    if (filename == NULL)
1943      {
1944      if (count == 0)
1945        fprintf(stderr, "pcregrep: Error in command-line regex "
1946          "at offset %d: %s\n", errptr, error);
1947      else
1948        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1949          "at offset %d: %s\n", ordin(count), errptr, error);
1950      }
1951    else
1952      {
1953      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1954        "at offset %d: %s\n", count, filename, errptr, error);
1955      }
1956    
1957    return FALSE;
1958    }
1959    
1960    
1961    
1962    /*************************************************
1963    *           Compile one supplied pattern         *
1964    *************************************************/
1965    
1966    /* When the -F option has been used, each string may be a list of strings,
1967    separated by line breaks. They will be matched literally.
1968    
1969    Arguments:
1970      pattern        the pattern string
1971      options        the PCRE options
1972      filename       the file name, or NULL for a command-line pattern
1973      count          0 if this is the only command line pattern, or
1974                     number of the command line pattern, or
1975                     linenumber for a pattern from a file
1976    
1977    Returns:         TRUE on success, FALSE after an error
1978    */
1979    
1980    static BOOL
1981    compile_pattern(char *pattern, int options, char *filename, int count)
1982  {  {
1983  switch(letter)  if ((process_options & PO_FIXED_STRINGS) != 0)
1984    {    {
1985    case -1:  help(); exit(0);    char *eop = pattern + strlen(pattern);
1986    case 'c': count_only = TRUE; break;    char buffer[MBUFTHIRD];
1987    case 'h': filenames = FALSE; break;    for(;;)
1988    case 'i': options |= PCRE_CASELESS; break;      {
1989    case 'l': filenames_only = TRUE;      int ellength;
1990    case 'n': number = TRUE; break;      char *p = end_of_line(pattern, eop, &ellength);
1991    case 'r': recurse = TRUE; break;      if (ellength == 0)
1992    case 's': silent = TRUE; break;        return compile_single_pattern(pattern, options, filename, count);
1993    case 'v': invert = TRUE; break;      sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1994    case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;      pattern = p;
1995        if (!compile_single_pattern(buffer, options, filename, count))
1996    case 'V':        return FALSE;
1997    fprintf(stderr, "pcregrep version %s using ", VERSION);      }
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
   exit(0);  
   break;  
   
   default:  
   fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);  
   exit(usage(2));  
1998    }    }
1999    else return compile_single_pattern(pattern, options, filename, count);
 return options;  
2000  }  }
2001    
2002    
2003    
   
2004  /*************************************************  /*************************************************
2005  *                Main program                    *  *                Main program                    *
2006  *************************************************/  *************************************************/
2007    
2008    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2009    
2010  int  int
2011  main(int argc, char **argv)  main(int argc, char **argv)
2012  {  {
2013  int i, j;  int i, j;
2014  int rc = 1;  int rc = 1;
2015  int options = 0;  int pcre_options = 0;
2016    int cmd_pattern_count = 0;
2017    int hint_count = 0;
2018  int errptr;  int errptr;
 const char *error;  
2019  BOOL only_one_at_top;  BOOL only_one_at_top;
2020    char *patterns[MAX_PATTERN_COUNT];
2021    const char *locale_from = "--locale";
2022    const char *error;
2023    
2024    /* Set the default line ending value from the default in the PCRE library;
2025    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2026    Note that the return values from pcre_config(), though derived from the ASCII
2027    codes, are the same in EBCDIC environments, so we must use the actual values
2028    rather than escapes such as '\r'. */
2029    
2030    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2031    switch(i)
2032      {
2033      default:               newline = (char *)"lf"; break;
2034      case 13:               newline = (char *)"cr"; break;
2035      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2036      case -1:               newline = (char *)"any"; break;
2037      case -2:               newline = (char *)"anycrlf"; break;
2038      }
2039    
2040  /* Process the options */  /* Process the options */
2041    
2042  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
2043    {    {
2044      option_item *op = NULL;
2045      char *option_data = (char *)"";    /* default to keep compiler happy */
2046      BOOL longop;
2047      BOOL longopwasequals = FALSE;
2048    
2049    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
2050    
2051    /* Long name options */    /* If we hit an argument that is just "-", it may be a reference to STDIN,
2052      but only if we have previously had -e or -f to define the patterns. */
2053    
2054      if (argv[i][1] == 0)
2055        {
2056        if (pattern_filename != NULL || pattern_count > 0) break;
2057          else pcregrep_exit(usage(2));
2058        }
2059    
2060      /* Handle a long name option, or -- to terminate the options */
2061    
2062    if (argv[i][1] == '-')    if (argv[i][1] == '-')
2063      {      {
2064      option_item *op;      char *arg = argv[i] + 2;
2065        char *argequals = strchr(arg, '=');
2066    
2067      if (strncmp(argv[i]+2, "file=", 5) == 0)      if (*arg == 0)    /* -- terminates options */
2068        {        {
2069        pattern_filename = argv[i] + 7;        i++;
2070        continue;        break;                /* out of the options-handling loop */
2071        }        }
2072    
2073        longop = TRUE;
2074    
2075        /* Some long options have data that follows after =, for example file=name.
2076        Some options have variations in the long name spelling: specifically, we
2077        allow "regexp" because GNU grep allows it, though I personally go along
2078        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2079        These options are entered in the table as "regex(p)". Options can be in
2080        both these categories. */
2081    
2082      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2083        {        {
2084        if (strcmp(argv[i]+2, op->long_name) == 0)        char *opbra = strchr(op->long_name, '(');
2085          char *equals = strchr(op->long_name, '=');
2086    
2087          /* Handle options with only one spelling of the name */
2088    
2089          if (opbra == NULL)     /* Does not contain '(' */
2090            {
2091            if (equals == NULL)  /* Not thing=data case */
2092              {
2093              if (strcmp(arg, op->long_name) == 0) break;
2094              }
2095            else                 /* Special case xxx=data */
2096              {
2097              int oplen = (int)(equals - op->long_name);
2098              int arglen = (argequals == NULL)?
2099                (int)strlen(arg) : (int)(argequals - arg);
2100              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2101                {
2102                option_data = arg + arglen;
2103                if (*option_data == '=')
2104                  {
2105                  option_data++;
2106                  longopwasequals = TRUE;
2107                  }
2108                break;
2109                }
2110              }
2111            }
2112    
2113          /* Handle options with an alternate spelling of the name */
2114    
2115          else
2116          {          {
2117          options = handle_option(op->one_char, options);          char buff1[24];
2118          break;          char buff2[24];
2119    
2120            int baselen = (int)(opbra - op->long_name);
2121            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2122            int arglen = (argequals == NULL || equals == NULL)?
2123              (int)strlen(arg) : (int)(argequals - arg);
2124    
2125            sprintf(buff1, "%.*s", baselen, op->long_name);
2126            sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2127    
2128            if (strncmp(arg, buff1, arglen) == 0 ||
2129               strncmp(arg, buff2, arglen) == 0)
2130              {
2131              if (equals != NULL && argequals != NULL)
2132                {
2133                option_data = argequals;
2134                if (*option_data == '=')
2135                  {
2136                  option_data++;
2137                  longopwasequals = TRUE;
2138                  }
2139                }
2140              break;
2141              }
2142          }          }
2143        }        }
2144    
2145      if (op->one_char == 0)      if (op->one_char == 0)
2146        {        {
2147        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2148        exit(usage(2));        pcregrep_exit(usage(2));
2149        }        }
2150      }      }
2151    
2152    /* One-char options */    /* Jeffrey Friedl's debugging harness uses these additional options which
2153      are not in the right form for putting in the option table because they use
2154      only one hyphen, yet are more than one character long. By putting them
2155      separately here, they will not get displayed as part of the help() output,
2156      but I don't think Jeffrey will care about that. */
2157    
2158    #ifdef JFRIEDL_DEBUG
2159      else if (strcmp(argv[i], "-pre") == 0) {
2160              jfriedl_prefix = argv[++i];
2161              continue;
2162      } else if (strcmp(argv[i], "-post") == 0) {
2163              jfriedl_postfix = argv[++i];
2164              continue;
2165      } else if (strcmp(argv[i], "-XT") == 0) {
2166              sscanf(argv[++i], "%d", &jfriedl_XT);
2167              continue;
2168      } else if (strcmp(argv[i], "-XR") == 0) {
2169              sscanf(argv[++i], "%d", &jfriedl_XR);
2170              continue;
2171      }
2172    #endif
2173    
2174    
2175      /* One-char options; many that have no data may be in a single argument; we
2176      continue till we hit the last one or one that needs data. */
2177    
2178    else    else
2179      {      {
2180      char *s = argv[i] + 1;      char *s = argv[i] + 1;
2181        longop = FALSE;
2182      while (*s != 0)      while (*s != 0)
2183        {        {
2184        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
2185          {          {
2186          pattern_filename = s + 1;          if (*s == op->one_char) break;
2187          if (pattern_filename[0] == 0)          }
2188            {        if (op->one_char == 0)
2189            if (i >= argc - 1)          {
2190              {          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2191              fprintf(stderr, "pcregrep: File name missing after -f\n");            *s, argv[i]);
2192              exit(usage(2));          pcregrep_exit(usage(2));
2193              }          }
2194            pattern_filename = argv[++i];  
2195            }        /* Check for a single-character option that has data: OP_OP_NUMBER
2196          break;        is used for one that either has a numerical number or defaults, i.e. the
2197          data is optional. If a digit follows, there is data; if not, carry on
2198          with other single-character options in the same string. */
2199    
2200          option_data = s+1;
2201          if (op->type == OP_OP_NUMBER)
2202            {
2203            if (isdigit((unsigned char)s[1])) break;
2204            }
2205          else   /* Check for end or a dataless option */
2206            {
2207            if (op->type != OP_NODATA || s[1] == 0) break;
2208          }          }
2209        else options = handle_option(*s++, options);  
2210          /* Handle a single-character option with no data, then loop for the
2211          next character in the string. */
2212    
2213          pcre_options = handle_option(*s++, pcre_options);
2214        }        }
2215      }      }
   }  
2216    
2217  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));    /* At this point we should have op pointing to a matched option. If the type
2218  hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));    is NO_DATA, it means that there is no data, and the option might set
2219      something in the PCRE options. */
2220    
2221  if (pattern_list == NULL || hints_list == NULL)    if (op->type == OP_NODATA)
2222    {      {
2223    fprintf(stderr, "pcregrep: malloc failed\n");      pcre_options = handle_option(op->one_char, pcre_options);
2224    return 2;      continue;
2225    }      }
2226    
2227  /* Compile the regular expression(s). */    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2228      either has a value or defaults to something. It cannot have data in a
2229      separate item. At the moment, the only such options are "colo(u)r",
2230      "only-matching", and Jeffrey Friedl's special -S debugging option. */
2231    
2232  if (pattern_filename != NULL)    if (*option_data == 0 &&
2233    {        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
   FILE *f = fopen(pattern_filename, "r");  
   char buffer[BUFSIZ];  
   if (f == NULL)  
2234      {      {
2235      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      switch (op->one_char)
2236        strerror(errno));        {
2237      return 2;        case N_COLOUR:
2238          colour_option = (char *)"auto";
2239          break;
2240    
2241          case 'o':
2242          only_matching = 0;
2243          break;
2244    
2245    #ifdef JFRIEDL_DEBUG
2246          case 'S':
2247          S_arg = 0;
2248          break;
2249    #endif
2250          }
2251        continue;
2252        }
2253    
2254      /* Otherwise, find the data string for the option. */
2255    
2256      if (*option_data == 0)
2257        {
2258        if (i >= argc - 1 || longopwasequals)
2259          {
2260          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2261          pcregrep_exit(usage(2));
2262          }
2263        option_data = argv[++i];
2264      }      }
2265    while (fgets(buffer, sizeof(buffer), f) != NULL)  
2266      /* If the option type is OP_PATLIST, it's the -e option, which can be called
2267      multiple times to create a list of patterns. */
2268    
2269      if (op->type == OP_PATLIST)
2270      {      {
2271      char *s = buffer + (int)strlen(buffer);      if (cmd_pattern_count >= MAX_PATTERN_COUNT)
     if (pattern_count >= MAX_PATTERN_COUNT)  
2272        {        {
2273        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2274          MAX_PATTERN_COUNT);          MAX_PATTERN_COUNT);
2275        return 2;        return 2;
2276        }        }
2277      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      patterns[cmd_pattern_count++] = option_data;
2278      if (s == buffer) continue;      }
2279      *s = 0;  
2280      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,    /* Otherwise, deal with single string or numeric data values. */
2281        &errptr, NULL);  
2282      if (pattern_list[pattern_count++] == NULL)    else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2283               op->type != OP_OP_NUMBER)
2284        {
2285        *((char **)op->dataptr) = option_data;
2286        }
2287    
2288      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2289      only for unpicking arguments, so just keep it simple. */
2290    
2291      else
2292        {
2293        unsigned long int n = 0;
2294        char *endptr = option_data;
2295        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2296        while (isdigit((unsigned char)(*endptr)))
2297          n = n * 10 + (int)(*endptr++ - '0');
2298        if (*endptr != 0)
2299        {        {
2300        fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",        if (longop)
2301          pattern_count, errptr, error);          {
2302        return 2;          char *equals = strchr(op->long_name, '=');
2303            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2304              (int)(equals - op->long_name);
2305            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2306              option_data, nlen, op->long_name);
2307            }
2308          else
2309            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2310              option_data, op->one_char);
2311          pcregrep_exit(usage(2));
2312        }        }
2313        if (op->type == OP_LONGNUMBER)
2314            *((unsigned long int *)op->dataptr) = n;
2315        else
2316            *((int *)op->dataptr) = n;
2317        }
2318      }
2319    
2320    /* Options have been decoded. If -C was used, its value is used as a default
2321    for -A and -B. */
2322    
2323    if (both_context > 0)
2324      {
2325      if (after_context == 0) after_context = both_context;
2326      if (before_context == 0) before_context = both_context;
2327      }
2328    
2329    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2330    However, the latter two set only_matching. */
2331    
2332    if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2333        (file_offsets && line_offsets))
2334      {
2335      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2336        "and/or --line-offsets\n");
2337      pcregrep_exit(usage(2));
2338      }
2339    
2340    if (file_offsets || line_offsets) only_matching = 0;
2341    
2342    /* If a locale has not been provided as an option, see if the LC_CTYPE or
2343    LC_ALL environment variable is set, and if so, use it. */
2344    
2345    if (locale == NULL)
2346      {
2347      locale = getenv("LC_ALL");
2348      locale_from = "LCC_ALL";
2349      }
2350    
2351    if (locale == NULL)
2352      {
2353      locale = getenv("LC_CTYPE");
2354      locale_from = "LC_CTYPE";
2355      }
2356    
2357    /* If a locale has been provided, set it, and generate the tables the PCRE
2358    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2359    
2360    if (locale != NULL)
2361      {
2362      if (setlocale(LC_CTYPE, locale) == NULL)
2363        {
2364        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2365          locale, locale_from);
2366        return 2;
2367        }
2368      pcretables = pcre_maketables();
2369      }
2370    
2371    /* Sort out colouring */
2372    
2373    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2374      {
2375      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2376      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2377      else
2378        {
2379        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2380          colour_option);
2381        return 2;
2382        }
2383      if (do_colour)
2384        {
2385        char *cs = getenv("PCREGREP_COLOUR");
2386        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2387        if (cs != NULL) colour_string = cs;
2388      }      }
   fclose(f);  
2389    }    }
2390    
2391  /* If no file name, a single regex must be given inline */  /* Interpret the newline type; the default settings are Unix-like. */
2392    
2393    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2394      {
2395      pcre_options |= PCRE_NEWLINE_CR;
2396      endlinetype = EL_CR;
2397      }
2398    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2399      {
2400      pcre_options |= PCRE_NEWLINE_LF;
2401      endlinetype = EL_LF;
2402      }
2403    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2404      {
2405      pcre_options |= PCRE_NEWLINE_CRLF;
2406      endlinetype = EL_CRLF;
2407      }
2408    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2409      {
2410      pcre_options |= PCRE_NEWLINE_ANY;
2411      endlinetype = EL_ANY;
2412      }
2413    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2414      {
2415      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2416      endlinetype = EL_ANYCRLF;
2417      }
2418  else  else
2419    {    {
2420    if (i >= argc) return usage(0);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2421    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);    return 2;
2422    if (pattern_list[0] == NULL)    }
2423    
2424    /* Interpret the text values for -d and -D */
2425    
2426    if (dee_option != NULL)
2427      {
2428      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2429      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2430      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2431      else
2432      {      {
2433      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
       error);  
2434      return 2;      return 2;
2435      }      }
   pattern_count++;  
2436    }    }
2437    
2438  /* Study the regular expressions, as we will be running them may times */  if (DEE_option != NULL)
2439      {
2440      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2441      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2442      else
2443        {
2444        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2445        return 2;
2446        }
2447      }
2448    
2449    /* Check the values for Jeffrey Friedl's debugging options. */
2450    
2451    #ifdef JFRIEDL_DEBUG
2452    if (S_arg > 9)
2453      {
2454      fprintf(stderr, "pcregrep: bad value for -S option\n");
2455      return 2;
2456      }
2457    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2458      {
2459      if (jfriedl_XT == 0) jfriedl_XT = 1;
2460      if (jfriedl_XR == 0) jfriedl_XR = 1;
2461      }
2462    #endif
2463    
2464    /* Get memory to store the pattern and hints lists. */
2465    
2466    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2467    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2468    
2469    if (pattern_list == NULL || hints_list == NULL)
2470      {
2471      fprintf(stderr, "pcregrep: malloc failed\n");
2472      goto EXIT2;
2473      }
2474    
2475    /* If no patterns were provided by -e, and there is no file provided by -f,
2476    the first argument is the one and only pattern, and it must exist. */
2477    
2478    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2479      {
2480      if (i >= argc) return usage(2);
2481      patterns[cmd_pattern_count++] = argv[i++];
2482      }
2483    
2484    /* Compile the patterns that were provided on the command line, either by
2485    multiple uses of -e or as a single unkeyed pattern. */
2486    
2487    for (j = 0; j < cmd_pattern_count; j++)
2488      {
2489      if (!compile_pattern(patterns[j], pcre_options, NULL,
2490           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2491        goto EXIT2;
2492      }
2493    
2494    /* Compile the regular expressions that are provided in a file. */
2495    
2496    if (pattern_filename != NULL)
2497      {
2498      int linenumber = 0;
2499      FILE *f;
2500      char *filename;
2501      char buffer[MBUFTHIRD];
2502    
2503      if (strcmp(pattern_filename, "-") == 0)
2504        {
2505        f = stdin;
2506        filename = stdin_name;
2507        }
2508      else
2509        {
2510        f = fopen(pattern_filename, "r");
2511        if (f == NULL)
2512          {
2513          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2514            strerror(errno));
2515          goto EXIT2;
2516          }
2517        filename = pattern_filename;
2518        }
2519    
2520      while (fgets(buffer, MBUFTHIRD, f) != NULL)
2521        {
2522        char *s = buffer + (int)strlen(buffer);
2523        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2524        *s = 0;
2525        linenumber++;
2526        if (buffer[0] == 0) continue;   /* Skip blank lines */
2527        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2528          goto EXIT2;
2529        }
2530    
2531      if (f != stdin) fclose(f);
2532      }
2533    
2534    /* Study the regular expressions, as we will be running them many times */
2535    
2536  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
2537    {    {
# Line 513  for (j = 0; j < pattern_count; j++) Line 2541  for (j = 0; j < pattern_count; j++)
2541      char s[16];      char s[16];
2542      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2543      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2544      return 2;      goto EXIT2;
2545        }
2546      hint_count++;
2547      }
2548    
2549    /* If --match-limit or --recursion-limit was set, put the value(s) into the
2550    pcre_extra block for each pattern. */
2551    
2552    if (match_limit > 0 || match_limit_recursion > 0)
2553      {
2554      for (j = 0; j < pattern_count; j++)
2555        {
2556        if (hints_list[j] == NULL)
2557          {
2558          hints_list[j] = malloc(sizeof(pcre_extra));
2559          if (hints_list[j] == NULL)
2560            {
2561            fprintf(stderr, "pcregrep: malloc failed\n");
2562            pcregrep_exit(2);
2563            }
2564          }
2565        if (match_limit > 0)
2566          {
2567          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2568          hints_list[j]->match_limit = match_limit;
2569          }
2570        if (match_limit_recursion > 0)
2571          {
2572          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2573          hints_list[j]->match_limit_recursion = match_limit_recursion;
2574          }
2575        }
2576      }
2577    
2578    /* If there are include or exclude patterns, compile them. */
2579    
2580    if (exclude_pattern != NULL)
2581      {
2582      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2583        pcretables);
2584      if (exclude_compiled == NULL)
2585        {
2586        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2587          errptr, error);
2588        goto EXIT2;
2589        }
2590      }
2591    
2592    if (include_pattern != NULL)
2593      {
2594      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2595        pcretables);
2596      if (include_compiled == NULL)
2597        {
2598        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2599          errptr, error);
2600        goto EXIT2;
2601        }
2602      }
2603    
2604    if (exclude_dir_pattern != NULL)
2605      {
2606      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2607        pcretables);
2608      if (exclude_dir_compiled == NULL)
2609        {
2610        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2611          errptr, error);
2612        goto EXIT2;
2613        }
2614      }
2615    
2616    if (include_dir_pattern != NULL)
2617      {
2618      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2619        pcretables);
2620      if (include_dir_compiled == NULL)
2621        {
2622        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2623          errptr, error);
2624        goto EXIT2;
2625      }      }
2626    }    }
2627    
2628  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
2629    
2630  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc)
2631      {
2632      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2633      goto EXIT;
2634      }
2635    
2636  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2637  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
2638  the file name if the argument is not a directory. */  the file name if the argument is not a directory and filenames are not
2639    otherwise forced. */
2640    
2641  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
 if (filenames_only) filenames = TRUE;  
2642    
2643  for (; i < argc; i++)  for (; i < argc; i++)
2644    {    {
2645    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2646    if (frc == 0 && rc == 1) rc = 0;      only_one_at_top);
2647      if (frc > 1) rc = frc;
2648        else if (frc == 0 && rc == 1) rc = 0;
2649    }    }
2650    
2651  return rc;  EXIT:
2652    if (pattern_list != NULL)
2653      {
2654      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2655      free(pattern_list);
2656      }
2657    if (hints_list != NULL)
2658      {
2659      for (i = 0; i < hint_count; i++)
2660        {
2661        if (hints_list[i] != NULL) free(hints_list[i]);
2662        }
2663      free(hints_list);
2664      }
2665    pcregrep_exit(rc);
2666    
2667    EXIT2:
2668    rc = 2;
2669    goto EXIT;
2670  }  }
2671    
2672  /* End */  /* End of pcregrep */

Legend:
Removed from v.53  
changed lines
  Added in v.589

  ViewVC Help
Powered by ViewVC 1.1.5