/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC revision 586 by ph10, Wed Jan 12 17:36:47 2011 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix system it can recurse into directories. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2011 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55    #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 18  its pattern matching. On a Unix system i Line 70  its pattern matching. On a Unix system i
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "2.0 01-Aug-2001"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76    #if BUFSIZ > 8192
77    #define MBUFTHIRD BUFSIZ
78    #else
79    #define MBUFTHIRD 8192
80    #endif
81    
82    /* Values for the "filenames" variable, which specifies options for file name
83    output. The order is important; it is assumed that a file name is wanted for
84    all values greater than FN_DEFAULT. */
85    
86    enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92    /* Actions for the -d and -D options */
93    
94    enum { dee_READ, dee_SKIP, dee_RECURSE };
95    enum { DEE_READ, DEE_SKIP };
96    
97    /* Actions for special processing options (flag bits) */
98    
99    #define PO_WORD_MATCH     0x0001
100    #define PO_LINE_MATCH     0x0002
101    #define PO_FIXED_STRINGS  0x0004
102    
103    /* Line ending types */
104    
105    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
113    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
114    
115    
116    
117  /*************************************************  /*************************************************
118  *               Global variables                 *  *               Global variables                 *
119  *************************************************/  *************************************************/
120    
121    /* Jeffrey Friedl has some debugging requirements that are not part of the
122    regular code. */
123    
124    #ifdef JFRIEDL_DEBUG
125    static int S_arg = -1;
126    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128    static const char *jfriedl_prefix = "";
129    static const char *jfriedl_postfix = "";
130    #endif
131    
132    static int  endlinetype;
133    
134    static char *colour_string = (char *)"1;31";
135    static char *colour_option = NULL;
136    static char *dee_option = NULL;
137    static char *DEE_option = NULL;
138    static char *newline = NULL;
139  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
140    static char *stdin_name = (char *)"(standard input)";
141    static char *locale = NULL;
142    
143    static const unsigned char *pcretables = NULL;
144    
145  static int  pattern_count = 0;  static int  pattern_count = 0;
146  static pcre **pattern_list;  static pcre **pattern_list = NULL;
147  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
148    
149    static char *include_pattern = NULL;
150    static char *exclude_pattern = NULL;
151    static char *include_dir_pattern = NULL;
152    static char *exclude_dir_pattern = NULL;
153    
154    static pcre *include_compiled = NULL;
155    static pcre *exclude_compiled = NULL;
156    static pcre *include_dir_compiled = NULL;
157    static pcre *exclude_dir_compiled = NULL;
158    
159    static int after_context = 0;
160    static int before_context = 0;
161    static int both_context = 0;
162    static int dee_action = dee_READ;
163    static int DEE_action = DEE_READ;
164    static int error_count = 0;
165    static int filenames = FN_DEFAULT;
166    static int only_matching = -1;
167    static int process_options = 0;
168    
169    static unsigned long int match_limit = 0;
170    static unsigned long int match_limit_recursion = 0;
171    
172  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
173  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
174  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
175    static BOOL hyphenpending = FALSE;
176  static BOOL invert = FALSE;  static BOOL invert = FALSE;
177    static BOOL line_buffered = FALSE;
178    static BOOL line_offsets = FALSE;
179    static BOOL multiline = FALSE;
180  static BOOL number = FALSE;  static BOOL number = FALSE;
181  static BOOL recurse = FALSE;  static BOOL omit_zero_count = FALSE;
182    static BOOL resource_error = FALSE;
183    static BOOL quiet = FALSE;
184  static BOOL silent = FALSE;  static BOOL silent = FALSE;
185  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
186    
187  /* Structure for options and list of them */  /* Structure for options and list of them */
188    
189    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
190           OP_OP_NUMBER, OP_PATLIST };
191    
192  typedef struct option_item {  typedef struct option_item {
193      int type;
194    int one_char;    int one_char;
195    char *long_name;    void *dataptr;
196    char *help_text;    const char *long_name;
197      const char *help_text;
198  } option_item;  } option_item;
199    
200    /* Options without a single-letter equivalent get a negative value. This can be
201    used to identify them. */
202    
203    #define N_COLOUR       (-1)
204    #define N_EXCLUDE      (-2)
205    #define N_EXCLUDE_DIR  (-3)
206    #define N_HELP         (-4)
207    #define N_INCLUDE      (-5)
208    #define N_INCLUDE_DIR  (-6)
209    #define N_LABEL        (-7)
210    #define N_LOCALE       (-8)
211    #define N_NULL         (-9)
212    #define N_LOFFSETS     (-10)
213    #define N_FOFFSETS     (-11)
214    #define N_LBUFFER      (-12)
215    #define N_M_LIMIT      (-13)
216    #define N_M_LIMIT_REC  (-14)
217    
218  static option_item optionlist[] = {  static option_item optionlist[] = {
219    { -1,  "help",         "display this help and exit" },    { OP_NODATA,     N_NULL,   NULL,              "",              "  terminate options" },
220    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
221    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
222    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
223    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
224    { 'n', "line-number",  "print line number with output lines" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
225    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
226    { 's', "no-messages",  "suppress error messages" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
227    { 'V', "version",      "print version information and exit" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
228    { 'v', "invert-match", "select non-matching lines" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
229    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_PATLIST,    'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
230    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
231    { 0,    NULL,           NULL }    { OP_STRING,     'f',      &pattern_filename, "file=path",     "read patterns from file" },
232      { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
233      { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
234      { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
235      { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
236      { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
237      { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
238      { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
239      { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
240      { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
241      { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
242      { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
243      { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
245      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
247      { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
248      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
249      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
250      { OP_STRING,     N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
251      { OP_STRING,     N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
252      { OP_STRING,     N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
253      { OP_STRING,     N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
254    
255      /* These two were accidentally implemented with underscores instead of
256      hyphens in the option names. As this was not discovered for several releases,
257      the incorrect versions are left in the table for compatibility. However, the
258      --help function misses out any option that has an underscore in its name. */
259    
260      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
261      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
262    
263    #ifdef JFRIEDL_DEBUG
264      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
265    #endif
266      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
267      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
268      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
269      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
270      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
271      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
272      { OP_NODATA,    0,        NULL,               NULL,            NULL }
273  };  };
274    
275    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
276    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
277    that the combination of -w and -x has the same effect as -x on its own, so we
278    can treat them as the same. */
279    
280    static const char *prefix[] = {
281      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
282    
283    static const char *suffix[] = {
284      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
285    
286    /* UTF-8 tables - used only when the newline setting is "any". */
287    
288    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
289    
290    const char utf8_table4[] = {
291      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
292      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
293      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
294      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
295    
296    
297    
298  /*************************************************  /*************************************************
299  *       Functions for directory scanning         *  *         Exit from the program                  *
300    *************************************************/
301    
302    /* If there has been a resource error, give a suitable message.
303    
304    Argument:  the return code
305    Returns:   does not return
306    */
307    
308    static void
309    pcregrep_exit(int rc)
310    {
311    if (resource_error)
312      {
313      fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
314        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
315      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
316      }
317    
318    exit(rc);
319    }
320    
321    
322    /*************************************************
323    *            OS-specific functions               *
324  *************************************************/  *************************************************/
325    
326  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
327  although at present the only ones are for Unix, and for "no directory recursion  although at present the only ones are for Unix, Win32, and for "no support". */
 support". */  
328    
329    
330  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
331    
332  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
333  #include <sys/types.h>  #include <sys/types.h>
334  #include <sys/stat.h>  #include <sys/stat.h>
335  #include <dirent.h>  #include <dirent.h>
336    
337  typedef DIR directory_type;  typedef DIR directory_type;
338    
339  int  static int
340  isdirectory(char *filename)  isdirectory(char *filename)
341  {  {
342  struct stat statbuf;  struct stat statbuf;
# Line 92  if (stat(filename, &statbuf) < 0) Line 345  if (stat(filename, &statbuf) < 0)
345  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
346  }  }
347    
348  directory_type *  static directory_type *
349  opendirectory(char *filename)  opendirectory(char *filename)
350  {  {
351  return opendir(filename);  return opendir(filename);
352  }  }
353    
354  char *  static char *
355  readdirectory(directory_type *dir)  readdirectory(directory_type *dir)
356  {  {
357  for (;;)  for (;;)
# Line 108  for (;;) Line 361  for (;;)
361    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
362      return dent->d_name;      return dent->d_name;
363    }    }
364    /* Control never reaches here */
365    }
366    
367    static void
368    closedirectory(directory_type *dir)
369    {
370    closedir(dir);
371    }
372    
373    
374    /************* Test for regular file in Unix **********/
375    
376    static int
377    isregfile(char *filename)
378    {
379    struct stat statbuf;
380    if (stat(filename, &statbuf) < 0)
381      return 1;        /* In the expectation that opening as a file will fail */
382    return (statbuf.st_mode & S_IFMT) == S_IFREG;
383    }
384    
385    
386    /************* Test for a terminal in Unix **********/
387    
388    static BOOL
389    is_stdout_tty(void)
390    {
391    return isatty(fileno(stdout));
392    }
393    
394    static BOOL
395    is_file_tty(FILE *f)
396    {
397    return isatty(fileno(f));
398    }
399    
400    
401    /************* Directory scanning in Win32 ***********/
402    
403    /* I (Philip Hazel) have no means of testing this code. It was contributed by
404    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
405    when it did not exist. David Byron added a patch that moved the #include of
406    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
407    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
408    undefined when it is indeed undefined. */
409    
410    #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
411    
412    #ifndef STRICT
413    # define STRICT
414    #endif
415    #ifndef WIN32_LEAN_AND_MEAN
416    # define WIN32_LEAN_AND_MEAN
417    #endif
418    
419    #include <windows.h>
420    
421    #ifndef INVALID_FILE_ATTRIBUTES
422    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
423    #endif
424    
425    typedef struct directory_type
426    {
427    HANDLE handle;
428    BOOL first;
429    WIN32_FIND_DATA data;
430    } directory_type;
431    
432    int
433    isdirectory(char *filename)
434    {
435    DWORD attr = GetFileAttributes(filename);
436    if (attr == INVALID_FILE_ATTRIBUTES)
437      return 0;
438    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
439    }
440    
441    directory_type *
442    opendirectory(char *filename)
443    {
444    size_t len;
445    char *pattern;
446    directory_type *dir;
447    DWORD err;
448    len = strlen(filename);
449    pattern = (char *) malloc(len + 3);
450    dir = (directory_type *) malloc(sizeof(*dir));
451    if ((pattern == NULL) || (dir == NULL))
452      {
453      fprintf(stderr, "pcregrep: malloc failed\n");
454      pcregrep_exit(2);
455      }
456    memcpy(pattern, filename, len);
457    memcpy(&(pattern[len]), "\\*", 3);
458    dir->handle = FindFirstFile(pattern, &(dir->data));
459    if (dir->handle != INVALID_HANDLE_VALUE)
460      {
461      free(pattern);
462      dir->first = TRUE;
463      return dir;
464      }
465    err = GetLastError();
466    free(pattern);
467    free(dir);
468    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
469    return NULL;
470    }
471    
472    char *
473    readdirectory(directory_type *dir)
474    {
475    for (;;)
476      {
477      if (!dir->first)
478        {
479        if (!FindNextFile(dir->handle, &(dir->data)))
480          return NULL;
481        }
482      else
483        {
484        dir->first = FALSE;
485        }
486      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
487        return dir->data.cFileName;
488      }
489    #ifndef _MSC_VER
490  return NULL;   /* Keep compiler happy; never executed */  return NULL;   /* Keep compiler happy; never executed */
491    #endif
492  }  }
493    
494  void  void
495  closedirectory(directory_type *dir)  closedirectory(directory_type *dir)
496  {  {
497  closedir(dir);  FindClose(dir->handle);
498    free(dir);
499  }  }
500    
501    
502  #else  /************* Test for regular file in Win32 **********/
503    
504    /* I don't know how to do this, or if it can be done; assume all paths are
505    regular if they are not directories. */
506    
507    int isregfile(char *filename)
508    {
509    return !isdirectory(filename);
510    }
511    
512    
513    /************* Test for a terminal in Win32 **********/
514    
515    /* I don't know how to do this; assume never */
516    
517    static BOOL
518    is_stdout_tty(void)
519    {
520    return FALSE;
521    }
522    
523    static BOOL
524    is_file_tty(FILE *f)
525    {
526    return FALSE;
527    }
528    
529    
530  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
531    
532  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
533    
534    #else
535    
536  typedef void directory_type;  typedef void directory_type;
537    
538  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
539  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
540  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
541  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
542    
543    
544    /************* Test for regular when we can't do it **********/
545    
546    /* Assume all files are regular. */
547    
548    int isregfile(char *filename) { return 1; }
549    
550    
551    /************* Test for a terminal when we can't do it **********/
552    
553    static BOOL
554    is_stdout_tty(void)
555    {
556    return FALSE;
557    }
558    
559    static BOOL
560    is_file_tty(FILE *f)
561    {
562    return FALSE;
563    }
564    
565  #endif  #endif
566    
567    
568    
569  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
570  /*************************************************  /*************************************************
571  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
572  *************************************************/  *************************************************/
# Line 159  return sys_errlist[n]; Line 589  return sys_errlist[n];
589    
590    
591  /*************************************************  /*************************************************
592  *              Grep an individual file           *  *            Read one line of input              *
593  *************************************************/  *************************************************/
594    
595    /* Normally, input is read using fread() into a large buffer, so many lines may
596    be read at once. However, doing this for tty input means that no output appears
597    until a lot of input has been typed. Instead, tty input is handled line by
598    line. We cannot use fgets() for this, because it does not stop at a binary
599    zero, and therefore there is no way of telling how many characters it has read,
600    because there may be binary zeros embedded in the data.
601    
602    Arguments:
603      buffer     the buffer to read into
604      length     the maximum number of characters to read
605      f          the file
606    
607    Returns:     the number of characters read, zero at end of file
608    */
609    
610  static int  static int
611  pcregrep(FILE *in, char *name)  read_one_line(char *buffer, int length, FILE *f)
612  {  {
613  int rc = 1;  int c;
614  int linenumber = 0;  int yield = 0;
615  int count = 0;  while ((c = fgetc(f)) != EOF)
616  int offsets[99];    {
617  char buffer[BUFSIZ];    buffer[yield++] = c;
618      if (c == '\n' || yield >= length) break;
619      }
620    return yield;
621    }
622    
623    
624    
625    /*************************************************
626    *             Find end of line                   *
627    *************************************************/
628    
629    /* The length of the endline sequence that is found is set via lenptr. This may
630    be zero at the very end of the file if there is no line-ending sequence there.
631    
632    Arguments:
633      p         current position in line
634      endptr    end of available data
635      lenptr    where to put the length of the eol sequence
636    
637    Returns:    pointer to the last byte of the line
638    */
639    
640  while (fgets(buffer, sizeof(buffer), in) != NULL)  static char *
641    end_of_line(char *p, char *endptr, int *lenptr)
642    {
643    switch(endlinetype)
644    {    {
645    BOOL match = FALSE;    default:      /* Just in case */
646    int i;    case EL_LF:
647    int length = (int)strlen(buffer);    while (p < endptr && *p != '\n') p++;
648    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    if (p < endptr)
649    linenumber++;      {
650        *lenptr = 1;
651        return p + 1;
652        }
653      *lenptr = 0;
654      return endptr;
655    
656    for (i = 0; !match && i < pattern_count; i++)    case EL_CR:
657      while (p < endptr && *p != '\r') p++;
658      if (p < endptr)
659      {      {
660      match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,      *lenptr = 1;
661        offsets, 99) >= 0;      return p + 1;
     if (match && whole_lines && offsets[1] != length) match = FALSE;  
662      }      }
663      *lenptr = 0;
664      return endptr;
665    
666    if (match != invert)    case EL_CRLF:
667      for (;;)
668      {      {
669      if (count_only) count++;      while (p < endptr && *p != '\r') p++;
670        if (++p >= endptr)
671          {
672          *lenptr = 0;
673          return endptr;
674          }
675        if (*p == '\n')
676          {
677          *lenptr = 2;
678          return p + 1;
679          }
680        }
681      break;
682    
683      else if (filenames_only)    case EL_ANYCRLF:
684      while (p < endptr)
685        {
686        int extra = 0;
687        register int c = *((unsigned char *)p);
688    
689        if (utf8 && c >= 0xc0)
690        {        {
691        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        int gcii, gcss;
692        return 0;        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
693          gcss = 6*extra;
694          c = (c & utf8_table3[extra]) << gcss;
695          for (gcii = 1; gcii <= extra; gcii++)
696            {
697            gcss -= 6;
698            c |= (p[gcii] & 0x3f) << gcss;
699            }
700        }        }
701    
702      else if (silent) return 0;      p += 1 + extra;
703    
704      else      switch (c)
705        {        {
706        if (name != NULL) fprintf(stdout, "%s:", name);        case 0x0a:    /* LF */
707        if (number) fprintf(stdout, "%d:", linenumber);        *lenptr = 1;
708        fprintf(stdout, "%s\n", buffer);        return p;
709    
710          case 0x0d:    /* CR */
711          if (p < endptr && *p == 0x0a)
712            {
713            *lenptr = 2;
714            p++;
715            }
716          else *lenptr = 1;
717          return p;
718    
719          default:
720          break;
721        }        }
722        }   /* End of loop for ANYCRLF case */
723    
724      rc = 0;    *lenptr = 0;  /* Must have hit the end */
725      }    return endptr;
   }  
726    
727  if (count_only)    case EL_ANY:
728    {    while (p < endptr)
729    if (name != NULL) fprintf(stdout, "%s:", name);      {
730    fprintf(stdout, "%d\n", count);      int extra = 0;
731    }      register int c = *((unsigned char *)p);
732    
733  return rc;      if (utf8 && c >= 0xc0)
734  }        {
735          int gcii, gcss;
736          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
737          gcss = 6*extra;
738          c = (c & utf8_table3[extra]) << gcss;
739          for (gcii = 1; gcii <= extra; gcii++)
740            {
741            gcss -= 6;
742            c |= (p[gcii] & 0x3f) << gcss;
743            }
744          }
745    
746        p += 1 + extra;
747    
748        switch (c)
749          {
750          case 0x0a:    /* LF */
751          case 0x0b:    /* VT */
752          case 0x0c:    /* FF */
753          *lenptr = 1;
754          return p;
755    
756          case 0x0d:    /* CR */
757          if (p < endptr && *p == 0x0a)
758            {
759            *lenptr = 2;
760            p++;
761            }
762          else *lenptr = 1;
763          return p;
764    
765          case 0x85:    /* NEL */
766          *lenptr = utf8? 2 : 1;
767          return p;
768    
769          case 0x2028:  /* LS */
770          case 0x2029:  /* PS */
771          *lenptr = 3;
772          return p;
773    
774          default:
775          break;
776          }
777        }   /* End of loop for ANY case */
778    
779      *lenptr = 0;  /* Must have hit the end */
780      return endptr;
781      }     /* End of overall switch */
782    }
783    
784    
785    
786  /*************************************************  /*************************************************
787  *     Grep a file or recurse into a directory    *  *         Find start of previous line            *
788  *************************************************/  *************************************************/
789    
790  static int  /* This is called when looking back for before lines to print.
 grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,  
   BOOL only_one_at_top)  
 {  
 int rc = 1;  
 int sep;  
 FILE *in;  
791    
792  /* If the file is a directory and we are recursing, scan each file within it.  Arguments:
793  The scanning code is localized so it can be made system-specific. */    p         start of the subsequent line
794      startptr  start of available data
795    
796  if ((sep = isdirectory(filename)) != 0 && recurse)  Returns:    pointer to the start of the previous line
797    {  */
   char buffer[1024];  
   char *nextfile;  
   directory_type *dir = opendirectory(filename);  
798    
799    if (dir == NULL)  static char *
800      {  previous_line(char *p, char *startptr)
801      fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,  {
802        strerror(errno));  switch(endlinetype)
803      return 2;    {
804      }    default:      /* Just in case */
805      case EL_LF:
806      p--;
807      while (p > startptr && p[-1] != '\n') p--;
808      return p;
809    
810      case EL_CR:
811      p--;
812      while (p > startptr && p[-1] != '\n') p--;
813      return p;
814    
815    while ((nextfile = readdirectory(dir)) != NULL)    case EL_CRLF:
816      for (;;)
817      {      {
818      int frc;      p -= 2;
819      sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);      while (p > startptr && p[-1] != '\n') p--;
820      frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);      if (p <= startptr + 1 || p[-2] == '\r') return p;
     if (frc == 0 && rc == 1) rc = 0;  
821      }      }
822      return p;   /* But control should never get here */
823    
824    closedirectory(dir);    case EL_ANY:
825    return rc;    case EL_ANYCRLF:
826    }    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
827      if (utf8) while ((*p & 0xc0) == 0x80) p--;
828    
829  /* If the file is not a directory, or we are not recursing, scan it. If this is    while (p > startptr)
830  the first and only argument at top level, we don't show the file name.      {
831  Otherwise, control is via the show_filenames variable. */      register int c;
832        char *pp = p - 1;
833    
834  in = fopen(filename, "r");      if (utf8)
835  if (in == NULL)        {
836    {        int extra = 0;
837    fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));        while ((*pp & 0xc0) == 0x80) pp--;
838    return 2;        c = *((unsigned char *)pp);
839    }        if (c >= 0xc0)
840            {
841            int gcii, gcss;
842            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
843            gcss = 6*extra;
844            c = (c & utf8_table3[extra]) << gcss;
845            for (gcii = 1; gcii <= extra; gcii++)
846              {
847              gcss -= 6;
848              c |= (pp[gcii] & 0x3f) << gcss;
849              }
850            }
851          }
852        else c = *((unsigned char *)pp);
853    
854  rc = pcregrep(in, (show_filenames && !only_one_at_top)? filename : NULL);      if (endlinetype == EL_ANYCRLF) switch (c)
855  fclose(in);        {
856  return rc;        case 0x0a:    /* LF */
857  }        case 0x0d:    /* CR */
858          return p;
859    
860          default:
861          break;
862          }
863    
864        else switch (c)
865          {
866          case 0x0a:    /* LF */
867          case 0x0b:    /* VT */
868          case 0x0c:    /* FF */
869          case 0x0d:    /* CR */
870          case 0x85:    /* NEL */
871          case 0x2028:  /* LS */
872          case 0x2029:  /* PS */
873          return p;
874    
875          default:
876          break;
877          }
878    
879  /*************************************************      p = pp;  /* Back one character */
880  *                Usage function                  *      }        /* End of loop for ANY case */
 *************************************************/  
881    
882  static int    return startptr;  /* Hit start of data */
883  usage(int rc)    }     /* End of overall switch */
 {  
 fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n");  
 fprintf(stderr, "Type `pcregrep --help' for more information.\n");  
 return rc;  
884  }  }
885    
886    
887    
888    
889    
890  /*************************************************  /*************************************************
891  *                Help function                   *  *       Print the previous "after" lines         *
892    *************************************************/
893    
894    /* This is called when pending "after" context lines are about to be lost
895    because the buffer is being refilled, and at the end of the file. Each line is
896    written using FWRITE() so that a binary zero does not terminate it.
897    
898    Arguments:
899      lastmatchnumber   the number of the last matching line, plus one
900      lastmatchrestart  where we restarted after the last match
901      endptr            end of available data
902      printname         filename for printing, or NULL to print no filename
903    
904    Returns:            nothing; does nothing unless after_context > 0 and
905                        lastmatchnumber > 0, in which case hyphenpending is set */
906    
907    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
908      char *endptr, char *printname)
909    {
910    if (after_context > 0 && lastmatchnumber > 0)
911      {
912      int count = 0;                       /* Lines printed so far */
913      while (lastmatchrestart < endptr && count++ < after_context)
914        {
915        int ellength;                      /* Set by end_of_line(); not used here */
916        char *pp = lastmatchrestart;
917        if (printname != NULL) fprintf(stdout, "%s-", printname);
918        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
919        pp = end_of_line(pp, endptr, &ellength);   /* Find the end of this line */
920        FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
921        lastmatchrestart = pp;             /* Move on to the next line */
922        }
923      hyphenpending = TRUE;                /* Set even if no lines were printed */
924      }
925    }
926    
927    
928    
929    /*************************************************
930    *   Apply patterns to subject till one matches   *
931    *************************************************/
932    
933    /* This function applies each pattern in turn until one matches or pcre_exec()
934    gives an error other than NOMATCH. It is used multiple times for the same
935    subject when colouring is enabled, in order to find all possible matches.
936    
937    Arguments:
938      matchptr    the start of the subject
939      length      the length of the subject to match
940      offsets     the offsets vector to fill in
941      mrc         address of where to put the result of pcre_exec()
942    
943    Returns:      TRUE if there was a match
944                  FALSE if there was no match
945                  invert if there was a non-fatal error (after reporting it)
946    */
947    
948    static BOOL
949    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
950    {
951    int i;
952    size_t slen = length;                  /* Amount of subject shown on error */
953    const char *msg = "this text:\n\n";
954    if (slen > 200)                        /* Show at most 200 bytes of subject */
955      {
956      slen = 200;
957      msg = "text that starts:\n\n";
958      }
959    for (i = 0; i < pattern_count; i++)
960      {
961      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
962        PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
963      if (*mrc >= 0) return TRUE;          /* First matching pattern wins */
964      if (*mrc == PCRE_ERROR_NOMATCH) continue;
965      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
966      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
967      fprintf(stderr, "%s", msg);
968      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
969      fprintf(stderr, "\n\n");
970      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
971        resource_error = TRUE;
972      if (error_count++ > 20)              /* Give up after too many errors */
973        {
974        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
975        pcregrep_exit(2);
976        }
977      return invert;    /* No more matching; don't show the line again */
978      }
979    
980    return FALSE;  /* No match, no errors */
981    }
982    
983    
984    
985    /*************************************************
986    *            Grep an individual file             *
987    *************************************************/
988    
989    /* This is called from grep_or_recurse() below. It uses a buffer that is three
990    times the value of MBUFTHIRD. The matching point is never allowed to stray into
991    the top third of the buffer, thus keeping more of the file available for
992    context printing or for multiline scanning. For large files, the pointer will
993    be in the middle third most of the time, so the bottom third is available for
994    "before" context printing.
995    
996    Arguments:
997      handle       the fopened FILE stream for a normal file
998                   the gzFile pointer when reading is via libz
999                   the BZFILE pointer when reading is via libbz2
1000      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1001      printname    the file name if it is to be printed for each match
1002                   or NULL if the file name is not to be printed
1003                   it cannot be NULL if filenames[_nomatch]_only is set
1004    
1005    Returns:       0 if there was at least one match
1006                   1 otherwise (no matches)
1007                   2 if there is a read error on a .bz2 file
1008    When filenames == FN_NOMATCH_ONLY: 1 once a line is selected, 0 if none is. */
1009    
1010    static int
1011    pcregrep(void *handle, int frtype, char *printname)
1012    {
1013    int rc = 1;
1014    int linenumber = 1;
1015    int lastmatchnumber = 0;
1016    int count = 0;
1017    int filepos = 0;                    /* File offset of the current line */
1018    int offsets[OFFSET_SIZE];
1019    char *lastmatchrestart = NULL;
1020    char buffer[3*MBUFTHIRD];
1021    char *ptr = buffer;
1022    char *endptr;
1023    size_t bufflength;
1024    BOOL endhyphenpending = FALSE;
1025    BOOL input_line_buffered = line_buffered;
1026    FILE *in = NULL;                    /* Ensure initialized */
1027    
1028    #ifdef SUPPORT_LIBZ
1029    gzFile ingz = NULL;
1030    #endif
1031    
1032    #ifdef SUPPORT_LIBBZ2
1033    BZFILE *inbz2 = NULL;
1034    #endif
1035    
1036    
1037    /* Do the first read into the start of the buffer and set up the pointer to end
1038    of what we have. In the case of libz, a non-zipped .gz file will be read as a
1039    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1040    fail. */
1041    
1042    #ifdef SUPPORT_LIBZ
1043    if (frtype == FR_LIBZ)
1044      {
1045      ingz = (gzFile)handle;
1046      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);  /* NOTE(review): no */
1047      }                     /* error check here, unlike bzread below - confirm */
1048    else
1049    #endif
1050    
1051    #ifdef SUPPORT_LIBBZ2
1052    if (frtype == FR_LIBBZ2)
1053      {
1054      inbz2 = (BZFILE *)handle;
1055      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1056      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1057      }                                    /* without the cast it is unsigned. */
1058    else
1059    #endif
1060    
1061      {
1062      in = (FILE *)handle;
1063      if (is_file_tty(in)) input_line_buffered = TRUE;
1064      bufflength = input_line_buffered?
1065        read_one_line(buffer, 3*MBUFTHIRD, in) :
1066        fread(buffer, 1, 3*MBUFTHIRD, in);
1067      }
1068    
1069    endptr = buffer + bufflength;
1070    
1071    /* Loop while the current pointer is not at the end of the file. For large
1072    files, endptr will be at the end of the buffer when we are in the middle of the
1073    file, but ptr will never get there, because as soon as it gets over 2/3 of the
1074    way, the buffer is shifted left and re-filled. */
1075    
1076    while (ptr < endptr)
1077      {
1078      int endlinelength;
1079      int mrc = 0;
1080      BOOL match;
1081      char *matchptr = ptr;
1082      char *t = ptr;
1083      size_t length, linelength;
1084    
1085      /* At this point, ptr is at the start of a line. We need to find the length
1086      of the subject string to pass to pcre_exec(). In multiline mode, it is the
1087      length of the rest of the data in the buffer. Otherwise, it is the length of
1088      the next line, excluding the terminating newline. After matching, we always
1089      advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1090      option is used for compiling, so that any match is constrained to be in the
1091      first line. */
1092    
1093      t = end_of_line(t, endptr, &endlinelength);
1094      linelength = t - ptr - endlinelength;
1095      length = multiline? (size_t)(endptr - ptr) : linelength;
1096    
1097      /* Extra processing for Jeffrey Friedl's debugging. */
1098    
1099    #ifdef JFRIEDL_DEBUG
1100      if (jfriedl_XT || jfriedl_XR)
1101      {
1102          #include <sys/time.h>
1103          #include <time.h>
1104          struct timeval start_time, end_time;
1105          struct timezone dummy;
1106          int i;
1107    
1108          if (jfriedl_XT)
1109          {
1110              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1111              const char *orig = ptr;
1112              ptr = malloc(newlen + 1);
1113              if (!ptr) {
1114                      printf("out of memory");
1115                      pcregrep_exit(2);
1116              }
1117              endptr = ptr;
1118              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1119              for (i = 0; i < jfriedl_XT; i++) {
1120                      strncpy(endptr, orig,  length);
1121                      endptr += length;
1122              }
1123              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1124              length = newlen;
1125          }
1126    
1127          if (gettimeofday(&start_time, &dummy) != 0)
1128                  perror("bad gettimeofday");
1129    
1130    
1131          for (i = 0; i < jfriedl_XR; i++)
1132              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1133                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1134    
1135          if (gettimeofday(&end_time, &dummy) != 0)
1136                  perror("bad gettimeofday");
1137    
1138          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1139                          -
1140                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1141    
1142          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1143          return 0;
1144      }
1145    #endif
1146    
1147      /* We come back here after a match when the -o option (only_matching) is set,
1148      in order to find any further matches in the same line. */
1149    
1150      ONLY_MATCHING_RESTART:
1151    
1152      /* Run through all the patterns until one matches or there is an error other
1153      than NOMATCH. This code is in a subroutine so that it can be re-used for
1154      finding subsequent matches when colouring matched lines. */
1155    
1156      match = match_patterns(matchptr, length, offsets, &mrc);
1157    
1158      /* If it's a match or a not-match (as required), do what's wanted. */
1159    
1160      if (match != invert)
1161        {
1162        BOOL hyphenprinted = FALSE;
1163    
1164        /* We've failed if we want a file that doesn't have any matches. */
1165    
1166        if (filenames == FN_NOMATCH_ONLY) return 1;
1167    
1168        /* Just count if just counting is wanted. */
1169    
1170        if (count_only) count++;
1171    
1172        /* If all we want is a file name, there is no need to scan any more lines
1173        in the file. */
1174    
1175        else if (filenames == FN_MATCH_ONLY)
1176          {
1177          fprintf(stdout, "%s\n", printname);
1178          return 0;
1179          }
1180    
1181        /* Likewise, if all we want is a yes/no answer. */
1182    
1183        else if (quiet) return 0;
1184    
1185        /* The --only-matching option prints just the substring that matched, or a
1186        captured portion of it, as long as this string is not empty, and the
1187        --file-offsets and --line-offsets options output offsets for the matching
1188        substring (they both force --only-matching = 0). None of these options
1189        prints any context. Afterwards, adjust the start and length, and then jump
1190        back to look for further matches in the same line. If we are in invert
1191        mode, however, nothing is printed and we do not restart - this could still
1192        be useful because the return code is set. */
1193    
1194        else if (only_matching >= 0)
1195          {
1196          if (!invert)
1197            {
1198            if (printname != NULL) fprintf(stdout, "%s:", printname);
1199            if (number) fprintf(stdout, "%d:", linenumber);
1200            if (line_offsets)
1201              fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1202                offsets[1] - offsets[0]);
1203            else if (file_offsets)
1204              fprintf(stdout, "%d,%d\n",
1205                (int)(filepos + matchptr + offsets[0] - ptr),
1206                offsets[1] - offsets[0]);
1207            else if (only_matching < mrc)
1208              {
1209              int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1210              if (plen > 0)
1211                {
1212                if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1213                FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1214                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1215                fprintf(stdout, "\n");
1216                }
1217              }
1218            else if (printname != NULL || number) fprintf(stdout, "\n");
1219            matchptr += offsets[1];
1220            length -= offsets[1];
1221            match = FALSE;
1222            if (line_buffered) fflush(stdout);
1223            rc = 0;    /* Had some success */
1224            goto ONLY_MATCHING_RESTART;
1225            }
1226          }
1227    
1228        /* This is the default case when none of the above options is set. We print
1229        the matching line(s), possibly preceded and/or followed by other lines of
1230        context. */
1231    
1232        else
1233          {
1234          /* See if there is a requirement to print some "after" lines from a
1235          previous match. We never print any overlaps. */
1236    
1237          if (after_context > 0 && lastmatchnumber > 0)
1238            {
1239            int ellength;
1240            int linecount = 0;
1241            char *p = lastmatchrestart;
1242    
1243            while (p < ptr && linecount < after_context)
1244              {
1245              p = end_of_line(p, ptr, &ellength);
1246              linecount++;
1247              }
1248    
1249            /* It is important to advance lastmatchrestart during this printing so
1250            that it interacts correctly with any "before" printing below. Print
1251            each line's data using fwrite() in case there are binary zeroes. */
1252    
1253            while (lastmatchrestart < p)
1254              {
1255              char *pp = lastmatchrestart;
1256              if (printname != NULL) fprintf(stdout, "%s-", printname);
1257              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1258              pp = end_of_line(pp, endptr, &ellength);
1259              FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1260              lastmatchrestart = pp;
1261              }
1262            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1263            }
1264    
1265          /* If there were non-contiguous lines printed above, insert hyphens. */
1266    
1267          if (hyphenpending)
1268            {
1269            fprintf(stdout, "--\n");
1270            hyphenpending = FALSE;
1271            hyphenprinted = TRUE;
1272            }
1273    
1274          /* See if there is a requirement to print some "before" lines for this
1275          match. Again, don't print overlaps. */
1276    
1277          if (before_context > 0)
1278            {
1279            int linecount = 0;
1280            char *p = ptr;
1281    
1282            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1283                   linecount < before_context)
1284              {
1285              linecount++;
1286              p = previous_line(p, buffer);
1287              }
1288    
1289            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1290              fprintf(stdout, "--\n");
1291    
1292            while (p < ptr)
1293              {
1294              int ellength;
1295              char *pp = p;
1296              if (printname != NULL) fprintf(stdout, "%s-", printname);
1297              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1298              pp = end_of_line(pp, endptr, &ellength);
1299              FWRITE(p, 1, pp - p, stdout);
1300              p = pp;
1301              }
1302            }
1303    
1304          /* Now print the matching line(s); ensure we set hyphenpending at the end
1305          of the file if any context lines are being output. */
1306    
1307          if (after_context > 0 || before_context > 0)
1308            endhyphenpending = TRUE;
1309    
1310          if (printname != NULL) fprintf(stdout, "%s:", printname);
1311          if (number) fprintf(stdout, "%d:", linenumber);
1312    
1313          /* In multiline mode, we want to print to the end of the line in which
1314          the end of the matched string is found, so we adjust linelength and the
1315          line number appropriately, but only when there actually was a match
1316          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1317          the match will always be before the first newline sequence. */
1318    
1319          if (multiline)
1320            {
1321            int ellength;
1322            char *endmatch = ptr;
1323            if (!invert)
1324              {
1325              endmatch += offsets[1];
1326              t = ptr;
1327              while (t < endmatch)
1328                {
1329                t = end_of_line(t, endptr, &ellength);
1330                if (t <= endmatch) linenumber++; else break;
1331                }
1332              }
1333            endmatch = end_of_line(endmatch, endptr, &ellength);
1334            linelength = endmatch - ptr - ellength;
1335            }
1336    
1337          /*** NOTE: Use only fwrite() to output the data line, so that binary
1338          zeroes are treated as just another data character. */
1339    
1340          /* This extra option, for Jeffrey Friedl's debugging requirements,
1341          replaces the matched string, or a specific captured string if it exists,
1342          with X. When this happens, colouring is ignored. */
1343    
1344    #ifdef JFRIEDL_DEBUG
1345          if (S_arg >= 0 && S_arg < mrc)
1346            {
1347            int first = S_arg * 2;
1348            int last  = first + 1;
1349            FWRITE(ptr, 1, offsets[first], stdout);
1350            fprintf(stdout, "X");
1351            FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1352            }
1353          else
1354    #endif
1355    
1356          /* We have to split the line(s) up if colouring, and search for further
1357          matches, but not of course if the line is a non-match. */
1358    
1359          if (do_colour && !invert)
1360            {
1361            int last_offset = 0;
1362            FWRITE(ptr, 1, offsets[0], stdout);
1363            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1364            FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1365            fprintf(stdout, "%c[00m", 0x1b);
1366            for (;;)
1367              {
1368              last_offset += offsets[1];
1369              matchptr += offsets[1];
1370              length -= offsets[1];
1371              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1372              FWRITE(matchptr, 1, offsets[0], stdout);
1373              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1374              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1375              fprintf(stdout, "%c[00m", 0x1b);
1376              }
1377            FWRITE(ptr + last_offset, 1,
1378              (linelength + endlinelength) - last_offset, stdout);
1379            }
1380    
1381          /* Not colouring; no need to search for further matches */
1382    
1383          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1384          }
1385    
1386        /* End of doing what has to be done for a match. If --line-buffered was
1387        given, flush the output. */
1388    
1389        if (line_buffered) fflush(stdout);
1390        rc = 0;    /* Had some success */
1391    
1392        /* Remember where the last match happened for after_context. We remember
1393        where we are about to restart, and that line's number. */
1394    
1395        lastmatchrestart = ptr + linelength + endlinelength;
1396        lastmatchnumber = linenumber + 1;
1397        }
1398    
1399      /* For a match in multiline inverted mode (which of course did not cause
1400      anything to be printed), we have to move on to the end of the match before
1401      proceeding. */
1402    
1403      if (multiline && invert && match)
1404        {
1405        int ellength;
1406        char *endmatch = ptr + offsets[1];
1407        t = ptr;
1408        while (t < endmatch)
1409          {
1410          t = end_of_line(t, endptr, &ellength);
1411          if (t <= endmatch) linenumber++; else break;
1412          }
1413        endmatch = end_of_line(endmatch, endptr, &ellength);
1414        linelength = endmatch - ptr - ellength;
1415        }
1416    
1417      /* Advance to after the newline and increment the line number. The file
1418      offset to the current line is maintained in filepos. */
1419    
1420      ptr += linelength + endlinelength;
1421      filepos += (int)(linelength + endlinelength);
1422      linenumber++;
1423    
1424      /* If input is line buffered, and the buffer is not yet full, read another
1425      line and add it into the buffer. */
1426    
1427      if (input_line_buffered && bufflength < sizeof(buffer))
1428        {
1429        int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1430        bufflength += add;
1431        endptr += add;
1432        }
1433    
1434      /* If we haven't yet reached the end of the file (the buffer is full), and
1435      the current point is in the top 1/3 of the buffer, slide the buffer down by
1436      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1437      about to be lost, print them. */
1438    
1439      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1440        {
1441        if (after_context > 0 &&
1442            lastmatchnumber > 0 &&
1443            lastmatchrestart < buffer + MBUFTHIRD)
1444          {
1445          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1446          lastmatchnumber = 0;
1447          }
1448    
1449        /* Now do the shuffle */
1450    
1451        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1452        ptr -= MBUFTHIRD;
1453    
1454    #ifdef SUPPORT_LIBZ
1455        if (frtype == FR_LIBZ)
1456          bufflength = 2*MBUFTHIRD +
1457            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1458        else
1459    #endif
1460    
1461    #ifdef SUPPORT_LIBBZ2
1462        if (frtype == FR_LIBBZ2)
1463          bufflength = 2*MBUFTHIRD +
1464            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1465        else
1466    #endif
1467    
1468        bufflength = 2*MBUFTHIRD +
1469          (input_line_buffered?
1470           read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1471           fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1472        endptr = buffer + bufflength;
1473    
1474        /* Adjust any last match point */
1475    
1476        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1477        }
1478      }     /* Loop through the whole file */
1479    
1480    /* End of file; print final "after" lines if wanted; do_after_lines sets
1481    hyphenpending if it prints something. */
1482    
1483    if (only_matching < 0 && !count_only)
1484      {
1485      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1486      hyphenpending |= endhyphenpending;
1487      }
1488    
1489    /* Print the file name if we are looking for those without matches and there
1490    were none. If we found a match, we won't have got this far. */
1491    
1492    if (filenames == FN_NOMATCH_ONLY)
1493      {
1494      fprintf(stdout, "%s\n", printname);
1495      return 0;
1496      }
1497    
1498    /* Print the match count if wanted */
1499    
1500    if (count_only)
1501      {
1502      if (count > 0 || !omit_zero_count)
1503        {
1504        if (printname != NULL && filenames != FN_NONE)
1505          fprintf(stdout, "%s:", printname);
1506        fprintf(stdout, "%d\n", count);
1507        }
1508      }
1509    
1510    return rc;
1511    }
1512    
1513    
1514    
1515    /*************************************************
1516    *     Grep a file or recurse into a directory    *
1517    *************************************************/
1518    
1519    /* Given a path name, if it's a directory, scan all the files if we are
1520    recursing; if it's a file, grep it.
1521    
1522    Arguments:
1523      pathname          the path to investigate
1524      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1525      only_one_at_top   TRUE if the path is the only one at toplevel
1526    
1527    Returns:   0 if there was at least one match
1528               1 if there were no matches
1529               2 if there was some kind of error
1530    
1531    However, file opening failures are suppressed if "silent" is set.
1532    */
1533    
1534    static int
1535    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1536    {
1537    int rc = 1;
1538    int sep;
1539    int frtype;
1540    int pathlen;
1541    void *handle;
1542    FILE *in = NULL;           /* Ensure initialized */
1543    
1544    #ifdef SUPPORT_LIBZ
1545    gzFile ingz = NULL;
1546    #endif
1547    
1548    #ifdef SUPPORT_LIBBZ2
1549    BZFILE *inbz2 = NULL;
1550    #endif
1551    
1552    /* If the file name is "-" we scan stdin */
1553    
1554    if (strcmp(pathname, "-") == 0)
1555      {
1556      return pcregrep(stdin, FR_PLAIN,
1557        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1558          stdin_name : NULL);
1559      }
1560    
1561    /* If the file is a directory, skip if skipping or if we are recursing, scan
1562    each file and directory within it, subject to any include or exclude patterns
1563    that were set. The scanning code is localized so it can be made
1564    system-specific. */
1565    
1566    if ((sep = isdirectory(pathname)) != 0)
1567      {
1568      if (dee_action == dee_SKIP) return 1;
1569      if (dee_action == dee_RECURSE)
1570        {
1571        char buffer[1024];
1572        char *nextfile;
1573        directory_type *dir = opendirectory(pathname);
1574    
1575        if (dir == NULL)
1576          {
1577          if (!silent)
1578            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1579              strerror(errno));
1580          return 2;
1581          }
1582    
1583        while ((nextfile = readdirectory(dir)) != NULL)
1584          {
1585          int frc, nflen;
1586          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1587          nflen = (int)(strlen(nextfile));
1588    
1589          if (isdirectory(buffer))
1590            {
1591            if (exclude_dir_compiled != NULL &&
1592                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1593              continue;
1594    
1595            if (include_dir_compiled != NULL &&
1596                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1597              continue;
1598            }
1599          else
1600            {
1601            if (exclude_compiled != NULL &&
1602                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1603              continue;
1604    
1605            if (include_compiled != NULL &&
1606                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1607              continue;
1608            }
1609    
1610          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1611          if (frc > 1) rc = frc;
1612           else if (frc == 0 && rc == 1) rc = 0;
1613          }
1614    
1615        closedirectory(dir);
1616        return rc;
1617        }
1618      }
1619    
1620    /* If the file is not a directory and not a regular file, skip it if that's
1621    been requested. */
1622    
1623    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1624    
1625    /* Control reaches here if we have a regular file, or if we have a directory
1626    and recursion or skipping was not requested, or if we have anything else and
1627    skipping was not requested. The scan proceeds. If this is the first and only
1628    argument at top level, we don't show the file name, unless we are only showing
1629    the file name, or the filename was forced (-H). */
1630    
1631    pathlen = (int)(strlen(pathname));
1632    
1633    /* Open using zlib if it is supported and the file name ends with .gz. */
1634    
1635    #ifdef SUPPORT_LIBZ
1636    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1637      {
1638      ingz = gzopen(pathname, "rb");
1639      if (ingz == NULL)
1640        {
1641        if (!silent)
1642          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1643            strerror(errno));
1644        return 2;
1645        }
1646      handle = (void *)ingz;
1647      frtype = FR_LIBZ;
1648      }
1649    else
1650    #endif
1651    
1652    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1653    
1654    #ifdef SUPPORT_LIBBZ2
1655    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1656      {
1657      inbz2 = BZ2_bzopen(pathname, "rb");
1658      handle = (void *)inbz2;
1659      frtype = FR_LIBBZ2;
1660      }
1661    else
1662    #endif
1663    
1664    /* Otherwise use plain fopen(). The label is so that we can come back here if
1665    an attempt to read a .bz2 file indicates that it really is a plain file. */
1666    
1667    #ifdef SUPPORT_LIBBZ2
1668    PLAIN_FILE:
1669    #endif
1670      {
1671      in = fopen(pathname, "rb");
1672      handle = (void *)in;
1673      frtype = FR_PLAIN;
1674      }
1675    
1676    /* All the opening methods return errno when they fail. */
1677    
1678    if (handle == NULL)
1679      {
1680      if (!silent)
1681        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1682          strerror(errno));
1683      return 2;
1684      }
1685    
1686    /* Now grep the file */
1687    
1688    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1689      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1690    
1691    /* Close in an appropriate manner. */
1692    
1693    #ifdef SUPPORT_LIBZ
1694    if (frtype == FR_LIBZ)
1695      gzclose(ingz);
1696    else
1697    #endif
1698    
1699    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1700    read failed. If the error indicates that the file isn't in fact bzipped, try
1701    again as a normal file. */
1702    
1703    #ifdef SUPPORT_LIBBZ2
1704    if (frtype == FR_LIBBZ2)
1705      {
1706      if (rc == 2)
1707        {
1708        int errnum;
1709        const char *err = BZ2_bzerror(inbz2, &errnum);
1710        if (errnum == BZ_DATA_ERROR_MAGIC)
1711          {
1712          BZ2_bzclose(inbz2);
1713          goto PLAIN_FILE;
1714          }
1715        else if (!silent)
1716          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1717            pathname, err);
1718        }
1719      BZ2_bzclose(inbz2);
1720      }
1721    else
1722    #endif
1723    
1724    /* Normal file close */
1725    
1726    fclose(in);
1727    
1728    /* Pass back the yield from pcregrep(). */
1729    
1730    return rc;
1731    }
1732    
1733    
1734    
1735    
1736    /*************************************************
1737    *                Usage function                  *
1738    *************************************************/
1739    
1740    static int
1741    usage(int rc)
1742    {
1743    option_item *op;
1744    fprintf(stderr, "Usage: pcregrep [-");
1745    for (op = optionlist; op->one_char != 0; op++)
1746      {
1747      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1748      }
1749    fprintf(stderr, "] [long options] [pattern] [files]\n");
1750    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1751      "options.\n");
1752    return rc;
1753    }
1754    
1755    
1756    
1757    
1758    /*************************************************
1759    *                Help function                   *
1760  *************************************************/  *************************************************/
1761    
1762  static void  static void
# Line 304  help(void) Line 1764  help(void)
1764  {  {
1765  option_item *op;  option_item *op;
1766    
1767  printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1768  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1769  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1770    printf("\"-\" can be used as a file name to mean STDIN.\n");
1771    
1772    #ifdef SUPPORT_LIBZ
1773    printf("Files whose names end in .gz are read using zlib.\n");
1774    #endif
1775    
1776    #ifdef SUPPORT_LIBBZ2
1777    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1778    #endif
1779    
1780    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1781    printf("Other files and the standard input are read as plain files.\n\n");
1782    #else
1783    printf("All files are read as plain files, without any interpretation.\n\n");
1784    #endif
1785    
1786    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1787  printf("Options:\n");  printf("Options:\n");
1788    
1789  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
1790    {    {
1791    int n;    int n;
1792    char s[4];    char s[4];
1793    
1794      /* Two options were accidentally implemented and documented with underscores
1795      instead of hyphens in their names, something that was not noticed for quite a
1796      few releases. When fixing this, I left the underscored versions in the list
1797      in case people were using them. However, we don't want to display them in the
1798      help data. There are no other options that contain underscores, and we do not
1799      expect ever to implement such options. Therefore, just omit any option that
1800      contains an underscore. */
1801    
1802      if (strchr(op->long_name, '_') != NULL) continue;
1803    
1804    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1805    printf("  %s --%s%n", s, op->long_name, &n);    n = 31 - printf("  %s --%s", s, op->long_name);
   n = 30 - n;  
1806    if (n < 1) n = 1;    if (n < 1) n = 1;
1807    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                     ", op->help_text);
1808    }    }
1809    
1810  printf("\n  -f<filename>  or  --file=<filename>\n");  printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1811  printf("    Read patterns from <filename> instead of using a command line option.\n");  printf("trailing white space is removed and blank lines are ignored.\n");
1812  printf("    Trailing white space is removed; blanks lines are ignored.\n");  printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
 printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
1813    
1814  printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1815  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1816  }  }
1817    
# Line 334  printf("Exit status is 0 if any matches, Line 1819  printf("Exit status is 0 if any matches,
1819    
1820    
1821  /*************************************************  /*************************************************
1822  *                Handle an option                *  *    Handle a single-letter, no data option      *
1823    *************************************************/
1824    
1825    static int
1826    handle_option(int letter, int options)
1827    {
1828    switch(letter)
1829      {
1830      case N_FOFFSETS: file_offsets = TRUE; break;
1831      case N_HELP: help(); pcregrep_exit(0);
1832      case N_LOFFSETS: line_offsets = number = TRUE; break;
1833      case N_LBUFFER: line_buffered = TRUE; break;
1834      case 'c': count_only = TRUE; break;
1835      case 'F': process_options |= PO_FIXED_STRINGS; break;
1836      case 'H': filenames = FN_FORCE; break;
1837      case 'h': filenames = FN_NONE; break;
1838      case 'i': options |= PCRE_CASELESS; break;
1839      case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1840      case 'L': filenames = FN_NOMATCH_ONLY; break;
1841      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1842      case 'n': number = TRUE; break;
1843      case 'o': only_matching = 0; break;
1844      case 'q': quiet = TRUE; break;
1845      case 'r': dee_action = dee_RECURSE; break;
1846      case 's': silent = TRUE; break;
1847      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1848      case 'v': invert = TRUE; break;
1849      case 'w': process_options |= PO_WORD_MATCH; break;
1850      case 'x': process_options |= PO_LINE_MATCH; break;
1851    
1852      case 'V':
1853      fprintf(stderr, "pcregrep version %s\n", pcre_version());
1854      pcregrep_exit(0);
1855      break;
1856    
1857      default:
1858      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1859      pcregrep_exit(usage(2));
1860      }
1861    
1862    return options;
1863    }
1864    
1865    
1866    
1867    
1868    /*************************************************
1869    *          Construct printed ordinal             *
1870    *************************************************/
1871    
/* This turns a number into "1st", "2nd", "3rd", "4th", etc. The suffix is
chosen from the last digit, except that numbers ending in 11, 12, or 13 take
"th" ("11th", "112th"), not "st", "nd", or "rd".

Argument:   n    the number
Returns:    pointer to a static buffer holding the text; it is overwritten by
            the next call, so the result must be used or copied before then
*/

static char *
ordin(int n)
{
/* Three decimal digits per byte is an upper bound on the digits of an int;
the extra bytes are for a minus sign, the two-letter suffix, and the zero. */

static char buffer[sizeof(int) * 3 + 5];
const char *ending = "th";
char *p = buffer;

/* Work with the magnitude so that the remainder tests below never see a
negative value; the subtraction is done unsigned so it cannot overflow. */

unsigned int mag = (n < 0)? 0u - (unsigned int)n : (unsigned int)n;
unsigned int lasttwo = mag % 100;

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (mag % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(p, "%d", n);
while (*p != 0) p++;
strcpy(p, ending);
return buffer;
}
1890    
1891    
1892    
1893    /*************************************************
1894    *          Compile a single pattern              *
1895    *************************************************/
1896    
1897    /* When the -F option has been used, this is called for each substring.
1898    Otherwise it's called for each supplied pattern.
1899    
1900    Arguments:
1901      pattern        the pattern string
1902      options        the PCRE options
1903      filename       the file name, or NULL for a command-line pattern
1904      count          0 if this is the only command line pattern, or
1905                     number of the command line pattern, or
1906                     linenumber for a pattern from a file
1907    
1908    Returns:         TRUE on success, FALSE after an error
1909    */
1910    
1911    static BOOL
1912    compile_single_pattern(char *pattern, int options, char *filename, int count)
1913    {
1914    char buffer[MBUFTHIRD + 16];
1915    const char *error;
1916    int errptr;
1917    
1918    if (pattern_count >= MAX_PATTERN_COUNT)
1919      {
1920      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1921        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1922      return FALSE;
1923      }
1924    
1925    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1926      suffix[process_options]);
1927    pattern_list[pattern_count] =
1928      pcre_compile(buffer, options, &error, &errptr, pcretables);
1929    if (pattern_list[pattern_count] != NULL)
1930      {
1931      pattern_count++;
1932      return TRUE;
1933      }
1934    
1935    /* Handle compile errors */
1936    
1937    errptr -= (int)strlen(prefix[process_options]);
1938    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1939    
1940    if (filename == NULL)
1941      {
1942      if (count == 0)
1943        fprintf(stderr, "pcregrep: Error in command-line regex "
1944          "at offset %d: %s\n", errptr, error);
1945      else
1946        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1947          "at offset %d: %s\n", ordin(count), errptr, error);
1948      }
1949    else
1950      {
1951      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1952        "at offset %d: %s\n", count, filename, errptr, error);
1953      }
1954    
1955    return FALSE;
1956    }
1957    
1958    
1959    
1960    /*************************************************
1961    *           Compile one supplied pattern         *
1962  *************************************************/  *************************************************/
1963    
1964  static int  /* When the -F option has been used, each string may be a list of strings,
1965  handle_option(int letter, int options)  separated by line breaks. They will be matched literally.
1966    
1967    Arguments:
1968      pattern        the pattern string
1969      options        the PCRE options
1970      filename       the file name, or NULL for a command-line pattern
1971      count          0 if this is the only command line pattern, or
1972                     number of the command line pattern, or
1973                     linenumber for a pattern from a file
1974    
1975    Returns:         TRUE on success, FALSE after an error
1976    */
1977    
1978    static BOOL
1979    compile_pattern(char *pattern, int options, char *filename, int count)
1980  {  {
1981  switch(letter)  if ((process_options & PO_FIXED_STRINGS) != 0)
1982    {    {
1983    case -1:  help(); exit(0);    char *eop = pattern + strlen(pattern);
1984    case 'c': count_only = TRUE; break;    char buffer[MBUFTHIRD];
1985    case 'h': filenames = FALSE; break;    for(;;)
1986    case 'i': options |= PCRE_CASELESS; break;      {
1987    case 'l': filenames_only = TRUE;      int ellength;
1988    case 'n': number = TRUE; break;      char *p = end_of_line(pattern, eop, &ellength);
1989    case 'r': recurse = TRUE; break;      if (ellength == 0)
1990    case 's': silent = TRUE; break;        return compile_single_pattern(pattern, options, filename, count);
1991    case 'v': invert = TRUE; break;      sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1992    case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;      pattern = p;
1993        if (!compile_single_pattern(buffer, options, filename, count))
1994    case 'V':        return FALSE;
1995    fprintf(stderr, "pcregrep version %s using ", VERSION);      }
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
   exit(0);  
   break;  
   
   default:  
   fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);  
   exit(usage(2));  
1996    }    }
1997    else return compile_single_pattern(pattern, options, filename, count);
 return options;  
1998  }  }
1999    
2000    
2001    
   
2002  /*************************************************  /*************************************************
2003  *                Main program                    *  *                Main program                    *
2004  *************************************************/  *************************************************/
2005    
2006    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2007    
2008  int  int
2009  main(int argc, char **argv)  main(int argc, char **argv)
2010  {  {
2011  int i, j;  int i, j;
2012  int rc = 1;  int rc = 1;
2013  int options = 0;  int pcre_options = 0;
2014    int cmd_pattern_count = 0;
2015    int hint_count = 0;
2016  int errptr;  int errptr;
 const char *error;  
2017  BOOL only_one_at_top;  BOOL only_one_at_top;
2018    char *patterns[MAX_PATTERN_COUNT];
2019    const char *locale_from = "--locale";
2020    const char *error;
2021    
2022    /* Set the default line ending value from the default in the PCRE library;
2023    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2024    Note that the return values from pcre_config(), though derived from the ASCII
2025    codes, are the same in EBCDIC environments, so we must use the actual values
2026    rather than escapes such as as '\r'. */
2027    
2028    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2029    switch(i)
2030      {
2031      default:               newline = (char *)"lf"; break;
2032      case 13:               newline = (char *)"cr"; break;
2033      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2034      case -1:               newline = (char *)"any"; break;
2035      case -2:               newline = (char *)"anycrlf"; break;
2036      }
2037    
2038  /* Process the options */  /* Process the options */
2039    
2040  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
2041    {    {
2042      option_item *op = NULL;
2043      char *option_data = (char *)"";    /* default to keep compiler happy */
2044      BOOL longop;
2045      BOOL longopwasequals = FALSE;
2046    
2047    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
2048    
2049    /* Long name options */    /* If we hit an argument that is just "-", it may be a reference to STDIN,
2050      but only if we have previously had -e or -f to define the patterns. */
2051    
2052      if (argv[i][1] == 0)
2053        {
2054        if (pattern_filename != NULL || pattern_count > 0) break;
2055          else pcregrep_exit(usage(2));
2056        }
2057    
2058      /* Handle a long name option, or -- to terminate the options */
2059    
2060    if (argv[i][1] == '-')    if (argv[i][1] == '-')
2061      {      {
2062      option_item *op;      char *arg = argv[i] + 2;
2063        char *argequals = strchr(arg, '=');
2064    
2065      if (strncmp(argv[i]+2, "file=", 5) == 0)      if (*arg == 0)    /* -- terminates options */
2066        {        {
2067        pattern_filename = argv[i] + 7;        i++;
2068        continue;        break;                /* out of the options-handling loop */
2069        }        }
2070    
2071        longop = TRUE;
2072    
2073        /* Some long options have data that follows after =, for example file=name.
2074        Some options have variations in the long name spelling: specifically, we
2075        allow "regexp" because GNU grep allows it, though I personally go along
2076        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2077        These options are entered in the table as "regex(p)". Options can be in
2078        both these categories. */
2079    
2080      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2081        {        {
2082        if (strcmp(argv[i]+2, op->long_name) == 0)        char *opbra = strchr(op->long_name, '(');
2083          char *equals = strchr(op->long_name, '=');
2084    
2085          /* Handle options with only one spelling of the name */
2086    
2087          if (opbra == NULL)     /* Does not contain '(' */
2088            {
2089            if (equals == NULL)  /* Not thing=data case */
2090              {
2091              if (strcmp(arg, op->long_name) == 0) break;
2092              }
2093            else                 /* Special case xxx=data */
2094              {
2095              int oplen = (int)(equals - op->long_name);
2096              int arglen = (argequals == NULL)?
2097                (int)strlen(arg) : (int)(argequals - arg);
2098              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2099                {
2100                option_data = arg + arglen;
2101                if (*option_data == '=')
2102                  {
2103                  option_data++;
2104                  longopwasequals = TRUE;
2105                  }
2106                break;
2107                }
2108              }
2109            }
2110    
2111          /* Handle options with an alternate spelling of the name */
2112    
2113          else
2114          {          {
2115          options = handle_option(op->one_char, options);          char buff1[24];
2116          break;          char buff2[24];
2117    
2118            int baselen = (int)(opbra - op->long_name);
2119            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2120            int arglen = (argequals == NULL || equals == NULL)?
2121              (int)strlen(arg) : (int)(argequals - arg);
2122    
2123            sprintf(buff1, "%.*s", baselen, op->long_name);
2124            sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2125    
2126            if (strncmp(arg, buff1, arglen) == 0 ||
2127               strncmp(arg, buff2, arglen) == 0)
2128              {
2129              if (equals != NULL && argequals != NULL)
2130                {
2131                option_data = argequals;
2132                if (*option_data == '=')
2133                  {
2134                  option_data++;
2135                  longopwasequals = TRUE;
2136                  }
2137                }
2138              break;
2139              }
2140          }          }
2141        }        }
2142    
2143      if (op->one_char == 0)      if (op->one_char == 0)
2144        {        {
2145        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2146        exit(usage(2));        pcregrep_exit(usage(2));
2147        }        }
2148      }      }
2149    
2150    /* One-char options */    /* Jeffrey Friedl's debugging harness uses these additional options which
2151      are not in the right form for putting in the option table because they use
2152      only one hyphen, yet are more than one character long. By putting them
2153      separately here, they will not get displayed as part of the help() output,
2154      but I don't think Jeffrey will care about that. */
2155    
2156    #ifdef JFRIEDL_DEBUG
2157      else if (strcmp(argv[i], "-pre") == 0) {
2158              jfriedl_prefix = argv[++i];
2159              continue;
2160      } else if (strcmp(argv[i], "-post") == 0) {
2161              jfriedl_postfix = argv[++i];
2162              continue;
2163      } else if (strcmp(argv[i], "-XT") == 0) {
2164              sscanf(argv[++i], "%d", &jfriedl_XT);
2165              continue;
2166      } else if (strcmp(argv[i], "-XR") == 0) {
2167              sscanf(argv[++i], "%d", &jfriedl_XR);
2168              continue;
2169      }
2170    #endif
2171    
2172    
2173      /* One-char options; many that have no data may be in a single argument; we
2174      continue till we hit the last one or one that needs data. */
2175    
2176    else    else
2177      {      {
2178      char *s = argv[i] + 1;      char *s = argv[i] + 1;
2179        longop = FALSE;
2180      while (*s != 0)      while (*s != 0)
2181        {        {
2182        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
2183          {          {
2184          pattern_filename = s + 1;          if (*s == op->one_char) break;
2185          if (pattern_filename[0] == 0)          }
2186            {        if (op->one_char == 0)
2187            if (i >= argc - 1)          {
2188              {          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2189              fprintf(stderr, "pcregrep: File name missing after -f\n");            *s, argv[i]);
2190              exit(usage(2));          pcregrep_exit(usage(2));
2191              }          }
2192            pattern_filename = argv[++i];  
2193            }        /* Check for a single-character option that has data: OP_OP_NUMBER
2194          break;        is used for one that either has a numerical number or defaults, i.e. the
2195          data is optional. If a digit follows, there is data; if not, carry on
2196          with other single-character options in the same string. */
2197    
2198          option_data = s+1;
2199          if (op->type == OP_OP_NUMBER)
2200            {
2201            if (isdigit((unsigned char)s[1])) break;
2202            }
2203          else   /* Check for end or a dataless option */
2204            {
2205            if (op->type != OP_NODATA || s[1] == 0) break;
2206          }          }
2207        else options = handle_option(*s++, options);  
2208          /* Handle a single-character option with no data, then loop for the
2209          next character in the string. */
2210    
2211          pcre_options = handle_option(*s++, pcre_options);
2212        }        }
2213      }      }
   }  
2214    
2215  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));    /* At this point we should have op pointing to a matched option. If the type
2216  hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));    is NO_DATA, it means that there is no data, and the option might set
2217      something in the PCRE options. */
2218    
2219  if (pattern_list == NULL || hints_list == NULL)    if (op->type == OP_NODATA)
2220    {      {
2221    fprintf(stderr, "pcregrep: malloc failed\n");      pcre_options = handle_option(op->one_char, pcre_options);
2222    return 2;      continue;
2223    }      }
2224    
2225  /* Compile the regular expression(s). */    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2226      either has a value or defaults to something. It cannot have data in a
2227      separate item. At the moment, the only such options are "colo(u)r",
2228      "only-matching", and Jeffrey Friedl's special -S debugging option. */
2229    
2230  if (pattern_filename != NULL)    if (*option_data == 0 &&
2231    {        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
   FILE *f = fopen(pattern_filename, "r");  
   char buffer[BUFSIZ];  
   if (f == NULL)  
2232      {      {
2233      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      switch (op->one_char)
2234        strerror(errno));        {
2235      return 2;        case N_COLOUR:
2236          colour_option = (char *)"auto";
2237          break;
2238    
2239          case 'o':
2240          only_matching = 0;
2241          break;
2242    
2243    #ifdef JFRIEDL_DEBUG
2244          case 'S':
2245          S_arg = 0;
2246          break;
2247    #endif
2248          }
2249        continue;
2250        }
2251    
2252      /* Otherwise, find the data string for the option. */
2253    
2254      if (*option_data == 0)
2255        {
2256        if (i >= argc - 1 || longopwasequals)
2257          {
2258          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2259          pcregrep_exit(usage(2));
2260          }
2261        option_data = argv[++i];
2262      }      }
2263    while (fgets(buffer, sizeof(buffer), f) != NULL)  
2264      /* If the option type is OP_PATLIST, it's the -e option, which can be called
2265      multiple times to create a list of patterns. */
2266    
2267      if (op->type == OP_PATLIST)
2268      {      {
2269      char *s = buffer + (int)strlen(buffer);      if (cmd_pattern_count >= MAX_PATTERN_COUNT)
     if (pattern_count >= MAX_PATTERN_COUNT)  
2270        {        {
2271        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2272          MAX_PATTERN_COUNT);          MAX_PATTERN_COUNT);
2273        return 2;        return 2;
2274        }        }
2275      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      patterns[cmd_pattern_count++] = option_data;
2276      if (s == buffer) continue;      }
2277      *s = 0;  
2278      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,    /* Otherwise, deal with single string or numeric data values. */
2279        &errptr, NULL);  
2280      if (pattern_list[pattern_count++] == NULL)    else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2281               op->type != OP_OP_NUMBER)
2282        {
2283        *((char **)op->dataptr) = option_data;
2284        }
2285    
2286      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2287      only for unpicking arguments, so just keep it simple. */
2288    
2289      else
2290        {
2291        unsigned long int n = 0;
2292        char *endptr = option_data;
2293        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2294        while (isdigit((unsigned char)(*endptr)))
2295          n = n * 10 + (int)(*endptr++ - '0');
2296        if (*endptr != 0)
2297        {        {
2298        fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",        if (longop)
2299          pattern_count, errptr, error);          {
2300        return 2;          char *equals = strchr(op->long_name, '=');
2301            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2302              (int)(equals - op->long_name);
2303            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2304              option_data, nlen, op->long_name);
2305            }
2306          else
2307            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2308              option_data, op->one_char);
2309          pcregrep_exit(usage(2));
2310        }        }
2311        if (op->type == OP_LONGNUMBER)
2312            *((unsigned long int *)op->dataptr) = n;
2313        else
2314            *((int *)op->dataptr) = n;
2315        }
2316      }
2317    
2318    /* Options have been decoded. If -C was used, its value is used as a default
2319    for -A and -B. */
2320    
2321    if (both_context > 0)
2322      {
2323      if (after_context == 0) after_context = both_context;
2324      if (before_context == 0) before_context = both_context;
2325      }
2326    
2327    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2328    However, the latter two set only_matching. */
2329    
2330    if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2331        (file_offsets && line_offsets))
2332      {
2333      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2334        "and/or --line-offsets\n");
2335      pcregrep_exit(usage(2));
2336      }
2337    
2338    if (file_offsets || line_offsets) only_matching = 0;
2339    
2340    /* If a locale has not been provided as an option, see if the LC_CTYPE or
2341    LC_ALL environment variable is set, and if so, use it. */
2342    
2343    if (locale == NULL)
2344      {
2345      locale = getenv("LC_ALL");
2346      locale_from = "LC_ALL";
2347      }
2348    
2349    if (locale == NULL)
2350      {
2351      locale = getenv("LC_CTYPE");
2352      locale_from = "LC_CTYPE";
2353      }
2354    
2355    /* If a locale has been provided, set it, and generate the tables the PCRE
2356    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2357    
2358    if (locale != NULL)
2359      {
2360      if (setlocale(LC_CTYPE, locale) == NULL)
2361        {
2362        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2363          locale, locale_from);
2364        return 2;
2365        }
2366      pcretables = pcre_maketables();
2367      }
2368    
2369    /* Sort out colouring */
2370    
2371    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2372      {
2373      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2374      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2375      else
2376        {
2377        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2378          colour_option);
2379        return 2;
2380        }
2381      if (do_colour)
2382        {
2383        char *cs = getenv("PCREGREP_COLOUR");
2384        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2385        if (cs != NULL) colour_string = cs;
2386      }      }
   fclose(f);  
2387    }    }
2388    
2389  /* If no file name, a single regex must be given inline */  /* Interpret the newline type; the default settings are Unix-like. */
2390    
2391    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2392      {
2393      pcre_options |= PCRE_NEWLINE_CR;
2394      endlinetype = EL_CR;
2395      }
2396    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2397      {
2398      pcre_options |= PCRE_NEWLINE_LF;
2399      endlinetype = EL_LF;
2400      }
2401    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2402      {
2403      pcre_options |= PCRE_NEWLINE_CRLF;
2404      endlinetype = EL_CRLF;
2405      }
2406    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2407      {
2408      pcre_options |= PCRE_NEWLINE_ANY;
2409      endlinetype = EL_ANY;
2410      }
2411    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2412      {
2413      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2414      endlinetype = EL_ANYCRLF;
2415      }
2416  else  else
2417    {    {
2418    if (i >= argc) return usage(0);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2419    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);    return 2;
2420    if (pattern_list[0] == NULL)    }
2421    
2422    /* Interpret the text values for -d and -D */
2423    
2424    if (dee_option != NULL)
2425      {
2426      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2427      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2428      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2429      else
2430      {      {
2431      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
       error);  
2432      return 2;      return 2;
2433      }      }
   pattern_count++;  
2434    }    }
2435    
2436  /* Study the regular expressions, as we will be running them may times */  if (DEE_option != NULL)
2437      {
2438      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2439      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2440      else
2441        {
2442        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2443        return 2;
2444        }
2445      }
2446    
2447    /* Check the values for Jeffrey Friedl's debugging options. */
2448    
2449    #ifdef JFRIEDL_DEBUG
2450    if (S_arg > 9)
2451      {
2452      fprintf(stderr, "pcregrep: bad value for -S option\n");
2453      return 2;
2454      }
2455    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2456      {
2457      if (jfriedl_XT == 0) jfriedl_XT = 1;
2458      if (jfriedl_XR == 0) jfriedl_XR = 1;
2459      }
2460    #endif
2461    
2462    /* Get memory to store the pattern and hints lists. */
2463    
2464    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2465    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2466    
2467    if (pattern_list == NULL || hints_list == NULL)
2468      {
2469      fprintf(stderr, "pcregrep: malloc failed\n");
2470      goto EXIT2;
2471      }
2472    
2473    /* If no patterns were provided by -e, and there is no file provided by -f,
2474    the first argument is the one and only pattern, and it must exist. */
2475    
2476    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2477      {
2478      if (i >= argc) return usage(2);
2479      patterns[cmd_pattern_count++] = argv[i++];
2480      }
2481    
2482    /* Compile the patterns that were provided on the command line, either by
2483    multiple uses of -e or as a single unkeyed pattern. */
2484    
2485    for (j = 0; j < cmd_pattern_count; j++)
2486      {
2487      if (!compile_pattern(patterns[j], pcre_options, NULL,
2488           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2489        goto EXIT2;
2490      }
2491    
2492    /* Compile the regular expressions that are provided in a file. */
2493    
2494    if (pattern_filename != NULL)
2495      {
2496      int linenumber = 0;
2497      FILE *f;
2498      char *filename;
2499      char buffer[MBUFTHIRD];
2500    
2501      if (strcmp(pattern_filename, "-") == 0)
2502        {
2503        f = stdin;
2504        filename = stdin_name;
2505        }
2506      else
2507        {
2508        f = fopen(pattern_filename, "r");
2509        if (f == NULL)
2510          {
2511          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2512            strerror(errno));
2513          goto EXIT2;
2514          }
2515        filename = pattern_filename;
2516        }
2517    
2518      while (fgets(buffer, MBUFTHIRD, f) != NULL)
2519        {
2520        char *s = buffer + (int)strlen(buffer);
2521        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2522        *s = 0;
2523        linenumber++;
2524        if (buffer[0] == 0) continue;   /* Skip blank lines */
2525        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2526          goto EXIT2;
2527        }
2528    
2529      if (f != stdin) fclose(f);
2530      }
2531    
2532    /* Study the regular expressions, as we will be running them many times */
2533    
2534  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
2535    {    {
# Line 513  for (j = 0; j < pattern_count; j++) Line 2539  for (j = 0; j < pattern_count; j++)
2539      char s[16];      char s[16];
2540      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2541      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2542      return 2;      goto EXIT2;
2543        }
2544      hint_count++;
2545      }
2546    
2547    /* If --match-limit or --recursion-limit was set, put the value(s) into the
2548    pcre_extra block for each pattern. */
2549    
2550    if (match_limit > 0 || match_limit_recursion > 0)
2551      {
2552      for (j = 0; j < pattern_count; j++)
2553        {
2554        if (hints_list[j] == NULL)
2555          {
2556          hints_list[j] = malloc(sizeof(pcre_extra));
2557          if (hints_list[j] == NULL)
2558            {
2559            fprintf(stderr, "pcregrep: malloc failed\n");
2560            pcregrep_exit(2);
2561            }
2562          }
2563        if (match_limit > 0)
2564          {
2565          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2566          hints_list[j]->match_limit = match_limit;
2567          }
2568        if (match_limit_recursion > 0)
2569          {
2570          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2571          hints_list[j]->match_limit_recursion = match_limit_recursion;
2572          }
2573        }
2574      }
2575    
2576    /* If there are include or exclude patterns, compile them. */
2577    
2578    if (exclude_pattern != NULL)
2579      {
2580      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2581        pcretables);
2582      if (exclude_compiled == NULL)
2583        {
2584        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2585          errptr, error);
2586        goto EXIT2;
2587        }
2588      }
2589    
2590    if (include_pattern != NULL)
2591      {
2592      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2593        pcretables);
2594      if (include_compiled == NULL)
2595        {
2596        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2597          errptr, error);
2598        goto EXIT2;
2599        }
2600      }
2601    
2602    if (exclude_dir_pattern != NULL)
2603      {
2604      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2605        pcretables);
2606      if (exclude_dir_compiled == NULL)
2607        {
2608        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2609          errptr, error);
2610        goto EXIT2;
2611        }
2612      }
2613    
2614    if (include_dir_pattern != NULL)
2615      {
2616      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2617        pcretables);
2618      if (include_dir_compiled == NULL)
2619        {
2620        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2621          errptr, error);
2622        goto EXIT2;
2623      }      }
2624    }    }
2625    
2626  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
2627    
2628  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc)
2629      {
2630      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2631      goto EXIT;
2632      }
2633    
2634  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2635  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
2636  the file name if the argument is not a directory. */  the file name if the argument is not a directory and filenames are not
2637    otherwise forced. */
2638    
2639  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
 if (filenames_only) filenames = TRUE;  
2640    
2641  for (; i < argc; i++)  for (; i < argc; i++)
2642    {    {
2643    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2644    if (frc == 0 && rc == 1) rc = 0;      only_one_at_top);
2645      if (frc > 1) rc = frc;
2646        else if (frc == 0 && rc == 1) rc = 0;
2647    }    }
2648    
2649  return rc;  EXIT:
2650    if (pattern_list != NULL)
2651      {
2652      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2653      free(pattern_list);
2654      }
2655    if (hints_list != NULL)
2656      {
2657      for (i = 0; i < hint_count; i++)
2658        {
2659        if (hints_list[i] != NULL) free(hints_list[i]);
2660        }
2661      free(hints_list);
2662      }
2663    pcregrep_exit(rc);
2664    
2665    EXIT2:
2666    rc = 2;
2667    goto EXIT;
2668  }  }
2669    
2670  /* End */  /* End of pcregrep */

Legend:
Removed from v.53  
changed lines
  Added in v.586

  ViewVC Help
Powered by ViewVC 1.1.5