/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC revision 904 by ph10, Mon Jan 23 17:30:49 2012 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix system it can recurse into directories. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2012 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55    #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 18  its pattern matching. On a Unix system i Line 70  its pattern matching. On a Unix system i
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "2.0 01-Aug-2001"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76    #if BUFSIZ > 8192
77    #define PATBUFSIZE BUFSIZ
78    #else
79    #define PATBUFSIZE 8192
80    #endif
81    
82    /* Values for the "filenames" variable, which specifies options for file name
83    output. The order is important; it is assumed that a file name is wanted for
84    all values greater than FN_DEFAULT. */
85    
86    enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92    /* Actions for the -d and -D options */
93    
94    enum { dee_READ, dee_SKIP, dee_RECURSE };
95    enum { DEE_READ, DEE_SKIP };
96    
97    /* Actions for special processing options (flag bits) */
98    
99    #define PO_WORD_MATCH     0x0001
100    #define PO_LINE_MATCH     0x0002
101    #define PO_FIXED_STRINGS  0x0004
102    
103    /* Line ending types */
104    
105    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
113    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
114    
115    
116    
117  /*************************************************  /*************************************************
118  *               Global variables                 *  *               Global variables                 *
119  *************************************************/  *************************************************/
120    
121    /* Jeffrey Friedl has some debugging requirements that are not part of the
122    regular code. */
123    
124    #ifdef JFRIEDL_DEBUG
125    static int S_arg = -1;
126    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128    static const char *jfriedl_prefix = "";
129    static const char *jfriedl_postfix = "";
130    #endif
131    
132    static int  endlinetype;
133    
134    static char *colour_string = (char *)"1;31";
135    static char *colour_option = NULL;
136    static char *dee_option = NULL;
137    static char *DEE_option = NULL;
138    static char *main_buffer = NULL;
139    static char *newline = NULL;
140  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
141    static char *stdin_name = (char *)"(standard input)";
142    static char *locale = NULL;
143    
144    static const unsigned char *pcretables = NULL;
145    
146  static int  pattern_count = 0;  static int  pattern_count = 0;
147  static pcre **pattern_list;  static pcre **pattern_list = NULL;
148  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
149    
150    static char *include_pattern = NULL;
151    static char *exclude_pattern = NULL;
152    static char *include_dir_pattern = NULL;
153    static char *exclude_dir_pattern = NULL;
154    
155    static pcre *include_compiled = NULL;
156    static pcre *exclude_compiled = NULL;
157    static pcre *include_dir_compiled = NULL;
158    static pcre *exclude_dir_compiled = NULL;
159    
160    static int after_context = 0;
161    static int before_context = 0;
162    static int both_context = 0;
163    static int bufthird = PCREGREP_BUFSIZE;
164    static int bufsize = 3*PCREGREP_BUFSIZE;
165    static int dee_action = dee_READ;
166    static int DEE_action = DEE_READ;
167    static int error_count = 0;
168    static int filenames = FN_DEFAULT;
169    static int only_matching = -1;
170    static int process_options = 0;
171    
172    #ifdef SUPPORT_PCREGREP_JIT
173    static int study_options = PCRE_STUDY_JIT_COMPILE;
174    #else
175    static int study_options = 0;
176    #endif
177    
178    static unsigned long int match_limit = 0;
179    static unsigned long int match_limit_recursion = 0;
180    
181  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
182  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
183  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
184    static BOOL hyphenpending = FALSE;
185  static BOOL invert = FALSE;  static BOOL invert = FALSE;
186    static BOOL line_buffered = FALSE;
187    static BOOL line_offsets = FALSE;
188    static BOOL multiline = FALSE;
189  static BOOL number = FALSE;  static BOOL number = FALSE;
190  static BOOL recurse = FALSE;  static BOOL omit_zero_count = FALSE;
191    static BOOL resource_error = FALSE;
192    static BOOL quiet = FALSE;
193  static BOOL silent = FALSE;  static BOOL silent = FALSE;
194  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
195    
196  /* Structure for options and list of them */  /* Structure for options and list of them */
197    
198    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
199           OP_OP_NUMBER, OP_PATLIST };
200    
201  typedef struct option_item {  typedef struct option_item {
202      int type;
203    int one_char;    int one_char;
204    char *long_name;    void *dataptr;
205    char *help_text;    const char *long_name;
206      const char *help_text;
207  } option_item;  } option_item;
208    
209    /* Options without a single-letter equivalent get a negative value. This can be
210    used to identify them. */
211    
212    #define N_COLOUR       (-1)
213    #define N_EXCLUDE      (-2)
214    #define N_EXCLUDE_DIR  (-3)
215    #define N_HELP         (-4)
216    #define N_INCLUDE      (-5)
217    #define N_INCLUDE_DIR  (-6)
218    #define N_LABEL        (-7)
219    #define N_LOCALE       (-8)
220    #define N_NULL         (-9)
221    #define N_LOFFSETS     (-10)
222    #define N_FOFFSETS     (-11)
223    #define N_LBUFFER      (-12)
224    #define N_M_LIMIT      (-13)
225    #define N_M_LIMIT_REC  (-14)
226    #define N_BUFSIZE      (-15)
227    #define N_NOJIT        (-16)
228    
229  static option_item optionlist[] = {  static option_item optionlist[] = {
230    { -1,  "help",         "display this help and exit" },    { OP_NODATA,     N_NULL,   NULL,              "",              "  terminate options" },
231    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
232    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
233    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
234    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
235    { 'n', "line-number",  "print line number with output lines" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
236    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
237    { 's', "no-messages",  "suppress error messages" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
238    { 'V', "version",      "print version information and exit" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
239    { 'v', "invert-match", "select non-matching lines" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
240    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
241    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_PATLIST,    'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
242    { 0,    NULL,           NULL }    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
243      { OP_STRING,     'f',      &pattern_filename, "file=path",     "read patterns from file" },
244      { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
245      { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
246      { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
247      { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
248    #ifdef SUPPORT_PCREGREP_JIT
249      { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
250    #else
251      { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
252    #endif
253      { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
254      { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
255      { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
256      { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
257      { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
258      { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
259      { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
260      { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
261      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
262      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
263      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
264      { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
265      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
266      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
267      { OP_STRING,     N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
268      { OP_STRING,     N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
269      { OP_STRING,     N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
270      { OP_STRING,     N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
271    
272      /* These two were accidentally implemented with underscores instead of
273      hyphens in the option names. As this was not discovered for several releases,
274      the incorrect versions are left in the table for compatibility. However, the
275      --help function misses out any option that has an underscore in its name. */
276    
277      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
278      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
279    
280    #ifdef JFRIEDL_DEBUG
281      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
282    #endif
283      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
284      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
285      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
286      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
287      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
288      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
289      { OP_NODATA,    0,        NULL,               NULL,            NULL }
290  };  };
291    
292    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
293    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
294    that the combination of -w and -x has the same effect as -x on its own, so we
295    can treat them as the same. */
296    
297    static const char *prefix[] = {
298      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
299    
300    static const char *suffix[] = {
301      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
302    
303    /* UTF-8 tables - used only when the newline setting is "any". */
304    
305    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
306    
307    const char utf8_table4[] = {
308      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
309      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
310      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
311      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
312    
313    
314    
315    /*************************************************
316    *         Exit from the program                  *
317    *************************************************/
318    
319    /* If there has been a resource error, give a suitable message.
320    
321    Argument:  the return code
322    Returns:   does not return
323    */
324    
325    static void
326    pcregrep_exit(int rc)
327    {
328    if (resource_error)
329      {
330      fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
331        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
332        PCRE_ERROR_JIT_STACKLIMIT);
333      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
334      }
335    
336    exit(rc);
337    }
338    
339    
340  /*************************************************  /*************************************************
341  *       Functions for directory scanning         *  *            OS-specific functions               *
342  *************************************************/  *************************************************/
343    
344  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
345  although at present the only ones are for Unix, and for "no directory recursion  although at present the only ones are for Unix, Win32, and for "no support". */
 support". */  
346    
347    
348  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
349    
350  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
351  #include <sys/types.h>  #include <sys/types.h>
352  #include <sys/stat.h>  #include <sys/stat.h>
353  #include <dirent.h>  #include <dirent.h>
354    
355  typedef DIR directory_type;  typedef DIR directory_type;
356    
357  int  static int
358  isdirectory(char *filename)  isdirectory(char *filename)
359  {  {
360  struct stat statbuf;  struct stat statbuf;
# Line 92  if (stat(filename, &statbuf) < 0) Line 363  if (stat(filename, &statbuf) < 0)
363  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
364  }  }
365    
366  directory_type *  static directory_type *
367  opendirectory(char *filename)  opendirectory(char *filename)
368  {  {
369  return opendir(filename);  return opendir(filename);
370  }  }
371    
372  char *  static char *
373  readdirectory(directory_type *dir)  readdirectory(directory_type *dir)
374  {  {
375  for (;;)  for (;;)
# Line 108  for (;;) Line 379  for (;;)
379    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
380      return dent->d_name;      return dent->d_name;
381    }    }
382    /* Control never reaches here */
383    }
384    
385    static void
386    closedirectory(directory_type *dir)
387    {
388    closedir(dir);
389    }
390    
391    
392    /************* Test for regular file in Unix **********/
393    
/* Test whether a path names a regular file.

Argument:  the path to test
Returns:   1 if stat() fails or the path is a regular file; 0 otherwise

The type test uses the S_ISREG() macro from <sys/stat.h> rather than masking
with S_IFMT, so that it does not depend on the S_IF* mask constants being
made visible by the system headers. */

static int
isregfile(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 1;        /* In the expectation that opening as a file will fail */
return S_ISREG(statbuf.st_mode)? 1 : 0;
}
402    
403    
404    /************* Test for a terminal in Unix **********/
405    
406    static BOOL
407    is_stdout_tty(void)
408    {
409    return isatty(fileno(stdout));
410    }
411    
412    static BOOL
413    is_file_tty(FILE *f)
414    {
415    return isatty(fileno(f));
416    }
417    
418    
419    /************* Directory scanning in Win32 ***********/
420    
421    /* I (Philip Hazel) have no means of testing this code. It was contributed by
422    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
423    when it did not exist. David Byron added a patch that moved the #include of
424    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
425    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
426    undefined when it is indeed undefined. */
427    
428    #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
429    
430    #ifndef STRICT
431    # define STRICT
432    #endif
433    #ifndef WIN32_LEAN_AND_MEAN
434    # define WIN32_LEAN_AND_MEAN
435    #endif
436    
437    #include <windows.h>
438    
439    #ifndef INVALID_FILE_ATTRIBUTES
440    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
441    #endif
442    
/* State for one directory scan in the Win32 code. */

typedef struct directory_type
{
HANDLE handle;          /* search handle set from FindFirstFile() */
BOOL first;             /* TRUE until the entry found by FindFirstFile() has been handed out */
WIN32_FIND_DATA data;   /* entry most recently filled in by FindFirstFile()/FindNextFile() */
} directory_type;
449    
450    int
451    isdirectory(char *filename)
452    {
453    DWORD attr = GetFileAttributes(filename);
454    if (attr == INVALID_FILE_ATTRIBUTES)
455      return 0;
456    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
457    }
458    
459    directory_type *
460    opendirectory(char *filename)
461    {
462    size_t len;
463    char *pattern;
464    directory_type *dir;
465    DWORD err;
466    len = strlen(filename);
467    pattern = (char *) malloc(len + 3);
468    dir = (directory_type *) malloc(sizeof(*dir));
469    if ((pattern == NULL) || (dir == NULL))
470      {
471      fprintf(stderr, "pcregrep: malloc failed\n");
472      pcregrep_exit(2);
473      }
474    memcpy(pattern, filename, len);
475    memcpy(&(pattern[len]), "\\*", 3);
476    dir->handle = FindFirstFile(pattern, &(dir->data));
477    if (dir->handle != INVALID_HANDLE_VALUE)
478      {
479      free(pattern);
480      dir->first = TRUE;
481      return dir;
482      }
483    err = GetLastError();
484    free(pattern);
485    free(dir);
486    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
487    return NULL;
488    }
489    
490    char *
491    readdirectory(directory_type *dir)
492    {
493    for (;;)
494      {
495      if (!dir->first)
496        {
497        if (!FindNextFile(dir->handle, &(dir->data)))
498          return NULL;
499        }
500      else
501        {
502        dir->first = FALSE;
503        }
504      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
505        return dir->data.cFileName;
506      }
507    #ifndef _MSC_VER
508  return NULL;   /* Keep compiler happy; never executed */  return NULL;   /* Keep compiler happy; never executed */
509    #endif
510  }  }
511    
512  void  void
513  closedirectory(directory_type *dir)  closedirectory(directory_type *dir)
514  {  {
515  closedir(dir);  FindClose(dir->handle);
516    free(dir);
517  }  }
518    
519    
520  #else  /************* Test for regular file in Win32 **********/
521    
522    /* I don't know how to do this, or if it can be done; assume all paths are
523    regular if they are not directories. */
524    
/* Returns 1 for any path that isdirectory() does not report as a directory. */

int
isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
529    
530    
531    /************* Test for a terminal in Win32 **********/
532    
533    /* I don't know how to do this; assume never */
534    
535    static BOOL
536    is_stdout_tty(void)
537    {
538    return FALSE;
539    }
540    
541    static BOOL
542    is_file_tty(FILE *f)
543    {
544    return FALSE;
545    }
546    
547    
548  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
549    
550  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
551    
552    #else
553    
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* A directory can never be opened. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return NULL;
}

/* There are never any entries to read. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return NULL;
}

/* There is nothing to close. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
560    
561    
562    /************* Test for regular when we can't do it **********/
563    
564    /* Assume all files are regular. */
565    
int
isregfile(char *filename)
{
(void)filename;    /* Every path is taken to be a regular file */
return 1;
}
567    
568    
569    /************* Test for a terminal when we can't do it **********/
570    
571    static BOOL
572    is_stdout_tty(void)
573    {
574    return FALSE;
575    }
576    
577    static BOOL
578    is_file_tty(FILE *f)
579    {
580    return FALSE;
581    }
582    
583  #endif  #endif
584    
585    
586    
587  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
588  /*************************************************  /*************************************************
589  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
590  *************************************************/  *************************************************/
# Line 159  return sys_errlist[n]; Line 607  return sys_errlist[n];
607    
608    
609  /*************************************************  /*************************************************
610  *              Grep an individual file           *  *            Read one line of input              *
611  *************************************************/  *************************************************/
612    
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a newline has been stored, when "length" characters have
been stored, or at end of file, whichever comes first. The space limit is
tested before each character is read, so if "length" is zero or negative
nothing is read from the file and nothing is written to the buffer.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;
while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = c;
  if (c == '\n') break;
  }
return yield;
}
640    
641    
642    
643    /*************************************************
644    *             Find end of line                   *
645    *************************************************/
646    
647    /* The length of the endline sequence that is found is set via lenptr. This may
648    be zero at the very end of the file if there is no line-ending sequence there.
649    
650  while (fgets(buffer, sizeof(buffer), in) != NULL)  Arguments:
651      p         current position in line
652      endptr    end of available data
653      lenptr    where to put the length of the eol sequence
654    
655    Returns:    pointer after the last byte of the line,
656                including the newline byte(s)
657    */
658    
659    static char *
660    end_of_line(char *p, char *endptr, int *lenptr)
661    {
662    switch(endlinetype)
663    {    {
664    BOOL match = FALSE;    default:      /* Just in case */
665    int i;    case EL_LF:
666    int length = (int)strlen(buffer);    while (p < endptr && *p != '\n') p++;
667    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    if (p < endptr)
668    linenumber++;      {
669        *lenptr = 1;
670        return p + 1;
671        }
672      *lenptr = 0;
673      return endptr;
674    
675    for (i = 0; !match && i < pattern_count; i++)    case EL_CR:
676      while (p < endptr && *p != '\r') p++;
677      if (p < endptr)
678      {      {
679      match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,      *lenptr = 1;
680        offsets, 99) >= 0;      return p + 1;
     if (match && whole_lines && offsets[1] != length) match = FALSE;  
681      }      }
682      *lenptr = 0;
683      return endptr;
684    
685    if (match != invert)    case EL_CRLF:
686      for (;;)
687      {      {
688      if (count_only) count++;      while (p < endptr && *p != '\r') p++;
689        if (++p >= endptr)
690          {
691          *lenptr = 0;
692          return endptr;
693          }
694        if (*p == '\n')
695          {
696          *lenptr = 2;
697          return p + 1;
698          }
699        }
700      break;
701    
702      case EL_ANYCRLF:
703      while (p < endptr)
704        {
705        int extra = 0;
706        register int c = *((unsigned char *)p);
707    
708      else if (filenames_only)      if (utf8 && c >= 0xc0)
709        {        {
710        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        int gcii, gcss;
711        return 0;        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
712          gcss = 6*extra;
713          c = (c & utf8_table3[extra]) << gcss;
714          for (gcii = 1; gcii <= extra; gcii++)
715            {
716            gcss -= 6;
717            c |= (p[gcii] & 0x3f) << gcss;
718            }
719        }        }
720    
721      else if (silent) return 0;      p += 1 + extra;
722    
723      else      switch (c)
724        {        {
725        if (name != NULL) fprintf(stdout, "%s:", name);        case 0x0a:    /* LF */
726        if (number) fprintf(stdout, "%d:", linenumber);        *lenptr = 1;
727        fprintf(stdout, "%s\n", buffer);        return p;
728    
729          case 0x0d:    /* CR */
730          if (p < endptr && *p == 0x0a)
731            {
732            *lenptr = 2;
733            p++;
734            }
735          else *lenptr = 1;
736          return p;
737    
738          default:
739          break;
740        }        }
741        }   /* End of loop for ANYCRLF case */
742    
743      rc = 0;    *lenptr = 0;  /* Must have hit the end */
744      }    return endptr;
   }  
745    
746  if (count_only)    case EL_ANY:
747    {    while (p < endptr)
748    if (name != NULL) fprintf(stdout, "%s:", name);      {
749    fprintf(stdout, "%d\n", count);      int extra = 0;
750    }      register int c = *((unsigned char *)p);
751    
752  return rc;      if (utf8 && c >= 0xc0)
753  }        {
754          int gcii, gcss;
755          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
756          gcss = 6*extra;
757          c = (c & utf8_table3[extra]) << gcss;
758          for (gcii = 1; gcii <= extra; gcii++)
759            {
760            gcss -= 6;
761            c |= (p[gcii] & 0x3f) << gcss;
762            }
763          }
764    
765        p += 1 + extra;
766    
767        switch (c)
768          {
769          case 0x0a:    /* LF */
770          case 0x0b:    /* VT */
771          case 0x0c:    /* FF */
772          *lenptr = 1;
773          return p;
774    
775          case 0x0d:    /* CR */
776          if (p < endptr && *p == 0x0a)
777            {
778            *lenptr = 2;
779            p++;
780            }
781          else *lenptr = 1;
782          return p;
783    
784          case 0x85:    /* NEL */
785          *lenptr = utf8? 2 : 1;
786          return p;
787    
788          case 0x2028:  /* LS */
789          case 0x2029:  /* PS */
790          *lenptr = 3;
791          return p;
792    
793          default:
794          break;
795          }
796        }   /* End of loop for ANY case */
797    
798      *lenptr = 0;  /* Must have hit the end */
799      return endptr;
800      }     /* End of overall switch */
801    }
802    
803    
804    
805  /*************************************************  /*************************************************
806  *     Grep a file or recurse into a directory    *  *         Find start of previous line            *
807  *************************************************/  *************************************************/
808    
809  static int  /* This is called when looking back for before lines to print.
810  grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,  
811    BOOL only_one_at_top)  Arguments:
812  {    p         start of the subsequent line
813  int rc = 1;    startptr  start of available data
 int sep;  
 FILE *in;  
814    
815  /* If the file is a directory and we are recursing, scan each file within it.  Returns:    pointer to the start of the previous line
816  The scanning code is localized so it can be made system-specific. */  */
817    
818  if ((sep = isdirectory(filename)) != 0 && recurse)  static char *
819    previous_line(char *p, char *startptr)
820    {
821    switch(endlinetype)
822    {    {
823    char buffer[1024];    default:      /* Just in case */
824    char *nextfile;    case EL_LF:
825    directory_type *dir = opendirectory(filename);    p--;
826      while (p > startptr && p[-1] != '\n') p--;
827      return p;
828    
829      case EL_CR:
830      p--;
831      while (p > startptr && p[-1] != '\n') p--;
832      return p;
833    
834    if (dir == NULL)    case EL_CRLF:
835      for (;;)
836      {      {
837      fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,      p -= 2;
838        strerror(errno));      while (p > startptr && p[-1] != '\n') p--;
839      return 2;      if (p <= startptr + 1 || p[-2] == '\r') return p;
840      }      }
841      return p;   /* But control should never get here */
842    
843      case EL_ANY:
844      case EL_ANYCRLF:
845      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
846      if (utf8) while ((*p & 0xc0) == 0x80) p--;
847    
848    while ((nextfile = readdirectory(dir)) != NULL)    while (p > startptr)
849      {      {
850      int frc;      register int c;
851      sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);      char *pp = p - 1;
     frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);  
     if (frc == 0 && rc == 1) rc = 0;  
     }  
852    
853    closedirectory(dir);      if (utf8)
854    return rc;        {
855    }        int extra = 0;
856          while ((*pp & 0xc0) == 0x80) pp--;
857          c = *((unsigned char *)pp);
858          if (c >= 0xc0)
859            {
860            int gcii, gcss;
861            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
862            gcss = 6*extra;
863            c = (c & utf8_table3[extra]) << gcss;
864            for (gcii = 1; gcii <= extra; gcii++)
865              {
866              gcss -= 6;
867              c |= (pp[gcii] & 0x3f) << gcss;
868              }
869            }
870          }
871        else c = *((unsigned char *)pp);
872    
873  /* If the file is not a directory, or we are not recursing, scan it. If this is      if (endlinetype == EL_ANYCRLF) switch (c)
874  the first and only argument at top level, we don't show the file name.        {
875  Otherwise, control is via the show_filenames variable. */        case 0x0a:    /* LF */
876          case 0x0d:    /* CR */
877          return p;
878    
879  in = fopen(filename, "r");        default:
880  if (in == NULL)        break;
881    {        }
   fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));  
   return 2;  
   }  
882    
883  rc = pcregrep(in, (show_filenames && !only_one_at_top)? filename : NULL);      else switch (c)
884  fclose(in);        {
885  return rc;        case 0x0a:    /* LF */
886          case 0x0b:    /* VT */
887          case 0x0c:    /* FF */
888          case 0x0d:    /* CR */
889          case 0x85:    /* NEL */
890          case 0x2028:  /* LS */
891          case 0x2029:  /* PS */
892          return p;
893    
894          default:
895          break;
896          }
897    
898        p = pp;  /* Back one character */
899        }        /* End of loop for ANY case */
900    
901      return startptr;  /* Hit start of data */
902      }     /* End of overall switch */
903  }  }
904    
905    
906    
907    
908    
909  /*************************************************  /*************************************************
910  *                Usage function                  *  *       Print the previous "after" lines         *
911  *************************************************/  *************************************************/
912    
913  static int  /* This is called if we are about to lose said lines because of buffer filling,
914  usage(int rc)  and at the end of the file. The data in the line is written using fwrite() so
915    that a binary zero does not terminate it.
916    
917    Arguments:
918      lastmatchnumber   the number of the last matching line, plus one
919      lastmatchrestart  where we restarted after the last match
920      endptr            end of available data
921      printname         filename for printing
922    
923    Returns:            nothing
924    */
925    
926    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
927      char *endptr, char *printname)
928    {
929    if (after_context > 0 && lastmatchnumber > 0)
930      {
931      int count = 0;
932      while (lastmatchrestart < endptr && count++ < after_context)
933        {
934        int ellength;
935        char *pp = lastmatchrestart;
936        if (printname != NULL) fprintf(stdout, "%s-", printname);
937        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
938        pp = end_of_line(pp, endptr, &ellength);
939        FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
940        lastmatchrestart = pp;
941        }
942      hyphenpending = TRUE;
943      }
944    }
945    
946    
947    
948    /*************************************************
949    *   Apply patterns to subject till one matches   *
950    *************************************************/
951    
952    /* This function is called to run through all patterns, looking for a match. It
953    is used multiple times for the same subject when colouring is enabled, in order
954    to find all possible matches.
955    
956    Arguments:
957      matchptr     the start of the subject
958      length       the length of the subject to match
959      startoffset  where to start matching
960      offsets      the offsets vector to fill in
961      mrc          address of where to put the result of pcre_exec()
962    
963    Returns:      TRUE if there was a match
964                  FALSE if there was no match
965                  invert if there was a non-fatal error
966    */
967    
968    static BOOL
969    match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
970      int *mrc)
971    {
972    int i;
973    size_t slen = length;
974    const char *msg = "this text:\n\n";
975    if (slen > 200)
976      {
977      slen = 200;
978      msg = "text that starts:\n\n";
979      }
980    for (i = 0; i < pattern_count; i++)
981      {
982      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
983        startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
984      if (*mrc >= 0) return TRUE;
985      if (*mrc == PCRE_ERROR_NOMATCH) continue;
986      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
987      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
988      fprintf(stderr, "%s", msg);
989      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
990      fprintf(stderr, "\n\n");
991      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
992          *mrc == PCRE_ERROR_JIT_STACKLIMIT)
993        resource_error = TRUE;
994      if (error_count++ > 20)
995        {
996        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
997        pcregrep_exit(2);
998        }
999      return invert;    /* No more matching; don't show the line again */
1000      }
1001    
1002    return FALSE;  /* No match, no errors */
1003    }
1004    
1005    
1006    
1007    /*************************************************
1008    *            Grep an individual file             *
1009    *************************************************/
1010    
1011    /* This is called from grep_or_recurse() below. It uses a buffer that is three
1012    times the value of bufthird. The matching point is never allowed to stray into
1013    the top third of the buffer, thus keeping more of the file available for
1014    context printing or for multiline scanning. For large files, the pointer will
1015    be in the middle third most of the time, so the bottom third is available for
1016    "before" context printing.
1017    
1018    Arguments:
1019      handle       the fopened FILE stream for a normal file
1020                   the gzFile pointer when reading is via libz
1021                   the BZFILE pointer when reading is via libbz2
1022      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1023      filename     the file name or NULL (for errors)
1024      printname    the file name if it is to be printed for each match
1025                   or NULL if the file name is not to be printed
1026                   it cannot be NULL if filenames[_nomatch]_only is set
1027    
1028    Returns:       0 if there was at least one match
1029                   1 otherwise (no matches)
1030                   2 if an overlong line is encountered
1031                   3 if there is a read error on a .bz2 file
1032    */
1033    
1034    static int
1035    pcregrep(void *handle, int frtype, char *filename, char *printname)
1036  {  {
1037  fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n");  int rc = 1;
1038  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  int linenumber = 1;
1039    int lastmatchnumber = 0;
1040    int count = 0;
1041    int filepos = 0;
1042    int offsets[OFFSET_SIZE];
1043    char *lastmatchrestart = NULL;
1044    char *ptr = main_buffer;
1045    char *endptr;
1046    size_t bufflength;
1047    BOOL endhyphenpending = FALSE;
1048    BOOL input_line_buffered = line_buffered;
1049    FILE *in = NULL;                    /* Ensure initialized */
1050    
1051    #ifdef SUPPORT_LIBZ
1052    gzFile ingz = NULL;
1053    #endif
1054    
1055    #ifdef SUPPORT_LIBBZ2
1056    BZFILE *inbz2 = NULL;
1057    #endif
1058    
1059    
1060    /* Do the first read into the start of the buffer and set up the pointer to end
1061    of what we have. In the case of libz, a non-zipped .gz file will be read as a
1062    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1063    fail. */
1064    
1065    #ifdef SUPPORT_LIBZ
1066    if (frtype == FR_LIBZ)
1067      {
1068      ingz = (gzFile)handle;
1069      bufflength = gzread (ingz, main_buffer, bufsize);
1070      }
1071    else
1072    #endif
1073    
1074    #ifdef SUPPORT_LIBBZ2
1075    if (frtype == FR_LIBBZ2)
1076      {
1077      inbz2 = (BZFILE *)handle;
1078      bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1079      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1080      }                                    /* without the cast it is unsigned. */
1081    else
1082    #endif
1083    
1084      {
1085      in = (FILE *)handle;
1086      if (is_file_tty(in)) input_line_buffered = TRUE;
1087      bufflength = input_line_buffered?
1088        read_one_line(main_buffer, bufsize, in) :
1089        fread(main_buffer, 1, bufsize, in);
1090      }
1091    
1092    endptr = main_buffer + bufflength;
1093    
1094    /* Loop while the current pointer is not at the end of the file. For large
1095    files, endptr will be at the end of the buffer when we are in the middle of the
1096    file, but ptr will never get there, because as soon as it gets over 2/3 of the
1097    way, the buffer is shifted left and re-filled. */
1098    
1099    while (ptr < endptr)
1100      {
1101      int endlinelength;
1102      int mrc = 0;
1103      int startoffset = 0;
1104      BOOL match;
1105      char *matchptr = ptr;
1106      char *t = ptr;
1107      size_t length, linelength;
1108    
1109      /* At this point, ptr is at the start of a line. We need to find the length
1110      of the subject string to pass to pcre_exec(). In multiline mode, it is the
1111      length remainder of the data in the buffer. Otherwise, it is the length of
1112      the next line, excluding the terminating newline. After matching, we always
1113      advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1114      option is used for compiling, so that any match is constrained to be in the
1115      first line. */
1116    
1117      t = end_of_line(t, endptr, &endlinelength);
1118      linelength = t - ptr - endlinelength;
1119      length = multiline? (size_t)(endptr - ptr) : linelength;
1120    
1121      /* Check to see if the line we are looking at extends right to the very end
1122      of the buffer without a line terminator. This means the line is too long to
1123      handle. */
1124    
1125      if (endlinelength == 0 && t == main_buffer + bufsize)
1126        {
1127        fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1128                        "pcregrep: check the --buffer-size option\n",
1129                        linenumber,
1130                        (filename == NULL)? "" : " of file ",
1131                        (filename == NULL)? "" : filename);
1132        return 2;
1133        }
1134    
1135      /* Extra processing for Jeffrey Friedl's debugging. */
1136    
1137    #ifdef JFRIEDL_DEBUG
1138      if (jfriedl_XT || jfriedl_XR)
1139      {
1140          #include <sys/time.h>
1141          #include <time.h>
1142          struct timeval start_time, end_time;
1143          struct timezone dummy;
1144          int i;
1145    
1146          if (jfriedl_XT)
1147          {
1148              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1149              const char *orig = ptr;
1150              ptr = malloc(newlen + 1);
1151              if (!ptr) {
1152                      printf("out of memory");
1153                      pcregrep_exit(2);
1154              }
1155              endptr = ptr;
1156              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1157              for (i = 0; i < jfriedl_XT; i++) {
1158                      strncpy(endptr, orig,  length);
1159                      endptr += length;
1160              }
1161              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1162              length = newlen;
1163          }
1164    
1165          if (gettimeofday(&start_time, &dummy) != 0)
1166                  perror("bad gettimeofday");
1167    
1168    
1169          for (i = 0; i < jfriedl_XR; i++)
1170              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1171                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1172    
1173          if (gettimeofday(&end_time, &dummy) != 0)
1174                  perror("bad gettimeofday");
1175    
1176          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1177                          -
1178                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1179    
1180          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1181          return 0;
1182      }
1183    #endif
1184    
1185      /* We come back here after a match when the -o option (only_matching) is set,
1186      in order to find any further matches in the same line. */
1187    
1188      ONLY_MATCHING_RESTART:
1189    
1190      /* Run through all the patterns until one matches or there is an error other
1191      than NOMATCH. This code is in a subroutine so that it can be re-used for
1192      finding subsequent matches when colouring matched lines. */
1193    
1194      match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1195    
1196      /* If it's a match or a not-match (as required), do what's wanted. */
1197    
1198      if (match != invert)
1199        {
1200        BOOL hyphenprinted = FALSE;
1201    
1202        /* We've failed if we want a file that doesn't have any matches. */
1203    
1204        if (filenames == FN_NOMATCH_ONLY) return 1;
1205    
1206        /* Just count if just counting is wanted. */
1207    
1208        if (count_only) count++;
1209    
1210        /* If all we want is a file name, there is no need to scan any more lines
1211        in the file. */
1212    
1213        else if (filenames == FN_MATCH_ONLY)
1214          {
1215          fprintf(stdout, "%s\n", printname);
1216          return 0;
1217          }
1218    
1219        /* Likewise, if all we want is a yes/no answer. */
1220    
1221        else if (quiet) return 0;
1222    
1223        /* The --only-matching option prints just the substring that matched, or a
1224        captured portion of it, as long as this string is not empty, and the
1225        --file-offsets and --line-offsets options output offsets for the matching
1226        substring (they both force --only-matching = 0). None of these options
1227        prints any context. Afterwards, adjust the start and then jump back to look
1228        for further matches in the same line. If we are in invert mode, however,
1229        nothing is printed and we do not restart - this could still be useful
1230        because the return code is set. */
1231    
1232        else if (only_matching >= 0)
1233          {
1234          if (!invert)
1235            {
1236            if (printname != NULL) fprintf(stdout, "%s:", printname);
1237            if (number) fprintf(stdout, "%d:", linenumber);
1238            if (line_offsets)
1239              fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1240                offsets[1] - offsets[0]);
1241            else if (file_offsets)
1242              fprintf(stdout, "%d,%d\n",
1243                (int)(filepos + matchptr + offsets[0] - ptr),
1244                offsets[1] - offsets[0]);
1245            else if (only_matching < mrc)
1246              {
1247              int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1248              if (plen > 0)
1249                {
1250                if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1251                FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1252                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1253                fprintf(stdout, "\n");
1254                }
1255              }
1256            else if (printname != NULL || number) fprintf(stdout, "\n");
1257            match = FALSE;
1258            if (line_buffered) fflush(stdout);
1259            rc = 0;                      /* Had some success */
1260            startoffset = offsets[1];    /* Restart after the match */
1261            goto ONLY_MATCHING_RESTART;
1262            }
1263          }
1264    
1265        /* This is the default case when none of the above options is set. We print
1266        the matching lines(s), possibly preceded and/or followed by other lines of
1267        context. */
1268    
1269        else
1270          {
1271          /* See if there is a requirement to print some "after" lines from a
1272          previous match. We never print any overlaps. */
1273    
1274          if (after_context > 0 && lastmatchnumber > 0)
1275            {
1276            int ellength;
1277            int linecount = 0;
1278            char *p = lastmatchrestart;
1279    
1280            while (p < ptr && linecount < after_context)
1281              {
1282              p = end_of_line(p, ptr, &ellength);
1283              linecount++;
1284              }
1285    
1286            /* It is important to advance lastmatchrestart during this printing so
1287            that it interacts correctly with any "before" printing below. Print
1288            each line's data using fwrite() in case there are binary zeroes. */
1289    
1290            while (lastmatchrestart < p)
1291              {
1292              char *pp = lastmatchrestart;
1293              if (printname != NULL) fprintf(stdout, "%s-", printname);
1294              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1295              pp = end_of_line(pp, endptr, &ellength);
1296              FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1297              lastmatchrestart = pp;
1298              }
1299            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1300            }
1301    
1302          /* If there were non-contiguous lines printed above, insert hyphens. */
1303    
1304          if (hyphenpending)
1305            {
1306            fprintf(stdout, "--\n");
1307            hyphenpending = FALSE;
1308            hyphenprinted = TRUE;
1309            }
1310    
1311          /* See if there is a requirement to print some "before" lines for this
1312          match. Again, don't print overlaps. */
1313    
1314          if (before_context > 0)
1315            {
1316            int linecount = 0;
1317            char *p = ptr;
1318    
1319            while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1320                   linecount < before_context)
1321              {
1322              linecount++;
1323              p = previous_line(p, main_buffer);
1324              }
1325    
1326            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1327              fprintf(stdout, "--\n");
1328    
1329            while (p < ptr)
1330              {
1331              int ellength;
1332              char *pp = p;
1333              if (printname != NULL) fprintf(stdout, "%s-", printname);
1334              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1335              pp = end_of_line(pp, endptr, &ellength);
1336              FWRITE(p, 1, pp - p, stdout);
1337              p = pp;
1338              }
1339            }
1340    
1341          /* Now print the matching line(s); ensure we set hyphenpending at the end
1342          of the file if any context lines are being output. */
1343    
1344          if (after_context > 0 || before_context > 0)
1345            endhyphenpending = TRUE;
1346    
1347          if (printname != NULL) fprintf(stdout, "%s:", printname);
1348          if (number) fprintf(stdout, "%d:", linenumber);
1349    
1350          /* In multiline mode, we want to print to the end of the line in which
1351          the end of the matched string is found, so we adjust linelength and the
1352          line number appropriately, but only when there actually was a match
1353          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1354          the match will always be before the first newline sequence. */
1355    
1356          if (multiline & !invert)
1357            {
1358            char *endmatch = ptr + offsets[1];
1359            t = ptr;
1360            while (t < endmatch)
1361              {
1362              t = end_of_line(t, endptr, &endlinelength);
1363              if (t < endmatch) linenumber++; else break;
1364              }
1365            linelength = t - ptr - endlinelength;
1366            }
1367    
1368          /*** NOTE: Use only fwrite() to output the data line, so that binary
1369          zeroes are treated as just another data character. */
1370    
1371          /* This extra option, for Jeffrey Friedl's debugging requirements,
1372          replaces the matched string, or a specific captured string if it exists,
1373          with X. When this happens, colouring is ignored. */
1374    
1375    #ifdef JFRIEDL_DEBUG
1376          if (S_arg >= 0 && S_arg < mrc)
1377            {
1378            int first = S_arg * 2;
1379            int last  = first + 1;
1380            FWRITE(ptr, 1, offsets[first], stdout);
1381            fprintf(stdout, "X");
1382            FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1383            }
1384          else
1385    #endif
1386    
1387          /* We have to split the line(s) up if colouring, and search for further
1388          matches, but not of course if the line is a non-match. */
1389    
1390          if (do_colour && !invert)
1391            {
1392            int plength;
1393            FWRITE(ptr, 1, offsets[0], stdout);
1394            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1395            FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1396            fprintf(stdout, "%c[00m", 0x1b);
1397            for (;;)
1398              {
1399              startoffset = offsets[1];
1400              if (startoffset >= (int)linelength + endlinelength ||
1401                  !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1402                break;
1403              FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1404              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1405              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1406              fprintf(stdout, "%c[00m", 0x1b);
1407              }
1408    
1409            /* In multiline mode, we may have already printed the complete line
1410            and its line-ending characters (if they matched the pattern), so there
1411            may be no more to print. */
1412    
1413            plength = (int)((linelength + endlinelength) - startoffset);
1414            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1415            }
1416    
1417          /* Not colouring; no need to search for further matches */
1418    
1419          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1420          }
1421    
1422        /* End of doing what has to be done for a match. If --line-buffered was
1423        given, flush the output. */
1424    
1425        if (line_buffered) fflush(stdout);
1426        rc = 0;    /* Had some success */
1427    
1428        /* Remember where the last match happened for after_context. We remember
1429        where we are about to restart, and that line's number. */
1430    
1431        lastmatchrestart = ptr + linelength + endlinelength;
1432        lastmatchnumber = linenumber + 1;
1433        }
1434    
1435      /* For a match in multiline inverted mode (which of course did not cause
1436      anything to be printed), we have to move on to the end of the match before
1437      proceeding. */
1438    
1439      if (multiline && invert && match)
1440        {
1441        int ellength;
1442        char *endmatch = ptr + offsets[1];
1443        t = ptr;
1444        while (t < endmatch)
1445          {
1446          t = end_of_line(t, endptr, &ellength);
1447          if (t <= endmatch) linenumber++; else break;
1448          }
1449        endmatch = end_of_line(endmatch, endptr, &ellength);
1450        linelength = endmatch - ptr - ellength;
1451        }
1452    
1453      /* Advance to after the newline and increment the line number. The file
1454      offset to the current line is maintained in filepos. */
1455    
1456      ptr += linelength + endlinelength;
1457      filepos += (int)(linelength + endlinelength);
1458      linenumber++;
1459    
1460      /* If input is line buffered, and the buffer is not yet full, read another
1461      line and add it into the buffer. */
1462    
1463      if (input_line_buffered && bufflength < (size_t)bufsize)
1464        {
1465        int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1466        bufflength += add;
1467        endptr += add;
1468        }
1469    
1470      /* If we haven't yet reached the end of the file (the buffer is full), and
1471      the current point is in the top 1/3 of the buffer, slide the buffer down by
1472      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1473      about to be lost, print them. */
1474    
1475      if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1476        {
1477        if (after_context > 0 &&
1478            lastmatchnumber > 0 &&
1479            lastmatchrestart < main_buffer + bufthird)
1480          {
1481          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1482          lastmatchnumber = 0;
1483          }
1484    
1485        /* Now do the shuffle */
1486    
1487        memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1488        ptr -= bufthird;
1489    
1490    #ifdef SUPPORT_LIBZ
1491        if (frtype == FR_LIBZ)
1492          bufflength = 2*bufthird +
1493            gzread (ingz, main_buffer + 2*bufthird, bufthird);
1494        else
1495    #endif
1496    
1497    #ifdef SUPPORT_LIBBZ2
1498        if (frtype == FR_LIBBZ2)
1499          bufflength = 2*bufthird +
1500            BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1501        else
1502    #endif
1503    
1504        bufflength = 2*bufthird +
1505          (input_line_buffered?
1506           read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1507           fread(main_buffer + 2*bufthird, 1, bufthird, in));
1508        endptr = main_buffer + bufflength;
1509    
1510        /* Adjust any last match point */
1511    
1512        if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1513        }
1514      }     /* Loop through the whole file */
1515    
1516    /* End of file; print final "after" lines if wanted; do_after_lines sets
1517    hyphenpending if it prints something. */
1518    
1519    if (only_matching < 0 && !count_only)
1520      {
1521      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1522      hyphenpending |= endhyphenpending;
1523      }
1524    
1525    /* Print the file name if we are looking for those without matches and there
1526    were none. If we found a match, we won't have got this far. */
1527    
1528    if (filenames == FN_NOMATCH_ONLY)
1529      {
1530      fprintf(stdout, "%s\n", printname);
1531      return 0;
1532      }
1533    
1534    /* Print the match count if wanted */
1535    
1536    if (count_only)
1537      {
1538      if (count > 0 || !omit_zero_count)
1539        {
1540        if (printname != NULL && filenames != FN_NONE)
1541          fprintf(stdout, "%s:", printname);
1542        fprintf(stdout, "%d\n", count);
1543        }
1544      }
1545    
1546    return rc;
1547    }
1548    
1549    
1550    
1551    /*************************************************
1552    *     Grep a file or recurse into a directory    *
1553    *************************************************/
1554    
1555    /* Given a path name, if it's a directory, scan all the files if we are
1556    recursing; if it's a file, grep it.
1557    
1558    Arguments:
1559      pathname          the path to investigate
1560      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1561      only_one_at_top   TRUE if the path is the only one at toplevel
1562    
1563    Returns:   0 if there was at least one match
1564               1 if there were no matches
1565               2 there was some kind of error
1566    
1567    However, file opening failures are suppressed if "silent" is set.
1568    */
1569    
1570    static int
1571    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1572    {
1573    int rc = 1;
1574    int sep;
1575    int frtype;
1576    void *handle;
1577    FILE *in = NULL;           /* Ensure initialized */
1578    
1579    #ifdef SUPPORT_LIBZ
1580    gzFile ingz = NULL;
1581    #endif
1582    
1583    #ifdef SUPPORT_LIBBZ2
1584    BZFILE *inbz2 = NULL;
1585    #endif
1586    
1587    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBZ2
1588    int pathlen;
1589    #endif
1590    
1591    /* If the file name is "-" we scan stdin */
1592    
1593    if (strcmp(pathname, "-") == 0)
1594      {
1595      return pcregrep(stdin, FR_PLAIN, stdin_name,
1596        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1597          stdin_name : NULL);
1598      }
1599    
1600    /* If the file is a directory, skip if skipping or if we are recursing, scan
1601    each file and directory within it, subject to any include or exclude patterns
1602    that were set. The scanning code is localized so it can be made
1603    system-specific. */
1604    
1605    if ((sep = isdirectory(pathname)) != 0)
1606      {
1607      if (dee_action == dee_SKIP) return 1;
1608      if (dee_action == dee_RECURSE)
1609        {
1610        char buffer[1024];
1611        char *nextfile;
1612        directory_type *dir = opendirectory(pathname);
1613    
1614        if (dir == NULL)
1615          {
1616          if (!silent)
1617            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1618              strerror(errno));
1619          return 2;
1620          }
1621    
1622        while ((nextfile = readdirectory(dir)) != NULL)
1623          {
1624          int frc, nflen;
1625          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1626          nflen = (int)(strlen(nextfile));
1627    
1628          if (isdirectory(buffer))
1629            {
1630            if (exclude_dir_compiled != NULL &&
1631                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1632              continue;
1633    
1634            if (include_dir_compiled != NULL &&
1635                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1636              continue;
1637            }
1638          else
1639            {
1640            if (exclude_compiled != NULL &&
1641                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1642              continue;
1643    
1644            if (include_compiled != NULL &&
1645                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1646              continue;
1647            }
1648    
1649          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1650          if (frc > 1) rc = frc;
1651           else if (frc == 0 && rc == 1) rc = 0;
1652          }
1653    
1654        closedirectory(dir);
1655        return rc;
1656        }
1657      }
1658    
1659    /* If the file is not a directory and not a regular file, skip it if that's
1660    been requested. */
1661    
1662    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1663    
1664    /* Control reaches here if we have a regular file, or if we have a directory
1665    and recursion or skipping was not requested, or if we have anything else and
1666    skipping was not requested. The scan proceeds. If this is the first and only
1667    argument at top level, we don't show the file name, unless we are only showing
1668    the file name, or the filename was forced (-H). */
1669    
1670    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBZ2
1671    pathlen = (int)(strlen(pathname));
1672    #endif
1673    
1674    /* Open using zlib if it is supported and the file name ends with .gz. */
1675    
1676    #ifdef SUPPORT_LIBZ
1677    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1678      {
1679      ingz = gzopen(pathname, "rb");
1680      if (ingz == NULL)
1681        {
1682        if (!silent)
1683          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1684            strerror(errno));
1685        return 2;
1686        }
1687      handle = (void *)ingz;
1688      frtype = FR_LIBZ;
1689      }
1690    else
1691    #endif
1692    
1693    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1694    
1695    #ifdef SUPPORT_LIBBZ2
1696    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1697      {
1698      inbz2 = BZ2_bzopen(pathname, "rb");
1699      handle = (void *)inbz2;
1700      frtype = FR_LIBBZ2;
1701      }
1702    else
1703    #endif
1704    
1705    /* Otherwise use plain fopen(). The label is so that we can come back here if
1706    an attempt to read a .bz2 file indicates that it really is a plain file. */
1707    
1708    #ifdef SUPPORT_LIBBZ2
1709    PLAIN_FILE:
1710    #endif
1711      {
1712      in = fopen(pathname, "rb");
1713      handle = (void *)in;
1714      frtype = FR_PLAIN;
1715      }
1716    
1717    /* All the opening methods return errno when they fail. */
1718    
1719    if (handle == NULL)
1720      {
1721      if (!silent)
1722        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1723          strerror(errno));
1724      return 2;
1725      }
1726    
1727    /* Now grep the file */
1728    
1729    rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1730      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1731    
1732    /* Close in an appropriate manner. */
1733    
1734    #ifdef SUPPORT_LIBZ
1735    if (frtype == FR_LIBZ)
1736      gzclose(ingz);
1737    else
1738    #endif
1739    
1740    /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1741    read failed. If the error indicates that the file isn't in fact bzipped, try
1742    again as a normal file. */
1743    
1744    #ifdef SUPPORT_LIBBZ2
1745    if (frtype == FR_LIBBZ2)
1746      {
1747      if (rc == 3)
1748        {
1749        int errnum;
1750        const char *err = BZ2_bzerror(inbz2, &errnum);
1751        if (errnum == BZ_DATA_ERROR_MAGIC)
1752          {
1753          BZ2_bzclose(inbz2);
1754          goto PLAIN_FILE;
1755          }
1756        else if (!silent)
1757          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1758            pathname, err);
1759        rc = 2;    /* The normal "something went wrong" code */
1760        }
1761      BZ2_bzclose(inbz2);
1762      }
1763    else
1764    #endif
1765    
1766    /* Normal file close */
1767    
1768    fclose(in);
1769    
1770    /* Pass back the yield from pcregrep(). */
1771    
1772    return rc;
1773    }
1774    
1775    
1776    
1777    
1778    /*************************************************
1779    *                Usage function                  *
1780    *************************************************/
1781    
1782    static int
1783    usage(int rc)
1784    {
1785    option_item *op;
1786    fprintf(stderr, "Usage: pcregrep [-");
1787    for (op = optionlist; op->one_char != 0; op++)
1788      {
1789      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1790      }
1791    fprintf(stderr, "] [long options] [pattern] [files]\n");
1792    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1793      "options.\n");
1794  return rc;  return rc;
1795  }  }
1796    
# Line 304  help(void) Line 1806  help(void)
1806  {  {
1807  option_item *op;  option_item *op;
1808    
1809  printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1810  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1811  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1812    printf("\"-\" can be used as a file name to mean STDIN.\n");
1813    
1814    #ifdef SUPPORT_LIBZ
1815    printf("Files whose names end in .gz are read using zlib.\n");
1816    #endif
1817    
1818    #ifdef SUPPORT_LIBBZ2
1819    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1820    #endif
1821    
1822    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1823    printf("Other files and the standard input are read as plain files.\n\n");
1824    #else
1825    printf("All files are read as plain files, without any interpretation.\n\n");
1826    #endif
1827    
1828    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1829  printf("Options:\n");  printf("Options:\n");
1830    
1831  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
1832    {    {
1833    int n;    int n;
1834    char s[4];    char s[4];
1835    
1836      /* Two options were accidentally implemented and documented with underscores
1837      instead of hyphens in their names, something that was not noticed for quite a
1838      few releases. When fixing this, I left the underscored versions in the list
1839      in case people were using them. However, we don't want to display them in the
1840      help data. There are no other options that contain underscores, and we do not
1841      expect ever to implement such options. Therefore, just omit any option that
1842      contains an underscore. */
1843    
1844      if (strchr(op->long_name, '_') != NULL) continue;
1845    
1846    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1847    printf("  %s --%s%n", s, op->long_name, &n);    n = 31 - printf("  %s --%s", s, op->long_name);
   n = 30 - n;  
1848    if (n < 1) n = 1;    if (n < 1) n = 1;
1849    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                     ", op->help_text);
1850    }    }
1851    
1852  printf("\n  -f<filename>  or  --file=<filename>\n");  printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
1853  printf("    Read patterns from <filename> instead of using a command line option.\n");  printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
1854  printf("    Trailing white space is removed; blanks lines are ignored.\n");  printf("When reading patterns from a file instead of using a command line option,\n");
1855  printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  printf("trailing white space is removed and blank lines are ignored.\n");
1856    printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
1857      MAX_PATTERN_COUNT, PATBUFSIZE);
1858    
1859  printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1860  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1861  }  }
1862    
# Line 334  printf("Exit status is 0 if any matches, Line 1864  printf("Exit status is 0 if any matches,
1864    
1865    
1866  /*************************************************  /*************************************************
1867  *                Handle an option                *  *    Handle a single-letter, no data option      *
1868  *************************************************/  *************************************************/
1869    
1870  static int  static int
# Line 342  handle_option(int letter, int options) Line 1872  handle_option(int letter, int options)
1872  {  {
1873  switch(letter)  switch(letter)
1874    {    {
1875    case -1:  help(); exit(0);    case N_FOFFSETS: file_offsets = TRUE; break;
1876      case N_HELP: help(); pcregrep_exit(0);
1877      case N_LBUFFER: line_buffered = TRUE; break;
1878      case N_LOFFSETS: line_offsets = number = TRUE; break;
1879      case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
1880    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1881    case 'h': filenames = FALSE; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1882      case 'H': filenames = FN_FORCE; break;
1883      case 'h': filenames = FN_NONE; break;
1884    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1885    case 'l': filenames_only = TRUE;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1886      case 'L': filenames = FN_NOMATCH_ONLY; break;
1887      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1888    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1889    case 'r': recurse = TRUE; break;    case 'o': only_matching = 0; break;
1890      case 'q': quiet = TRUE; break;
1891      case 'r': dee_action = dee_RECURSE; break;
1892    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1893      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1894    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1895    case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1896      case 'x': process_options |= PO_LINE_MATCH; break;
1897    
1898      case 'V':
1899      fprintf(stderr, "pcregrep version %s\n", pcre_version());
1900      pcregrep_exit(0);
1901      break;
1902    
1903      default:
1904      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1905      pcregrep_exit(usage(2));
1906      }
1907    
1908    return options;
1909    }
1910    
1911    
1912    
1913    
1914    /*************************************************
1915    *          Construct printed ordinal             *
1916    *************************************************/
1917    
/* This turns a number into "1st", "2nd", "3rd", "4th", etc. Numbers whose last
two digits are 11, 12, or 13 take "th" ("11th", "112th"). The result is held in
a static buffer that is overwritten by the next call.

Argument:   the number
Returns:    pointer to the static buffer containing the ordinal text
*/

static char *
ordin(int n)
{
/* Room for any int in decimal (at most 3 digits per byte), a sign, a
two-letter suffix, and the terminating zero. */

static char buffer[3*sizeof(int) + 4];
char *p = buffer;
sprintf(p, "%d", n);
while (*p != 0) p++;

/* The suffix depends on the last digit, except that the "teens" 11-13 are
irregular; map them onto a value whose last digit selects "th". */

n %= 100;
if (n >= 11 && n <= 13) n = 0;

switch (n%10)
  {
  case 1: strcpy(p, "st"); break;
  case 2: strcpy(p, "nd"); break;
  case 3: strcpy(p, "rd"); break;
  default: strcpy(p, "th"); break;
  }
return buffer;
}
1936    
1937    
1938    
1939    /*************************************************
1940    *          Compile a single pattern              *
1941    *************************************************/
1942    
1943    /* When the -F option has been used, this is called for each substring.
1944    Otherwise it's called for each supplied pattern.
1945    
1946    Arguments:
1947      pattern        the pattern string
1948      options        the PCRE options
1949      filename       the file name, or NULL for a command-line pattern
1950      count          0 if this is the only command line pattern, or
1951                     number of the command line pattern, or
1952                     linenumber for a pattern from a file
1953    
1954    Returns:         TRUE on success, FALSE after an error
1955    */
1956    
1957    static BOOL
1958    compile_single_pattern(char *pattern, int options, char *filename, int count)
1959    {
1960    char buffer[PATBUFSIZE];
1961    const char *error;
1962    int errptr;
1963    
1964    if (pattern_count >= MAX_PATTERN_COUNT)
1965      {
1966      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1967        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1968      return FALSE;
1969      }
1970    
1971    sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
1972      suffix[process_options]);
1973    pattern_list[pattern_count] =
1974      pcre_compile(buffer, options, &error, &errptr, pcretables);
1975    if (pattern_list[pattern_count] != NULL)
1976      {
1977      pattern_count++;
1978      return TRUE;
1979      }
1980    
1981    /* Handle compile errors */
1982    
1983    errptr -= (int)strlen(prefix[process_options]);
1984    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1985    
1986    if (filename == NULL)
1987      {
1988      if (count == 0)
1989        fprintf(stderr, "pcregrep: Error in command-line regex "
1990          "at offset %d: %s\n", errptr, error);
1991      else
1992        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1993          "at offset %d: %s\n", ordin(count), errptr, error);
1994      }
1995    else
1996      {
1997      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1998        "at offset %d: %s\n", count, filename, errptr, error);
1999      }
2000    
2001    return FALSE;
2002    }
2003    
2004    
2005    
2006    /*************************************************
2007    *           Compile one supplied pattern         *
2008    *************************************************/
2009    
2010    /* When the -F option has been used, each string may be a list of strings,
2011    separated by line breaks. They will be matched literally.
2012    
2013    Arguments:
2014      pattern        the pattern string
2015      options        the PCRE options
2016      filename       the file name, or NULL for a command-line pattern
2017      count          0 if this is the only command line pattern, or
2018                     number of the command line pattern, or
2019                     linenumber for a pattern from a file
2020    
2021    case 'V':  Returns:         TRUE on success, FALSE after an error
2022    fprintf(stderr, "pcregrep version %s using ", VERSION);  */
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
   exit(0);  
   break;  
2023    
2024    default:  static BOOL
2025    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);  compile_pattern(char *pattern, int options, char *filename, int count)
2026    exit(usage(2));  {
2027    if ((process_options & PO_FIXED_STRINGS) != 0)
2028      {
2029      char *eop = pattern + strlen(pattern);
2030      char buffer[PATBUFSIZE];
2031      for(;;)
2032        {
2033        int ellength;
2034        char *p = end_of_line(pattern, eop, &ellength);
2035        if (ellength == 0)
2036          return compile_single_pattern(pattern, options, filename, count);
2037        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
2038        pattern = p;
2039        if (!compile_single_pattern(buffer, options, filename, count))
2040          return FALSE;
2041        }
2042    }    }
2043    else return compile_single_pattern(pattern, options, filename, count);
 return options;  
2044  }  }
2045    
2046    
2047    
   
2048  /*************************************************  /*************************************************
2049  *                Main program                    *  *                Main program                    *
2050  *************************************************/  *************************************************/
2051    
2052    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2053    
2054  int  int
2055  main(int argc, char **argv)  main(int argc, char **argv)
2056  {  {
2057  int i, j;  int i, j;
2058  int rc = 1;  int rc = 1;
2059  int options = 0;  int pcre_options = 0;
2060    int cmd_pattern_count = 0;
2061    int hint_count = 0;
2062  int errptr;  int errptr;
 const char *error;  
2063  BOOL only_one_at_top;  BOOL only_one_at_top;
2064    char *patterns[MAX_PATTERN_COUNT];
2065    const char *locale_from = "--locale";
2066    const char *error;
2067    
2068    #ifdef SUPPORT_PCREGREP_JIT
2069    pcre_jit_stack *jit_stack = NULL;
2070    #endif
2071    
2072    /* Set the default line ending value from the default in the PCRE library;
2073    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2074    Note that the return values from pcre_config(), though derived from the ASCII
2075    codes, are the same in EBCDIC environments, so we must use the actual values
2076    rather than escapes such as '\r'. */
2077    
2078    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2079    switch(i)
2080      {
2081      default:               newline = (char *)"lf"; break;
2082      case 13:               newline = (char *)"cr"; break;
2083      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2084      case -1:               newline = (char *)"any"; break;
2085      case -2:               newline = (char *)"anycrlf"; break;
2086      }
2087    
2088  /* Process the options */  /* Process the options */
2089    
2090  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
2091    {    {
2092      option_item *op = NULL;
2093      char *option_data = (char *)"";    /* default to keep compiler happy */
2094      BOOL longop;
2095      BOOL longopwasequals = FALSE;
2096    
2097    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
2098    
2099    /* Long name options */    /* If we hit an argument that is just "-", it may be a reference to STDIN,
2100      but only if we have previously had -e or -f to define the patterns. */
2101    
2102      if (argv[i][1] == 0)
2103        {
2104        if (pattern_filename != NULL || pattern_count > 0) break;
2105          else pcregrep_exit(usage(2));
2106        }
2107    
2108      /* Handle a long name option, or -- to terminate the options */
2109    
2110    if (argv[i][1] == '-')    if (argv[i][1] == '-')
2111      {      {
2112      option_item *op;      char *arg = argv[i] + 2;
2113        char *argequals = strchr(arg, '=');
2114    
2115      if (strncmp(argv[i]+2, "file=", 5) == 0)      if (*arg == 0)    /* -- terminates options */
2116        {        {
2117        pattern_filename = argv[i] + 7;        i++;
2118        continue;        break;                /* out of the options-handling loop */
2119        }        }
2120    
2121        longop = TRUE;
2122    
2123        /* Some long options have data that follows after =, for example file=name.
2124        Some options have variations in the long name spelling: specifically, we
2125        allow "regexp" because GNU grep allows it, though I personally go along
2126        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2127        These options are entered in the table as "regex(p)". Options can be in
2128        both these categories. */
2129    
2130      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2131        {        {
2132        if (strcmp(argv[i]+2, op->long_name) == 0)        char *opbra = strchr(op->long_name, '(');
2133          char *equals = strchr(op->long_name, '=');
2134    
2135          /* Handle options with only one spelling of the name */
2136    
2137          if (opbra == NULL)     /* Does not contain '(' */
2138            {
2139            if (equals == NULL)  /* Not thing=data case */
2140              {
2141              if (strcmp(arg, op->long_name) == 0) break;
2142              }
2143            else                 /* Special case xxx=data */
2144              {
2145              int oplen = (int)(equals - op->long_name);
2146              int arglen = (argequals == NULL)?
2147                (int)strlen(arg) : (int)(argequals - arg);
2148              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2149                {
2150                option_data = arg + arglen;
2151                if (*option_data == '=')
2152                  {
2153                  option_data++;
2154                  longopwasequals = TRUE;
2155                  }
2156                break;
2157                }
2158              }
2159            }
2160    
2161          /* Handle options with an alternate spelling of the name */
2162    
2163          else
2164          {          {
2165          options = handle_option(op->one_char, options);          char buff1[24];
2166          break;          char buff2[24];
2167    
2168            int baselen = (int)(opbra - op->long_name);
2169            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2170            int arglen = (argequals == NULL || equals == NULL)?
2171              (int)strlen(arg) : (int)(argequals - arg);
2172    
2173            sprintf(buff1, "%.*s", baselen, op->long_name);
2174            sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2175    
2176            if (strncmp(arg, buff1, arglen) == 0 ||
2177               strncmp(arg, buff2, arglen) == 0)
2178              {
2179              if (equals != NULL && argequals != NULL)
2180                {
2181                option_data = argequals;
2182                if (*option_data == '=')
2183                  {
2184                  option_data++;
2185                  longopwasequals = TRUE;
2186                  }
2187                }
2188              break;
2189              }
2190          }          }
2191        }        }
2192    
2193      if (op->one_char == 0)      if (op->one_char == 0)
2194        {        {
2195        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2196        exit(usage(2));        pcregrep_exit(usage(2));
2197        }        }
2198      }      }
2199    
2200    /* One-char options */    /* Jeffrey Friedl's debugging harness uses these additional options which
2201      are not in the right form for putting in the option table because they use
2202      only one hyphen, yet are more than one character long. By putting them
2203      separately here, they will not get displayed as part of the help() output,
2204      but I don't think Jeffrey will care about that. */
2205    
2206    #ifdef JFRIEDL_DEBUG
2207      else if (strcmp(argv[i], "-pre") == 0) {
2208              jfriedl_prefix = argv[++i];
2209              continue;
2210      } else if (strcmp(argv[i], "-post") == 0) {
2211              jfriedl_postfix = argv[++i];
2212              continue;
2213      } else if (strcmp(argv[i], "-XT") == 0) {
2214              sscanf(argv[++i], "%d", &jfriedl_XT);
2215              continue;
2216      } else if (strcmp(argv[i], "-XR") == 0) {
2217              sscanf(argv[++i], "%d", &jfriedl_XR);
2218              continue;
2219      }
2220    #endif
2221    
2222    
2223      /* One-char options; many that have no data may be in a single argument; we
2224      continue till we hit the last one or one that needs data. */
2225    
2226    else    else
2227      {      {
2228      char *s = argv[i] + 1;      char *s = argv[i] + 1;
2229        longop = FALSE;
2230      while (*s != 0)      while (*s != 0)
2231        {        {
2232        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
2233          {          {
2234          pattern_filename = s + 1;          if (*s == op->one_char) break;
2235          if (pattern_filename[0] == 0)          }
2236            {        if (op->one_char == 0)
2237            if (i >= argc - 1)          {
2238              {          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2239              fprintf(stderr, "pcregrep: File name missing after -f\n");            *s, argv[i]);
2240              exit(usage(2));          pcregrep_exit(usage(2));
2241              }          }
2242            pattern_filename = argv[++i];  
2243            }        /* Check for a single-character option that has data: OP_OP_NUMBER
2244          break;        is used for one that either has a numerical number or defaults, i.e. the
2245          data is optional. If a digit follows, there is data; if not, carry on
2246          with other single-character options in the same string. */
2247    
2248          option_data = s+1;
2249          if (op->type == OP_OP_NUMBER)
2250            {
2251            if (isdigit((unsigned char)s[1])) break;
2252            }
2253          else   /* Check for end or a dataless option */
2254            {
2255            if (op->type != OP_NODATA || s[1] == 0) break;
2256          }          }
2257        else options = handle_option(*s++, options);  
2258          /* Handle a single-character option with no data, then loop for the
2259          next character in the string. */
2260    
2261          pcre_options = handle_option(*s++, pcre_options);
2262        }        }
2263      }      }
   }  
2264    
2265  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));    /* At this point we should have op pointing to a matched option. If the type
2266  hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));    is NO_DATA, it means that there is no data, and the option might set
2267      something in the PCRE options. */
2268    
2269  if (pattern_list == NULL || hints_list == NULL)    if (op->type == OP_NODATA)
2270    {      {
2271    fprintf(stderr, "pcregrep: malloc failed\n");      pcre_options = handle_option(op->one_char, pcre_options);
2272    return 2;      continue;
2273    }      }
2274    
2275  /* Compile the regular expression(s). */    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2276      either has a value or defaults to something. It cannot have data in a
2277      separate item. At the moment, the only such options are "colo(u)r",
2278      "only-matching", and Jeffrey Friedl's special -S debugging option. */
2279    
2280  if (pattern_filename != NULL)    if (*option_data == 0 &&
2281    {        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
   FILE *f = fopen(pattern_filename, "r");  
   char buffer[BUFSIZ];  
   if (f == NULL)  
2282      {      {
2283      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      switch (op->one_char)
2284        strerror(errno));        {
2285      return 2;        case N_COLOUR:
2286          colour_option = (char *)"auto";
2287          break;
2288    
2289          case 'o':
2290          only_matching = 0;
2291          break;
2292    
2293    #ifdef JFRIEDL_DEBUG
2294          case 'S':
2295          S_arg = 0;
2296          break;
2297    #endif
2298          }
2299        continue;
2300        }
2301    
2302      /* Otherwise, find the data string for the option. */
2303    
2304      if (*option_data == 0)
2305        {
2306        if (i >= argc - 1 || longopwasequals)
2307          {
2308          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2309          pcregrep_exit(usage(2));
2310          }
2311        option_data = argv[++i];
2312      }      }
2313    while (fgets(buffer, sizeof(buffer), f) != NULL)  
2314      /* If the option type is OP_PATLIST, it's the -e option, which can be called
2315      multiple times to create a list of patterns. */
2316    
2317      if (op->type == OP_PATLIST)
2318      {      {
2319      char *s = buffer + (int)strlen(buffer);      if (cmd_pattern_count >= MAX_PATTERN_COUNT)
     if (pattern_count >= MAX_PATTERN_COUNT)  
2320        {        {
2321        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2322          MAX_PATTERN_COUNT);          MAX_PATTERN_COUNT);
2323        return 2;        return 2;
2324        }        }
2325      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      patterns[cmd_pattern_count++] = option_data;
2326      if (s == buffer) continue;      }
2327      *s = 0;  
2328      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,    /* Otherwise, deal with single string or numeric data values. */
2329        &errptr, NULL);  
2330      if (pattern_list[pattern_count++] == NULL)    else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2331               op->type != OP_OP_NUMBER)
2332        {
2333        *((char **)op->dataptr) = option_data;
2334        }
2335    
2336      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2337      only for unpicking arguments, so just keep it simple. */
2338    
2339      else
2340        {
2341        unsigned long int n = 0;
2342        char *endptr = option_data;
2343        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2344        while (isdigit((unsigned char)(*endptr)))
2345          n = n * 10 + (int)(*endptr++ - '0');
2346        if (toupper(*endptr) == 'K')
2347        {        {
2348        fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",        n *= 1024;
2349          pattern_count, errptr, error);        endptr++;
2350        return 2;        }
2351        else if (toupper(*endptr) == 'M')
2352          {
2353          n *= 1024*1024;
2354          endptr++;
2355          }
2356        if (*endptr != 0)
2357          {
2358          if (longop)
2359            {
2360            char *equals = strchr(op->long_name, '=');
2361            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2362              (int)(equals - op->long_name);
2363            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2364              option_data, nlen, op->long_name);
2365            }
2366          else
2367            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2368              option_data, op->one_char);
2369          pcregrep_exit(usage(2));
2370        }        }
2371        if (op->type == OP_LONGNUMBER)
2372            *((unsigned long int *)op->dataptr) = n;
2373        else
2374            *((int *)op->dataptr) = n;
2375        }
2376      }
2377    
2378    /* Options have been decoded. If -C was used, its value is used as a default
2379    for -A and -B. */
2380    
2381    if (both_context > 0)
2382      {
2383      if (after_context == 0) after_context = both_context;
2384      if (before_context == 0) before_context = both_context;
2385      }
2386    
2387    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2388    However, the latter two set only_matching. */
2389    
2390    if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2391        (file_offsets && line_offsets))
2392      {
2393      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2394        "and/or --line-offsets\n");
2395      pcregrep_exit(usage(2));
2396      }
2397    
2398    if (file_offsets || line_offsets) only_matching = 0;
2399    
2400    /* If a locale has not been provided as an option, see if the LC_CTYPE or
2401    LC_ALL environment variable is set, and if so, use it. */
2402    
2403    if (locale == NULL)
2404      {
2405      locale = getenv("LC_ALL");
2406      locale_from = "LCC_ALL";
2407      }
2408    
2409    if (locale == NULL)
2410      {
2411      locale = getenv("LC_CTYPE");
2412      locale_from = "LC_CTYPE";
2413      }
2414    
2415    /* If a locale has been provided, set it, and generate the tables the PCRE
2416    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2417    
2418    if (locale != NULL)
2419      {
2420      if (setlocale(LC_CTYPE, locale) == NULL)
2421        {
2422        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2423          locale, locale_from);
2424        return 2;
2425        }
2426      pcretables = pcre_maketables();
2427      }
2428    
2429    /* Sort out colouring */
2430    
2431    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2432      {
2433      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2434      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2435      else
2436        {
2437        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2438          colour_option);
2439        return 2;
2440        }
2441      if (do_colour)
2442        {
2443        char *cs = getenv("PCREGREP_COLOUR");
2444        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2445        if (cs != NULL) colour_string = cs;
2446      }      }
   fclose(f);  
2447    }    }
2448    
2449  /* If no file name, a single regex must be given inline */  /* Interpret the newline type; the default settings are Unix-like. */
2450    
2451    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2452      {
2453      pcre_options |= PCRE_NEWLINE_CR;
2454      endlinetype = EL_CR;
2455      }
2456    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2457      {
2458      pcre_options |= PCRE_NEWLINE_LF;
2459      endlinetype = EL_LF;
2460      }
2461    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2462      {
2463      pcre_options |= PCRE_NEWLINE_CRLF;
2464      endlinetype = EL_CRLF;
2465      }
2466    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2467      {
2468      pcre_options |= PCRE_NEWLINE_ANY;
2469      endlinetype = EL_ANY;
2470      }
2471    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2472      {
2473      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2474      endlinetype = EL_ANYCRLF;
2475      }
2476  else  else
2477    {    {
2478    if (i >= argc) return usage(0);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2479    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);    return 2;
2480    if (pattern_list[0] == NULL)    }
2481    
2482    /* Interpret the text values for -d and -D */
2483    
2484    if (dee_option != NULL)
2485      {
2486      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2487      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2488      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2489      else
2490      {      {
2491      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
       error);  
2492      return 2;      return 2;
2493      }      }
   pattern_count++;  
2494    }    }
2495    
2496  /* Study the regular expressions, as we will be running them may times */  if (DEE_option != NULL)
2497      {
2498      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2499      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2500      else
2501        {
2502        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2503        return 2;
2504        }
2505      }
2506    
2507    /* Check the values for Jeffrey Friedl's debugging options. */
2508    
2509    #ifdef JFRIEDL_DEBUG
2510    if (S_arg > 9)
2511      {
2512      fprintf(stderr, "pcregrep: bad value for -S option\n");
2513      return 2;
2514      }
2515    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2516      {
2517      if (jfriedl_XT == 0) jfriedl_XT = 1;
2518      if (jfriedl_XR == 0) jfriedl_XR = 1;
2519      }
2520    #endif
2521    
2522    /* Get memory for the main buffer, and to store the pattern and hints lists. */
2523    
2524    bufsize = 3*bufthird;
2525    main_buffer = (char *)malloc(bufsize);
2526    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2527    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2528    
2529    if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
2530      {
2531      fprintf(stderr, "pcregrep: malloc failed\n");
2532      goto EXIT2;
2533      }
2534    
2535    /* If no patterns were provided by -e, and there is no file provided by -f,
2536    the first argument is the one and only pattern, and it must exist. */
2537    
2538    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2539      {
2540      if (i >= argc) return usage(2);
2541      patterns[cmd_pattern_count++] = argv[i++];
2542      }
2543    
2544    /* Compile the patterns that were provided on the command line, either by
2545    multiple uses of -e or as a single unkeyed pattern. */
2546    
2547    for (j = 0; j < cmd_pattern_count; j++)
2548      {
2549      if (!compile_pattern(patterns[j], pcre_options, NULL,
2550           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2551        goto EXIT2;
2552      }
2553    
2554    /* Compile the regular expressions that are provided in a file. */
2555    
2556    if (pattern_filename != NULL)
2557      {
2558      int linenumber = 0;
2559      FILE *f;
2560      char *filename;
2561      char buffer[PATBUFSIZE];
2562    
2563      if (strcmp(pattern_filename, "-") == 0)
2564        {
2565        f = stdin;
2566        filename = stdin_name;
2567        }
2568      else
2569        {
2570        f = fopen(pattern_filename, "r");
2571        if (f == NULL)
2572          {
2573          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2574            strerror(errno));
2575          goto EXIT2;
2576          }
2577        filename = pattern_filename;
2578        }
2579    
2580      while (fgets(buffer, PATBUFSIZE, f) != NULL)
2581        {
2582        char *s = buffer + (int)strlen(buffer);
2583        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2584        *s = 0;
2585        linenumber++;
2586        if (buffer[0] == 0) continue;   /* Skip blank lines */
2587        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2588          goto EXIT2;
2589        }
2590    
2591      if (f != stdin) fclose(f);
2592      }
2593    
2594    /* Study the regular expressions, as we will be running them many times. Unless
2595    JIT has been explicitly disabled, arrange a stack for it to use. */
2596    
2597    #ifdef SUPPORT_PCREGREP_JIT
2598    if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2599      jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
2600    #endif
2601    
2602  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
2603    {    {
2604    hints_list[j] = pcre_study(pattern_list[j], 0, &error);    hints_list[j] = pcre_study(pattern_list[j], study_options, &error);
2605    if (error != NULL)    if (error != NULL)
2606      {      {
2607      char s[16];      char s[16];
2608      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2609      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2610      return 2;      goto EXIT2;
2611        }
2612      hint_count++;
2613    #ifdef SUPPORT_PCREGREP_JIT
2614      if (jit_stack != NULL && hints_list[j] != NULL)
2615        pcre_assign_jit_stack(hints_list[j], NULL, jit_stack);
2616    #endif
2617      }
2618    
2619    /* If --match-limit or --recursion-limit was set, put the value(s) into the
2620    pcre_extra block for each pattern. */
2621    
2622    if (match_limit > 0 || match_limit_recursion > 0)
2623      {
2624      for (j = 0; j < pattern_count; j++)
2625        {
2626        if (hints_list[j] == NULL)
2627          {
2628          hints_list[j] = malloc(sizeof(pcre_extra));
2629          if (hints_list[j] == NULL)
2630            {
2631            fprintf(stderr, "pcregrep: malloc failed\n");
2632            pcregrep_exit(2);
2633            }
2634          }
2635        if (match_limit > 0)
2636          {
2637          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2638          hints_list[j]->match_limit = match_limit;
2639          }
2640        if (match_limit_recursion > 0)
2641          {
2642          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2643          hints_list[j]->match_limit_recursion = match_limit_recursion;
2644          }
2645      }      }
2646    }    }
2647    
2648  /* If there are no further arguments, do the business on stdin and exit */  /* If there are include or exclude patterns, compile them. */
2649    
2650  if (i >= argc) return pcregrep(stdin, NULL);  if (exclude_pattern != NULL)
2651      {
2652      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2653        pcretables);
2654      if (exclude_compiled == NULL)
2655        {
2656        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2657          errptr, error);
2658        goto EXIT2;
2659        }
2660      }
2661    
2662    if (include_pattern != NULL)
2663      {
2664      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2665        pcretables);
2666      if (include_compiled == NULL)
2667        {
2668        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2669          errptr, error);
2670        goto EXIT2;
2671        }
2672      }
2673    
2674    if (exclude_dir_pattern != NULL)
2675      {
2676      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2677        pcretables);
2678      if (exclude_dir_compiled == NULL)
2679        {
2680        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2681          errptr, error);
2682        goto EXIT2;
2683        }
2684      }
2685    
2686    if (include_dir_pattern != NULL)
2687      {
2688      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2689        pcretables);
2690      if (include_dir_compiled == NULL)
2691        {
2692        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2693          errptr, error);
2694        goto EXIT2;
2695        }
2696      }
2697    
2698    /* If there are no further arguments, do the business on stdin and exit. */
2699    
2700    if (i >= argc)
2701      {
2702      rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2703        (filenames > FN_DEFAULT)? stdin_name : NULL);
2704      goto EXIT;
2705      }
2706    
2707  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2708  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
2709  the file name if the argument is not a directory. */  the file name if the argument is not a directory and filenames are not
2710    otherwise forced. */
2711    
2712  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
 if (filenames_only) filenames = TRUE;  
2713    
2714  for (; i < argc; i++)  for (; i < argc; i++)
2715    {    {
2716    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2717    if (frc == 0 && rc == 1) rc = 0;      only_one_at_top);
2718      if (frc > 1) rc = frc;
2719        else if (frc == 0 && rc == 1) rc = 0;
2720    }    }
2721    
2722  return rc;  EXIT:
2723    #ifdef SUPPORT_PCREGREP_JIT
2724    if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2725    #endif
2726    if (main_buffer != NULL) free(main_buffer);
2727    if (pattern_list != NULL)
2728      {
2729      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2730      free(pattern_list);
2731      }
2732    if (hints_list != NULL)
2733      {
2734      for (i = 0; i < hint_count; i++)
2735        {
2736        if (hints_list[i] != NULL) pcre_free_study(hints_list[i]);
2737        }
2738      free(hints_list);
2739      }
2740    pcregrep_exit(rc);
2741    
2742    EXIT2:
2743    rc = 2;
2744    goto EXIT;
2745  }  }
2746    
2747  /* End */  /* End of pcregrep */

Legend:
Removed from v.53  
changed lines
  Added in v.904

  ViewVC Help
Powered by ViewVC 1.1.5