/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 947 by ph10, Sun Mar 4 16:51:13 2012 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <locale.h>  #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
 #include "config.h"  
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 56  POSSIBILITY OF SUCH DAMAGE. Line 70  POSSIBILITY OF SUCH DAMAGE.
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "4.2 09-Jan-2006"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define PATBUFSIZE BUFSIZ
78  #else  #else
79  #define MBUFTHIRD 8192  #define PATBUFSIZE 8192
80  #endif  #endif
81    
   
82  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
83  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
84  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
85    
86  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92  /* Actions for the -d and -D options */  /* Actions for the -d and -D options */
93    
# Line 83  enum { DEE_READ, DEE_SKIP }; Line 100  enum { DEE_READ, DEE_SKIP };
100  #define PO_LINE_MATCH     0x0002  #define PO_LINE_MATCH     0x0002
101  #define PO_FIXED_STRINGS  0x0004  #define PO_FIXED_STRINGS  0x0004
102    
103    /* Line ending types */
104    
105    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* Binary file options */
108    
109    enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
110    
111    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
112    environments), a warning is issued if the value of fwrite() is ignored.
113    Unfortunately, casting to (void) does not suppress the warning. To get round
114    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
115    apply to fprintf(). */
116    
117    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
118    
119    
120    
121  /*************************************************  /*************************************************
# Line 94  regular code. */ Line 127  regular code. */
127    
128  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
129  static int S_arg = -1;  static int S_arg = -1;
130    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
131    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
132    static const char *jfriedl_prefix = "";
133    static const char *jfriedl_postfix = "";
134  #endif  #endif
135    
136    static int  endlinetype;
137    
138  static char *colour_string = (char *)"1;31";  static char *colour_string = (char *)"1;31";
139  static char *colour_option = NULL;  static char *colour_option = NULL;
140  static char *dee_option = NULL;  static char *dee_option = NULL;
141  static char *DEE_option = NULL;  static char *DEE_option = NULL;
142    static char *main_buffer = NULL;
143    static char *newline = NULL;
144  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
145  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
146  static char *locale = NULL;  static char *locale = NULL;
# Line 107  static char *locale = NULL; Line 148  static char *locale = NULL;
148  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
149    
150  static int  pattern_count = 0;  static int  pattern_count = 0;
151  static pcre **pattern_list;  static pcre **pattern_list = NULL;
152  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
153    
154    static char *file_list = NULL;
155  static char *include_pattern = NULL;  static char *include_pattern = NULL;
156  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
157    static char *include_dir_pattern = NULL;
158    static char *exclude_dir_pattern = NULL;
159    
160  static pcre *include_compiled = NULL;  static pcre *include_compiled = NULL;
161  static pcre *exclude_compiled = NULL;  static pcre *exclude_compiled = NULL;
162    static pcre *include_dir_compiled = NULL;
163    static pcre *exclude_dir_compiled = NULL;
164    
165  static int after_context = 0;  static int after_context = 0;
166  static int before_context = 0;  static int before_context = 0;
167    static int binary_files = BIN_BINARY;
168  static int both_context = 0;  static int both_context = 0;
169    static int bufthird = PCREGREP_BUFSIZE;
170    static int bufsize = 3*PCREGREP_BUFSIZE;
171  static int dee_action = dee_READ;  static int dee_action = dee_READ;
172  static int DEE_action = DEE_READ;  static int DEE_action = DEE_READ;
173  static int error_count = 0;  static int error_count = 0;
174  static int filenames = FN_DEFAULT;  static int filenames = FN_DEFAULT;
175    static int only_matching = -1;
176  static int process_options = 0;  static int process_options = 0;
177    
178    #ifdef SUPPORT_PCREGREP_JIT
179    static int study_options = PCRE_STUDY_JIT_COMPILE;
180    #else
181    static int study_options = 0;
182    #endif
183    
184    static unsigned long int match_limit = 0;
185    static unsigned long int match_limit_recursion = 0;
186    
187  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
188  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
189    static BOOL file_offsets = FALSE;
190  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
191  static BOOL invert = FALSE;  static BOOL invert = FALSE;
192    static BOOL line_buffered = FALSE;
193    static BOOL line_offsets = FALSE;
194  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
195  static BOOL number = FALSE;  static BOOL number = FALSE;
196  static BOOL only_matching = FALSE;  static BOOL omit_zero_count = FALSE;
197    static BOOL resource_error = FALSE;
198  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
199  static BOOL silent = FALSE;  static BOOL silent = FALSE;
200    static BOOL utf8 = FALSE;
201    
202  /* Structure for options and list of them */  /* Structure for options and list of them */
203    
204  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
205         OP_PATLIST };         OP_OP_NUMBER, OP_PATLIST, OP_BINFILES };
206    
207  typedef struct option_item {  typedef struct option_item {
208    int type;    int type;
# Line 151  typedef struct option_item { Line 215  typedef struct option_item {
215  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
216  used to identify them. */  used to identify them. */
217    
218  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
219  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
220  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
221  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
222  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
223  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
224  #define N_NULL      (-7)  #define N_LABEL        (-7)
225    #define N_LOCALE       (-8)
226    #define N_NULL         (-9)
227    #define N_LOFFSETS     (-10)
228    #define N_FOFFSETS     (-11)
229    #define N_LBUFFER      (-12)
230    #define N_M_LIMIT      (-13)
231    #define N_M_LIMIT_REC  (-14)
232    #define N_BUFSIZE      (-15)
233    #define N_NOJIT        (-16)
234    #define N_FILE_LIST    (-17)
235    #define N_BINARY_FILES (-18)
236    
237  static option_item optionlist[] = {  static option_item optionlist[] = {
238    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
239    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
240    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
241    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
242    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
243    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },    { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
244    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
245    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
246    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
247    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
248    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
249    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
250    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
251    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_PATLIST,    'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
252    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
253    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_STRING,     'f',      &pattern_filename, "file=path",     "read patterns from file" },
254    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_STRING,     N_FILE_LIST, &file_list,     "file-list=path","read files to search from file" },
255    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
256    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
257    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
258    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
259    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
260    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },  #ifdef SUPPORT_PCREGREP_JIT
261    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
262    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },  #else
263    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
264    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },  #endif
265      { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
266      { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
267      { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
268      { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
269      { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
270      { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
271      { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
272      { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
273      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
274      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
275      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
276      { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
277      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
278      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
279      { OP_STRING,     N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
280      { OP_STRING,     N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
281      { OP_STRING,     N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
282      { OP_STRING,     N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
283    
284      /* These two were accidentally implemented with underscores instead of
285      hyphens in the option names. As this was not discovered for several releases,
286      the incorrect versions are left in the table for compatibility. However, the
287      --help function misses out any option that has an underscore in its name. */
288    
289      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
290      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
291    
292  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
293    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
294  #endif  #endif
# Line 210  static const char *prefix[] = { Line 312  static const char *prefix[] = {
312  static const char *suffix[] = {  static const char *suffix[] = {
313    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
314    
315    /* UTF-8 tables - used only when the newline setting is "any". */
316    
317    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
318    
319    const char utf8_table4[] = {
320      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
321      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
322      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
323      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
324    
325    
326    
327    /*************************************************
328    *         Exit from the program                  *
329    *************************************************/
330    
331    /* If there has been a resource error, give a suitable message.
332    
333    Argument:  the return code
334    Returns:   does not return
335    */
336    
337    static void
338    pcregrep_exit(int rc)
339    {
340    if (resource_error)
341      {
342      fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
343        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
344        PCRE_ERROR_JIT_STACKLIMIT);
345      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
346      }
347    
348    exit(rc);
349    }
350    
351    
352  /*************************************************  /*************************************************
# Line 222  although at present the only ones are fo Line 359  although at present the only ones are fo
359    
360  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
361    
362  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
363  #include <sys/types.h>  #include <sys/types.h>
364  #include <sys/stat.h>  #include <sys/stat.h>
365  #include <dirent.h>  #include <dirent.h>
# Line 254  for (;;) Line 391  for (;;)
391    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
392      return dent->d_name;      return dent->d_name;
393    }    }
394  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
395  }  }
396    
397  static void  static void
# Line 276  return (statbuf.st_mode & S_IFMT) == S_I Line 413  return (statbuf.st_mode & S_IFMT) == S_I
413  }  }
414    
415    
416  /************* Test stdout for being a terminal in Unix **********/  /************* Test for a terminal in Unix **********/
417    
418  static BOOL  static BOOL
419  is_stdout_tty(void)  is_stdout_tty(void)
# Line 284  is_stdout_tty(void) Line 421  is_stdout_tty(void)
421  return isatty(fileno(stdout));  return isatty(fileno(stdout));
422  }  }
423    
424    static BOOL
425    is_file_tty(FILE *f)
426    {
427    return isatty(fileno(f));
428    }
429    
430    
431  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
432    
433  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
434  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
435  when it did not exist. */  when it did not exist. David Byron added a patch that moved the #include of
436    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
437    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
438    undefined when it is indeed undefined. */
439    
440  #elif HAVE_WIN32API  #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
441    
442  #ifndef STRICT  #ifndef STRICT
443  # define STRICT  # define STRICT
# Line 300  when it did not exist. */ Line 445  when it did not exist. */
445  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
446  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
447  #endif  #endif
448    
449    #include <windows.h>
450    
451  #ifndef INVALID_FILE_ATTRIBUTES  #ifndef INVALID_FILE_ATTRIBUTES
452  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
453  #endif  #endif
454    
 #include <windows.h>  
   
455  typedef struct directory_type  typedef struct directory_type
456  {  {
457  HANDLE handle;  HANDLE handle;
# Line 335  dir = (directory_type *) malloc(sizeof(* Line 481  dir = (directory_type *) malloc(sizeof(*
481  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
482    {    {
483    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
484    exit(2);    pcregrep_exit(2);
485    }    }
486  memcpy(pattern, filename, len);  memcpy(pattern, filename, len);
487  memcpy(&(pattern[len]), "\\*", 3);  memcpy(&(pattern[len]), "\\*", 3);
# Line 390  regular if they are not directories. */ Line 536  regular if they are not directories. */
536    
537  int isregfile(char *filename)  int isregfile(char *filename)
538  {  {
539  return !isdirectory(filename)  return !isdirectory(filename);
540  }  }
541    
542    
543  /************* Test stdout for being a terminal in Win32 **********/  /************* Test for a terminal in Win32 **********/
544    
545  /* I don't know how to do this; assume never */  /* I don't know how to do this; assume never */
546    
547  static BOOL  static BOOL
548  is_stdout_tty(void)  is_stdout_tty(void)
549  {  {
550  FALSE;  return FALSE;
551    }
552    
553    static BOOL
554    is_file_tty(FILE *f)
555    {
556    return FALSE;
557  }  }
558    
559    
# Line 414  FALSE; Line 566  FALSE;
566  typedef void directory_type;  typedef void directory_type;
567    
568  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
569  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
570  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
571  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
572    
573    
# Line 426  void closedirectory(directory_type *dir) Line 578  void closedirectory(directory_type *dir)
578  int isregfile(char *filename) { return 1; }  int isregfile(char *filename) { return 1; }
579    
580    
581  /************* Test stdout for being a terminal when we can't do it **********/  /************* Test for a terminal when we can't do it **********/
582    
583  static BOOL  static BOOL
584  is_stdout_tty(void)  is_stdout_tty(void)
# Line 434  is_stdout_tty(void) Line 586  is_stdout_tty(void)
586  return FALSE;  return FALSE;
587  }  }
588    
589    static BOOL
590    is_file_tty(FILE *f)
591    {
592    return FALSE;
593    }
594    
595  #endif  #endif
596    
597    
598    
599  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
600  /*************************************************  /*************************************************
601  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
602  *************************************************/  *************************************************/
# Line 462  return sys_errlist[n]; Line 619  return sys_errlist[n];
619    
620    
621  /*************************************************  /*************************************************
622    *            Read one line of input              *
623    *************************************************/
624    
625    /* Normally, input is read using fread() into a large buffer, so many lines may
626    be read at once. However, doing this for tty input means that no output appears
627    until a lot of input has been typed. Instead, tty input is handled line by
628    line. We cannot use fgets() for this, because it does not stop at a binary
629    zero, and therefore there is no way of telling how many characters it has read,
630    because there may be binary zeros embedded in the data.
631    
632    Arguments:
633      buffer     the buffer to read into
634      length     the maximum number of characters to read
635      f          the file
636    
637    Returns:     the number of characters read, zero at end of file
638    */
639    
640    static unsigned int
641    read_one_line(char *buffer, int length, FILE *f)
642    {
643    int c;
644    int yield = 0;
645    while ((c = fgetc(f)) != EOF)
646      {
647      buffer[yield++] = c;
648      if (c == '\n' || yield >= length) break;
649      }
650    return yield;
651    }
652    
653    
654    
655    /*************************************************
656    *             Find end of line                   *
657    *************************************************/
658    
659    /* The length of the endline sequence that is found is set via lenptr. This may
660    be zero at the very end of the file if there is no line-ending sequence there.
661    
662    Arguments:
663      p         current position in line
664      endptr    end of available data
665      lenptr    where to put the length of the eol sequence
666    
667    Returns:    pointer after the last byte of the line,
668                including the newline byte(s)
669    */
670    
671    static char *
672    end_of_line(char *p, char *endptr, int *lenptr)
673    {
674    switch(endlinetype)
675      {
676      default:      /* Just in case */
677      case EL_LF:
678      while (p < endptr && *p != '\n') p++;
679      if (p < endptr)
680        {
681        *lenptr = 1;
682        return p + 1;
683        }
684      *lenptr = 0;
685      return endptr;
686    
687      case EL_CR:
688      while (p < endptr && *p != '\r') p++;
689      if (p < endptr)
690        {
691        *lenptr = 1;
692        return p + 1;
693        }
694      *lenptr = 0;
695      return endptr;
696    
697      case EL_CRLF:
698      for (;;)
699        {
700        while (p < endptr && *p != '\r') p++;
701        if (++p >= endptr)
702          {
703          *lenptr = 0;
704          return endptr;
705          }
706        if (*p == '\n')
707          {
708          *lenptr = 2;
709          return p + 1;
710          }
711        }
712      break;
713    
714      case EL_ANYCRLF:
715      while (p < endptr)
716        {
717        int extra = 0;
718        register int c = *((unsigned char *)p);
719    
720        if (utf8 && c >= 0xc0)
721          {
722          int gcii, gcss;
723          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
724          gcss = 6*extra;
725          c = (c & utf8_table3[extra]) << gcss;
726          for (gcii = 1; gcii <= extra; gcii++)
727            {
728            gcss -= 6;
729            c |= (p[gcii] & 0x3f) << gcss;
730            }
731          }
732    
733        p += 1 + extra;
734    
735        switch (c)
736          {
737          case 0x0a:    /* LF */
738          *lenptr = 1;
739          return p;
740    
741          case 0x0d:    /* CR */
742          if (p < endptr && *p == 0x0a)
743            {
744            *lenptr = 2;
745            p++;
746            }
747          else *lenptr = 1;
748          return p;
749    
750          default:
751          break;
752          }
753        }   /* End of loop for ANYCRLF case */
754    
755      *lenptr = 0;  /* Must have hit the end */
756      return endptr;
757    
758      case EL_ANY:
759      while (p < endptr)
760        {
761        int extra = 0;
762        register int c = *((unsigned char *)p);
763    
764        if (utf8 && c >= 0xc0)
765          {
766          int gcii, gcss;
767          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
768          gcss = 6*extra;
769          c = (c & utf8_table3[extra]) << gcss;
770          for (gcii = 1; gcii <= extra; gcii++)
771            {
772            gcss -= 6;
773            c |= (p[gcii] & 0x3f) << gcss;
774            }
775          }
776    
777        p += 1 + extra;
778    
779        switch (c)
780          {
781          case 0x0a:    /* LF */
782          case 0x0b:    /* VT */
783          case 0x0c:    /* FF */
784          *lenptr = 1;
785          return p;
786    
787          case 0x0d:    /* CR */
788          if (p < endptr && *p == 0x0a)
789            {
790            *lenptr = 2;
791            p++;
792            }
793          else *lenptr = 1;
794          return p;
795    
796          case 0x85:    /* NEL */
797          *lenptr = utf8? 2 : 1;
798          return p;
799    
800          case 0x2028:  /* LS */
801          case 0x2029:  /* PS */
802          *lenptr = 3;
803          return p;
804    
805          default:
806          break;
807          }
808        }   /* End of loop for ANY case */
809    
810      *lenptr = 0;  /* Must have hit the end */
811      return endptr;
812      }     /* End of overall switch */
813    }
814    
815    
816    
817    /*************************************************
818    *         Find start of previous line            *
819    *************************************************/
820    
821    /* This is called when looking back for before lines to print.
822    
823    Arguments:
824      p         start of the subsequent line
825      startptr  start of available data
826    
827    Returns:    pointer to the start of the previous line
828    */
829    
830    static char *
831    previous_line(char *p, char *startptr)
832    {
833    switch(endlinetype)
834      {
835      default:      /* Just in case */
836      case EL_LF:
837      p--;
838      while (p > startptr && p[-1] != '\n') p--;
839      return p;
840    
841      case EL_CR:
842      p--;
843      while (p > startptr && p[-1] != '\n') p--;
844      return p;
845    
846      case EL_CRLF:
847      for (;;)
848        {
849        p -= 2;
850        while (p > startptr && p[-1] != '\n') p--;
851        if (p <= startptr + 1 || p[-2] == '\r') return p;
852        }
853      return p;   /* But control should never get here */
854    
855      case EL_ANY:
856      case EL_ANYCRLF:
857      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
858      if (utf8) while ((*p & 0xc0) == 0x80) p--;
859    
860      while (p > startptr)
861        {
862        register int c;
863        char *pp = p - 1;
864    
865        if (utf8)
866          {
867          int extra = 0;
868          while ((*pp & 0xc0) == 0x80) pp--;
869          c = *((unsigned char *)pp);
870          if (c >= 0xc0)
871            {
872            int gcii, gcss;
873            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
874            gcss = 6*extra;
875            c = (c & utf8_table3[extra]) << gcss;
876            for (gcii = 1; gcii <= extra; gcii++)
877              {
878              gcss -= 6;
879              c |= (pp[gcii] & 0x3f) << gcss;
880              }
881            }
882          }
883        else c = *((unsigned char *)pp);
884    
885        if (endlinetype == EL_ANYCRLF) switch (c)
886          {
887          case 0x0a:    /* LF */
888          case 0x0d:    /* CR */
889          return p;
890    
891          default:
892          break;
893          }
894    
895        else switch (c)
896          {
897          case 0x0a:    /* LF */
898          case 0x0b:    /* VT */
899          case 0x0c:    /* FF */
900          case 0x0d:    /* CR */
901          case 0x85:    /* NEL */
902          case 0x2028:  /* LS */
903          case 0x2029:  /* PS */
904          return p;
905    
906          default:
907          break;
908          }
909    
910        p = pp;  /* Back one character */
911        }        /* End of loop for ANY case */
912    
913      return startptr;  /* Hit start of data */
914      }     /* End of overall switch */
915    }
916    
917    
918    
919    
920    
921    /*************************************************
922  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
923  *************************************************/  *************************************************/
924    
# Line 486  if (after_context > 0 && lastmatchnumber Line 943  if (after_context > 0 && lastmatchnumber
943    int count = 0;    int count = 0;
944    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
945      {      {
946        int ellength;
947      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
948      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
949      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
950      while (*pp != '\n') pp++;      pp = end_of_line(pp, endptr, &ellength);
951      fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
952      lastmatchrestart = pp + 1;      lastmatchrestart = pp;
953      }      }
954    hyphenpending = TRUE;    hyphenpending = TRUE;
955    }    }
# Line 500  if (after_context > 0 && lastmatchnumber Line 958  if (after_context > 0 && lastmatchnumber
958    
959    
960  /*************************************************  /*************************************************
961    *   Apply patterns to subject till one matches   *
962    *************************************************/
963    
964    /* This function is called to run through all patterns, looking for a match. It
965    is used multiple times for the same subject when colouring is enabled, in order
966    to find all possible matches.
967    
968    Arguments:
969      matchptr     the start of the subject
970      length       the length of the subject to match
971      startoffset  where to start matching
972      offsets      the offets vector to fill in
973      mrc          address of where to put the result of pcre_exec()
974    
975    Returns:      TRUE if there was a match
976                  FALSE if there was no match
977                  invert if there was a non-fatal error
978    */
979    
980    static BOOL
981    match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
982      int *mrc)
983    {
984    int i;
985    size_t slen = length;
986    const char *msg = "this text:\n\n";
987    if (slen > 200)
988      {
989      slen = 200;
990      msg = "text that starts:\n\n";
991      }
992    for (i = 0; i < pattern_count; i++)
993      {
994      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
995        startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
996      if (*mrc >= 0) return TRUE;
997      if (*mrc == PCRE_ERROR_NOMATCH) continue;
998      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
999      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
1000      fprintf(stderr, "%s", msg);
1001      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1002      fprintf(stderr, "\n\n");
1003      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1004          *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1005        resource_error = TRUE;
1006      if (error_count++ > 20)
1007        {
1008        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1009        pcregrep_exit(2);
1010        }
1011      return invert;    /* No more matching; don't show the line again */
1012      }
1013    
1014    return FALSE;  /* No match, no errors */
1015    }
1016    
1017    
1018    
1019    /*************************************************
1020  *            Grep an individual file             *  *            Grep an individual file             *
1021  *************************************************/  *************************************************/
1022    
1023  /* This is called from grep_or_recurse() below. It uses a buffer that is three  /* This is called from grep_or_recurse() below. It uses a buffer that is three
1024  times the value of MBUFTHIRD. The matching point is never allowed to stray into  times the value of bufthird. The matching point is never allowed to stray into
1025  the top third of the buffer, thus keeping more of the file available for  the top third of the buffer, thus keeping more of the file available for
1026  context printing or for multiline scanning. For large files, the pointer will  context printing or for multiline scanning. For large files, the pointer will
1027  be in the middle third most of the time, so the bottom third is available for  be in the middle third most of the time, so the bottom third is available for
1028  "before" context printing.  "before" context printing.
1029    
1030  Arguments:  Arguments:
1031    in           the fopened FILE stream    handle       the fopened FILE stream for a normal file
1032                   the gzFile pointer when reading is via libz
1033                   the BZFILE pointer when reading is via libbz2
1034      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1035      filename     the file name or NULL (for errors)
1036    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
1037                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
1038                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
1039    
1040  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
1041                 1 otherwise (no matches)                 1 otherwise (no matches)
1042                   2 if an overlong line is encountered
1043                   3 if there is a read error on a .bz2 file
1044  */  */
1045    
1046  static int  static int
1047  pcregrep(FILE *in, char *printname)  pcregrep(void *handle, int frtype, char *filename, char *printname)
1048  {  {
1049  int rc = 1;  int rc = 1;
1050  int linenumber = 1;  int linenumber = 1;
1051  int lastmatchnumber = 0;  int lastmatchnumber = 0;
1052  int count = 0;  int count = 0;
1053  int offsets[99];  int filepos = 0;
1054    int offsets[OFFSET_SIZE];
1055  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
1056  char buffer[3*MBUFTHIRD];  char *ptr = main_buffer;
 char *ptr = buffer;  
1057  char *endptr;  char *endptr;
1058  size_t bufflength;  size_t bufflength;
1059    BOOL binary = FALSE;
1060  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
1061    BOOL input_line_buffered = line_buffered;
1062    FILE *in = NULL;                    /* Ensure initialized */
1063    
1064    #ifdef SUPPORT_LIBZ
1065    gzFile ingz = NULL;
1066    #endif
1067    
1068    #ifdef SUPPORT_LIBBZ2
1069    BZFILE *inbz2 = NULL;
1070    #endif
1071    
 /* Do the first read into the start of the buffer and set up the pointer to  
 end of what we have. */  
1072    
1073  bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  /* Do the first read into the start of the buffer and set up the pointer to end
1074  endptr = buffer + bufflength;  of what we have. In the case of libz, a non-zipped .gz file will be read as a
1075    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1076    fail. */
1077    
1078    #ifdef SUPPORT_LIBZ
1079    if (frtype == FR_LIBZ)
1080      {
1081      ingz = (gzFile)handle;
1082      bufflength = gzread (ingz, main_buffer, bufsize);
1083      }
1084    else
1085    #endif
1086    
1087    #ifdef SUPPORT_LIBBZ2
1088    if (frtype == FR_LIBBZ2)
1089      {
1090      inbz2 = (BZFILE *)handle;
1091      bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1092      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1093      }                                    /* without the cast it is unsigned. */
1094    else
1095    #endif
1096    
1097      {
1098      in = (FILE *)handle;
1099      if (is_file_tty(in)) input_line_buffered = TRUE;
1100      bufflength = input_line_buffered?
1101        read_one_line(main_buffer, bufsize, in) :
1102        fread(main_buffer, 1, bufsize, in);
1103      }
1104    
1105    endptr = main_buffer + bufflength;
1106    
1107    /* Unless binary-files=text, see if we have a binary file. This uses the same
1108    rule as GNU grep, namely, a search for a binary zero byte near the start of the
1109    file. */
1110    
1111    if (binary_files != BIN_TEXT)
1112      {
1113      binary =
1114        memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1115      if (binary && binary_files == BIN_NOMATCH) return 1;
1116      }
1117    
1118  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
1119  files, endptr will be at the end of the buffer when we are in the middle of the  files, endptr will be at the end of the buffer when we are in the middle of the
# Line 548  way, the buffer is shifted left and re-f Line 1122  way, the buffer is shifted left and re-f
1122    
1123  while (ptr < endptr)  while (ptr < endptr)
1124    {    {
1125    int i;    int endlinelength;
1126    int mrc = 0;    int mrc = 0;
1127    BOOL match = FALSE;    int startoffset = 0;
1128      BOOL match;
1129      char *matchptr = ptr;
1130    char *t = ptr;    char *t = ptr;
1131    size_t length, linelength;    size_t length, linelength;
1132    
1133    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
1134    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
1135    length remainder of the data in the buffer. Otherwise, it is the length of    length remainder of the data in the buffer. Otherwise, it is the length of
1136    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
1137    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1138    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
1139      first line. */
1140    linelength = 0;  
1141    while (t < endptr && *t++ != '\n') linelength++;    t = end_of_line(t, endptr, &endlinelength);
1142    length = multiline? endptr - ptr : linelength;    linelength = t - ptr - endlinelength;
1143      length = multiline? (size_t)(endptr - ptr) : linelength;
1144    /* Run through all the patterns until one matches. Note that we don't include  
1145    the final newline in the subject string. */    /* Check to see if the line we are looking at extends right to the very end
1146      of the buffer without a line terminator. This means the line is too long to
1147    for (i = 0; i < pattern_count; i++)    handle. */
1148      {  
1149      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,    if (endlinelength == 0 && t == main_buffer + bufsize)
1150        offsets, 99);      {
1151      if (mrc >= 0) { match = TRUE; break; }      fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1152      if (mrc != PCRE_ERROR_NOMATCH)                      "pcregrep: check the --buffer-size option\n",
1153        {                      linenumber,
1154        fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);                      (filename == NULL)? "" : " of file ",
1155        if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);                      (filename == NULL)? "" : filename);
1156        fprintf(stderr, "this line:\n");      return 2;
       fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */  
       fprintf(stderr, "\n");  
       if (error_count == 0 &&  
           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))  
         {  
         fprintf(stderr, "pcregrep: error %d means that a resource limit "  
           "was exceeded\n", mrc);  
         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
         }  
       if (error_count++ > 20)  
         {  
         fprintf(stderr, "pcregrep: too many errors - abandoned\n");  
         exit(2);  
         }  
       match = invert;    /* No more matching; don't show the line again */  
       break;  
       }  
1157      }      }
1158    
1159      /* Extra processing for Jeffrey Friedl's debugging. */
1160    
1161    #ifdef JFRIEDL_DEBUG
1162      if (jfriedl_XT || jfriedl_XR)
1163      {
1164          #include <sys/time.h>
1165          #include <time.h>
1166          struct timeval start_time, end_time;
1167          struct timezone dummy;
1168          int i;
1169    
1170          if (jfriedl_XT)
1171          {
1172              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1173              const char *orig = ptr;
1174              ptr = malloc(newlen + 1);
1175              if (!ptr) {
1176                      printf("out of memory");
1177                      pcregrep_exit(2);
1178              }
1179              endptr = ptr;
1180              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1181              for (i = 0; i < jfriedl_XT; i++) {
1182                      strncpy(endptr, orig,  length);
1183                      endptr += length;
1184              }
1185              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1186              length = newlen;
1187          }
1188    
1189          if (gettimeofday(&start_time, &dummy) != 0)
1190                  perror("bad gettimeofday");
1191    
1192    
1193          for (i = 0; i < jfriedl_XR; i++)
1194              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1195                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1196    
1197          if (gettimeofday(&end_time, &dummy) != 0)
1198                  perror("bad gettimeofday");
1199    
1200          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1201                          -
1202                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1203    
1204          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1205          return 0;
1206      }
1207    #endif
1208    
1209      /* We come back here after a match when the -o option (only_matching) is set,
1210      in order to find any further matches in the same line. */
1211    
1212      ONLY_MATCHING_RESTART:
1213    
1214      /* Run through all the patterns until one matches or there is an error other
1215      than NOMATCH. This code is in a subroutine so that it can be re-used for
1216      finding subsequent matches when colouring matched lines. */
1217    
1218      match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1219    
1220    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1221    
1222    if (match != invert)    if (match != invert)
# Line 610  while (ptr < endptr) Line 1230  while (ptr < endptr)
1230      /* Just count if just counting is wanted. */      /* Just count if just counting is wanted. */
1231    
1232      if (count_only) count++;      if (count_only) count++;
1233    
1234        /* When handling a binary file and binary-files==binary, the "binary"
1235        variable will be set true (it's false in all other cases). In this
1236        situation we just want to output the file name. No need to scan further. */
1237    
1238        else if (binary)
1239          {
1240          fprintf(stdout, "Binary file %s matches\n", filename);
1241          return 0;
1242          }
1243    
1244      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1245      in the file. */      in the file. */
1246    
1247      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1248        {        {
1249        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1250        return 0;        return 0;
# Line 624  while (ptr < endptr) Line 1254  while (ptr < endptr)
1254    
1255      else if (quiet) return 0;      else if (quiet) return 0;
1256    
1257      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, or a
1258      does not pring any context. */      captured portion of it, as long as this string is not empty, and the
1259        --file-offsets and --line-offsets options output offsets for the matching
1260        substring (they both force --only-matching = 0). None of these options
1261        prints any context. Afterwards, adjust the start and then jump back to look
1262        for further matches in the same line. If we are in invert mode, however,
1263        nothing is printed and we do not restart - this could still be useful
1264        because the return code is set. */
1265    
1266      else if (only_matching)      else if (only_matching >= 0)
1267        {        {
1268        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (!invert)
1269        if (number) fprintf(stdout, "%d:", linenumber);          {
1270        fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1271        fprintf(stdout, "\n");          if (number) fprintf(stdout, "%d:", linenumber);
1272            if (line_offsets)
1273              fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1274                offsets[1] - offsets[0]);
1275            else if (file_offsets)
1276              fprintf(stdout, "%d,%d\n",
1277                (int)(filepos + matchptr + offsets[0] - ptr),
1278                offsets[1] - offsets[0]);
1279            else if (only_matching < mrc)
1280              {
1281              int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1282              if (plen > 0)
1283                {
1284                if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1285                FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1286                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1287                fprintf(stdout, "\n");
1288                }
1289              }
1290            else if (printname != NULL || number) fprintf(stdout, "\n");
1291            match = FALSE;
1292            if (line_buffered) fflush(stdout);
1293            rc = 0;                      /* Had some success */
1294            startoffset = offsets[1];    /* Restart after the match */
1295            goto ONLY_MATCHING_RESTART;
1296            }
1297        }        }
1298    
1299      /* This is the default case when none of the above options is set. We print      /* This is the default case when none of the above options is set. We print
# Line 646  while (ptr < endptr) Line 1307  while (ptr < endptr)
1307    
1308        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
1309          {          {
1310            int ellength;
1311          int linecount = 0;          int linecount = 0;
1312          char *p = lastmatchrestart;          char *p = lastmatchrestart;
1313    
1314          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
1315            {            {
1316            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
1317            linecount++;            linecount++;
1318            }            }
1319    
# Line 665  while (ptr < endptr) Line 1326  while (ptr < endptr)
1326            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
1327            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1328            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1329            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1330            fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1331            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
1332            }            }
1333          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
1334          }          }
# Line 689  while (ptr < endptr) Line 1350  while (ptr < endptr)
1350          int linecount = 0;          int linecount = 0;
1351          char *p = ptr;          char *p = ptr;
1352    
1353          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1354                 linecount < before_context)                 linecount < before_context)
1355            {            {
1356            linecount++;            linecount++;
1357            p--;            p = previous_line(p, main_buffer);
           while (p > buffer && p[-1] != '\n') p--;  
1358            }            }
1359    
1360          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 702  while (ptr < endptr) Line 1362  while (ptr < endptr)
1362    
1363          while (p < ptr)          while (p < ptr)
1364            {            {
1365              int ellength;
1366            char *pp = p;            char *pp = p;
1367            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1368            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1369            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1370            fwrite(p, 1, pp - p + 1, stdout);   /* In case binary zero */            FWRITE(p, 1, pp - p, stdout);
1371            p = pp + 1;            p = pp;
1372            }            }
1373          }          }
1374    
# Line 722  while (ptr < endptr) Line 1383  while (ptr < endptr)
1383    
1384        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1385        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1386        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1387        start of the match will always be before the first \n character. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1388          the match will always be before the first newline sequence. */
1389    
1390        if (multiline)        if (multiline & !invert)
1391          {          {
1392          char *endmatch = ptr + offsets[1];          char *endmatch = ptr + offsets[1];
1393          t = ptr;          t = ptr;
1394          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          while (t < endmatch)
1395          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1396          linelength = endmatch - ptr;            t = end_of_line(t, endptr, &endlinelength);
1397              if (t < endmatch) linenumber++; else break;
1398              }
1399            linelength = t - ptr - endlinelength;
1400          }          }
1401    
1402        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 746  while (ptr < endptr) Line 1411  while (ptr < endptr)
1411          {          {
1412          int first = S_arg * 2;          int first = S_arg * 2;
1413          int last  = first + 1;          int last  = first + 1;
1414          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1415          fprintf(stdout, "X");          fprintf(stdout, "X");
1416          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1417          }          }
1418        else        else
1419  #endif  #endif
1420    
1421        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1422          matches, but not of course if the line is a non-match. */
1423    
1424        if (do_colour)        if (do_colour && !invert)
1425          {          {
1426          fwrite(ptr, 1, offsets[0], stdout);          int plength;
1427            FWRITE(ptr, 1, offsets[0], stdout);
1428          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1429          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1430          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1431          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          for (;;)
1432              {
1433              startoffset = offsets[1];
1434              if (startoffset >= (int)linelength + endlinelength ||
1435                  !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1436                break;
1437              FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1438              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1439              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1440              fprintf(stdout, "%c[00m", 0x1b);
1441              }
1442    
1443            /* In multiline mode, we may have already printed the complete line
1444            and its line-ending characters (if they matched the pattern), so there
1445            may be no more to print. */
1446    
1447            plength = (int)((linelength + endlinelength) - startoffset);
1448            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1449          }          }
       else fwrite(ptr, 1, linelength, stdout);  
1450    
1451        fprintf(stdout, "\n");        /* Not colouring; no need to search for further matches */
1452    
1453          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1454        }        }
1455    
1456      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match. If --line-buffered was
1457        given, flush the output. */
1458    
1459        if (line_buffered) fflush(stdout);
1460      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1461    
1462      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1463      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1464    
1465      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1466      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1467      }      }
1468    
1469    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1470      anything to be printed), we have to move on to the end of the match before
1471      proceeding. */
1472    
1473      if (multiline && invert && match)
1474        {
1475        int ellength;
1476        char *endmatch = ptr + offsets[1];
1477        t = ptr;
1478        while (t < endmatch)
1479          {
1480          t = end_of_line(t, endptr, &ellength);
1481          if (t <= endmatch) linenumber++; else break;
1482          }
1483        endmatch = end_of_line(endmatch, endptr, &ellength);
1484        linelength = endmatch - ptr - ellength;
1485        }
1486    
1487      /* Advance to after the newline and increment the line number. The file
1488      offset to the current line is maintained in filepos. */
1489    
1490    ptr += linelength + 1;    ptr += linelength + endlinelength;
1491      filepos += (int)(linelength + endlinelength);
1492    linenumber++;    linenumber++;
1493    
1494      /* If input is line buffered, and the buffer is not yet full, read another
1495      line and add it into the buffer. */
1496    
1497      if (input_line_buffered && bufflength < (size_t)bufsize)
1498        {
1499        int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1500        bufflength += add;
1501        endptr += add;
1502        }
1503    
1504    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1505    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1506    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
1507    about to be lost, print them. */    about to be lost, print them. */
1508    
1509    if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)    if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1510      {      {
1511      if (after_context > 0 &&      if (after_context > 0 &&
1512          lastmatchnumber > 0 &&          lastmatchnumber > 0 &&
1513          lastmatchrestart < buffer + MBUFTHIRD)          lastmatchrestart < main_buffer + bufthird)
1514        {        {
1515        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1516        lastmatchnumber = 0;        lastmatchnumber = 0;
# Line 801  while (ptr < endptr) Line 1518  while (ptr < endptr)
1518    
1519      /* Now do the shuffle */      /* Now do the shuffle */
1520    
1521      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1522      ptr -= MBUFTHIRD;      ptr -= bufthird;
1523      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);  
1524      endptr = buffer + bufflength;  #ifdef SUPPORT_LIBZ
1525        if (frtype == FR_LIBZ)
1526          bufflength = 2*bufthird +
1527            gzread (ingz, main_buffer + 2*bufthird, bufthird);
1528        else
1529    #endif
1530    
1531    #ifdef SUPPORT_LIBBZ2
1532        if (frtype == FR_LIBBZ2)
1533          bufflength = 2*bufthird +
1534            BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1535        else
1536    #endif
1537    
1538        bufflength = 2*bufthird +
1539          (input_line_buffered?
1540           read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1541           fread(main_buffer + 2*bufthird, 1, bufthird, in));
1542        endptr = main_buffer + bufflength;
1543    
1544      /* Adjust any last match point */      /* Adjust any last match point */
1545    
1546      if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;      if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1547      }      }
1548    }     /* Loop through the whole file */    }     /* Loop through the whole file */
1549    
1550  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
1551  hyphenpending if it prints something. */  hyphenpending if it prints something. */
1552    
1553  if (!only_matching && !count_only)  if (only_matching < 0 && !count_only)
1554    {    {
1555    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1556    hyphenpending |= endhyphenpending;    hyphenpending |= endhyphenpending;
# Line 834  if (filenames == FN_NOMATCH_ONLY) Line 1569  if (filenames == FN_NOMATCH_ONLY)
1569    
1570  if (count_only)  if (count_only)
1571    {    {
1572    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1573    fprintf(stdout, "%d\n", count);      {
1574        if (printname != NULL && filenames != FN_NONE)
1575          fprintf(stdout, "%s:", printname);
1576        fprintf(stdout, "%d\n", count);
1577        }
1578    }    }
1579    
1580  return rc;  return rc;
# Line 867  grep_or_recurse(char *pathname, BOOL dir Line 1606  grep_or_recurse(char *pathname, BOOL dir
1606  {  {
1607  int rc = 1;  int rc = 1;
1608  int sep;  int sep;
1609  FILE *in;  int frtype;
1610    void *handle;
1611    FILE *in = NULL;           /* Ensure initialized */
1612    
1613    #ifdef SUPPORT_LIBZ
1614    gzFile ingz = NULL;
1615    #endif
1616    
1617    #ifdef SUPPORT_LIBBZ2
1618    BZFILE *inbz2 = NULL;
1619    #endif
1620    
1621    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBZ2
1622    int pathlen;
1623    #endif
1624    
1625  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1626    
1627  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1628    {    {
1629    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN, stdin_name,
1630      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1631        stdin_name : NULL);        stdin_name : NULL);
1632    }    }
1633    
   
1634  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* If the file is a directory, skip if skipping or if we are recursing, scan
1635  each file within it, subject to any include or exclude patterns that were set.  each file and directory within it, subject to any include or exclude patterns
1636  The scanning code is localized so it can be made system-specific. */  that were set. The scanning code is localized so it can be made
1637    system-specific. */
1638    
1639  if ((sep = isdirectory(pathname)) != 0)  if ((sep = isdirectory(pathname)) != 0)
1640    {    {
# Line 902  if ((sep = isdirectory(pathname)) != 0) Line 1655  if ((sep = isdirectory(pathname)) != 0)
1655    
1656      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
1657        {        {
1658        int frc, blen;        int frc, nflen;
1659        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1660        blen = strlen(buffer);        nflen = (int)(strlen(nextfile));
1661    
1662        if (exclude_compiled != NULL &&        if (isdirectory(buffer))
1663            pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)          {
1664          continue;          if (exclude_dir_compiled != NULL &&
1665                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1666        if (include_compiled != NULL &&            continue;
1667            pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  
1668          continue;          if (include_dir_compiled != NULL &&
1669                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1670              continue;
1671            }
1672          else
1673            {
1674            if (exclude_compiled != NULL &&
1675                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1676              continue;
1677    
1678            if (include_compiled != NULL &&
1679                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1680              continue;
1681            }
1682    
1683        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1684        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
# Line 935  skipping was not requested. The scan pro Line 1701  skipping was not requested. The scan pro
1701  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1702  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1703    
1704  in = fopen(pathname, "r");  #if defined SUPPORT_LIBZ || defined SUPPORT_LIBZ2
1705  if (in == NULL)  pathlen = (int)(strlen(pathname));
1706    #endif
1707    
1708    /* Open using zlib if it is supported and the file name ends with .gz. */
1709    
1710    #ifdef SUPPORT_LIBZ
1711    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1712      {
1713      ingz = gzopen(pathname, "rb");
1714      if (ingz == NULL)
1715        {
1716        if (!silent)
1717          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1718            strerror(errno));
1719        return 2;
1720        }
1721      handle = (void *)ingz;
1722      frtype = FR_LIBZ;
1723      }
1724    else
1725    #endif
1726    
1727    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1728    
1729    #ifdef SUPPORT_LIBBZ2
1730    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1731      {
1732      inbz2 = BZ2_bzopen(pathname, "rb");
1733      handle = (void *)inbz2;
1734      frtype = FR_LIBBZ2;
1735      }
1736    else
1737    #endif
1738    
1739    /* Otherwise use plain fopen(). The label is so that we can come back here if
1740    an attempt to read a .bz2 file indicates that it really is a plain file. */
1741    
1742    #ifdef SUPPORT_LIBBZ2
1743    PLAIN_FILE:
1744    #endif
1745      {
1746      in = fopen(pathname, "rb");
1747      handle = (void *)in;
1748      frtype = FR_PLAIN;
1749      }
1750    
1751    /* All the opening methods yield NULL and set errno when they fail. */
1752    
1753    if (handle == NULL)
1754    {    {
1755    if (!silent)    if (!silent)
1756      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 944  if (in == NULL) Line 1758  if (in == NULL)
1758    return 2;    return 2;
1759    }    }
1760    
1761  rc = pcregrep(in, (filenames > FN_DEFAULT ||  /* Now grep the file */
1762    
1763    rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1764    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1765    
1766    /* Close in an appropriate manner. */
1767    
1768    #ifdef SUPPORT_LIBZ
1769    if (frtype == FR_LIBZ)
1770      gzclose(ingz);
1771    else
1772    #endif
1773    
1774    /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1775    read failed. If the error indicates that the file isn't in fact bzipped, try
1776    again as a normal file. */
1777    
1778    #ifdef SUPPORT_LIBBZ2
1779    if (frtype == FR_LIBBZ2)
1780      {
1781      if (rc == 3)
1782        {
1783        int errnum;
1784        const char *err = BZ2_bzerror(inbz2, &errnum);
1785        if (errnum == BZ_DATA_ERROR_MAGIC)
1786          {
1787          BZ2_bzclose(inbz2);
1788          goto PLAIN_FILE;
1789          }
1790        else if (!silent)
1791          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1792            pathname, err);
1793        rc = 2;    /* The normal "something went wrong" code */
1794        }
1795      BZ2_bzclose(inbz2);
1796      }
1797    else
1798    #endif
1799    
1800    /* Normal file close */
1801    
1802  fclose(in);  fclose(in);
1803    
1804    /* Pass back the yield from pcregrep(). */
1805    
1806  return rc;  return rc;
1807  }  }
1808    
# Line 968  for (op = optionlist; op->one_char != 0; Line 1823  for (op = optionlist; op->one_char != 0;
1823    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1824    }    }
1825  fprintf(stderr, "] [long options] [pattern] [files]\n");  fprintf(stderr, "] [long options] [pattern] [files]\n");
1826  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1827      "options.\n");
1828  return rc;  return rc;
1829  }  }
1830    
# Line 987  option_item *op; Line 1843  option_item *op;
1843  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1844  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1845  printf("PATTERN must be present if neither -e nor -f is used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1846  printf("\"-\" can be used as a file name to mean STDIN.\n\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
1847    
1848    #ifdef SUPPORT_LIBZ
1849    printf("Files whose names end in .gz are read using zlib.\n");
1850    #endif
1851    
1852    #ifdef SUPPORT_LIBBZ2
1853    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1854    #endif
1855    
1856    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1857    printf("Other files and the standard input are read as plain files.\n\n");
1858    #else
1859    printf("All files are read as plain files, without any interpretation.\n\n");
1860    #endif
1861    
1862    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1863  printf("Options:\n");  printf("Options:\n");
1864    
1865  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
1866    {    {
1867    int n;    int n;
1868    char s[4];    char s[4];
1869    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");  
1870    printf("  %s --%s%n", s, op->long_name, &n);    /* Two options were accidentally implemented and documented with underscores
1871    n = 30 - n;    instead of hyphens in their names, something that was not noticed for quite a
1872      few releases. When fixing this, I left the underscored versions in the list
1873      in case people were using them. However, we don't want to display them in the
1874      help data. There are no other options that contain underscores, and we do not
1875      expect ever to implement such options. Therefore, just omit any option that
1876      contains an underscore. */
1877    
1878      if (strchr(op->long_name, '_') != NULL) continue;
1879    
1880      if (op->one_char > 0 && (op->long_name)[0] == 0)
1881        n = 31 - printf("  -%c", op->one_char);
1882      else
1883        {
1884        if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
1885          else strcpy(s, "   ");
1886        n = 31 - printf("  %s --%s", s, op->long_name);
1887        }
1888    
1889    if (n < 1) n = 1;    if (n < 1) n = 1;
1890    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                           ", op->help_text);
1891    }    }
1892    
1893  printf("\nWhen reading patterns from a file instead of using a command line option,\n");  printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
1894  printf("trailing white space is removed and blank lines are ignored.\n");  printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
1895  printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  printf("When reading patterns or file names from a file, trailing white\n");
1896    printf("space is removed and blank lines are ignored.\n");
1897    printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
1898      MAX_PATTERN_COUNT, PATBUFSIZE);
1899    
1900  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1901  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
# Line 1023  handle_option(int letter, int options) Line 1913  handle_option(int letter, int options)
1913  {  {
1914  switch(letter)  switch(letter)
1915    {    {
1916    case N_HELP: help(); exit(0);    case N_FOFFSETS: file_offsets = TRUE; break;
1917      case N_HELP: help(); pcregrep_exit(0);
1918      case N_LBUFFER: line_buffered = TRUE; break;
1919      case N_LOFFSETS: line_offsets = number = TRUE; break;
1920      case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
1921      case 'a': binary_files = BIN_TEXT; break;
1922    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1923    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1924    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
1925      case 'I': binary_files = BIN_NOMATCH; break;
1926    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
1927    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1928    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1929    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1930    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1931    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1932    case 'o': only_matching = TRUE; break;    case 'o': only_matching = 0; break;
1933    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1934    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
1935    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1936    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1937    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1938    case 'w': process_options |= PO_WORD_MATCH; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1939    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1940    
1941    case 'V':    case 'V':
1942    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
1943    fprintf(stderr, "PCRE version %s\n", pcre_version());    pcregrep_exit(0);
   exit(0);  
1944    break;    break;
1945    
1946    default:    default:
1947    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1948    exit(usage(2));    pcregrep_exit(usage(2));
1949    }    }
1950    
1951  return options;  return options;
# Line 1105  Returns:         TRUE on success, FALSE Line 2000  Returns:         TRUE on success, FALSE
2000  static BOOL  static BOOL
2001  compile_single_pattern(char *pattern, int options, char *filename, int count)  compile_single_pattern(char *pattern, int options, char *filename, int count)
2002  {  {
2003  char buffer[MBUFTHIRD + 16];  char buffer[PATBUFSIZE];
2004  const char *error;  const char *error;
2005  int errptr;  int errptr;
2006    
# Line 1116  if (pattern_count >= MAX_PATTERN_COUNT) Line 2011  if (pattern_count >= MAX_PATTERN_COUNT)
2011    return FALSE;    return FALSE;
2012    }    }
2013    
2014  sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,  sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
2015    suffix[process_options]);    suffix[process_options]);
2016  pattern_list[pattern_count] =  pattern_list[pattern_count] =
2017    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
2018  if (pattern_list[pattern_count++] != NULL) return TRUE;  if (pattern_list[pattern_count] != NULL)
2019      {
2020      pattern_count++;
2021      return TRUE;
2022      }
2023    
2024  /* Handle compile errors */  /* Handle compile errors */
2025    
# Line 1152  return FALSE; Line 2051  return FALSE;
2051  *************************************************/  *************************************************/
2052    
2053  /* When the -F option has been used, each string may be a list of strings,  /* When the -F option has been used, each string may be a list of strings,
2054  separated by newlines. They will be matched literally.  separated by line breaks. They will be matched literally.
2055    
2056  Arguments:  Arguments:
2057    pattern        the pattern string    pattern        the pattern string
# Line 1170  compile_pattern(char *pattern, int optio Line 2069  compile_pattern(char *pattern, int optio
2069  {  {
2070  if ((process_options & PO_FIXED_STRINGS) != 0)  if ((process_options & PO_FIXED_STRINGS) != 0)
2071    {    {
2072    char buffer[MBUFTHIRD];    char *eop = pattern + strlen(pattern);
2073      char buffer[PATBUFSIZE];
2074    for(;;)    for(;;)
2075      {      {
2076      char *p = strchr(pattern, '\n');      int ellength;
2077      if (p == NULL)      char *p = end_of_line(pattern, eop, &ellength);
2078        if (ellength == 0)
2079        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
2080      sprintf(buffer, "%.*s", p - pattern, pattern);      sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
2081      pattern = p + 1;      pattern = p;
2082      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
2083        return FALSE;        return FALSE;
2084      }      }
# Line 1200  int i, j; Line 2101  int i, j;
2101  int rc = 1;  int rc = 1;
2102  int pcre_options = 0;  int pcre_options = 0;
2103  int cmd_pattern_count = 0;  int cmd_pattern_count = 0;
2104    int hint_count = 0;
2105  int errptr;  int errptr;
2106  BOOL only_one_at_top;  BOOL only_one_at_top;
2107  char *patterns[MAX_PATTERN_COUNT];  char *patterns[MAX_PATTERN_COUNT];
2108  const char *locale_from = "--locale";  const char *locale_from = "--locale";
2109  const char *error;  const char *error;
2110    
2111    #ifdef SUPPORT_PCREGREP_JIT
2112    pcre_jit_stack *jit_stack = NULL;
2113    #endif
2114    
2115    /* Set the default line ending value from the default in the PCRE library;
2116    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2117    Note that the return values from pcre_config(), though derived from the ASCII
2118    codes, are the same in EBCDIC environments, so we must use the actual values
2119    rather than escapes such as '\r'. */
2120    
2121    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2122    switch(i)
2123      {
2124      default:               newline = (char *)"lf"; break;
2125      case 13:               newline = (char *)"cr"; break;
2126      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2127      case -1:               newline = (char *)"any"; break;
2128      case -2:               newline = (char *)"anycrlf"; break;
2129      }
2130    
2131  /* Process the options */  /* Process the options */
2132    
2133  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
# Line 1223  for (i = 1; i < argc; i++) Line 2145  for (i = 1; i < argc; i++)
2145    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2146      {      {
2147      if (pattern_filename != NULL || pattern_count > 0) break;      if (pattern_filename != NULL || pattern_count > 0) break;
2148        else exit(usage(2));        else pcregrep_exit(usage(2));
2149      }      }
2150    
2151    /* Handle a long name option, or -- to terminate the options */    /* Handle a long name option, or -- to terminate the options */
# Line 1245  for (i = 1; i < argc; i++) Line 2167  for (i = 1; i < argc; i++)
2167      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2168      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2169      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2170      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
2171      these categories, fortunately. */      both these categories. */
2172    
2173      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2174        {        {
2175        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2176        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2177        if (opbra == NULL)     /* Not a (p) case */  
2178          /* Handle options with only one spelling of the name */
2179    
2180          if (opbra == NULL)     /* Does not contain '(' */
2181          {          {
2182          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2183            {            {
# Line 1260  for (i = 1; i < argc; i++) Line 2185  for (i = 1; i < argc; i++)
2185            }            }
2186          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2187            {            {
2188            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2189            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2190                (int)strlen(arg) : (int)(argequals - arg);
2191            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2192              {              {
2193              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1274  for (i = 1; i < argc; i++) Line 2200  for (i = 1; i < argc; i++)
2200              }              }
2201            }            }
2202          }          }
2203        else                   /* Special case xxxx(p) */  
2204          /* Handle options with an alternate spelling of the name */
2205    
2206          else
2207          {          {
2208          char buff1[24];          char buff1[24];
2209          char buff2[24];          char buff2[24];
2210          int baselen = opbra - op->long_name;  
2211            int baselen = (int)(opbra - op->long_name);
2212            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2213            int arglen = (argequals == NULL || equals == NULL)?
2214              (int)strlen(arg) : (int)(argequals - arg);
2215    
2216          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2217          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2218            opbra + 1);  
2219          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2220               strncmp(arg, buff2, arglen) == 0)
2221              {
2222              if (equals != NULL && argequals != NULL)
2223                {
2224                option_data = argequals;
2225                if (*option_data == '=')
2226                  {
2227                  option_data++;
2228                  longopwasequals = TRUE;
2229                  }
2230                }
2231            break;            break;
2232              }
2233          }          }
2234        }        }
2235    
2236      if (op->one_char == 0)      if (op->one_char == 0)
2237        {        {
2238        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2239        exit(usage(2));        pcregrep_exit(usage(2));
2240        }        }
2241      }      }
2242    
2243      /* Jeffrey Friedl's debugging harness uses these additional options which
2244      are not in the right form for putting in the option table because they use
2245      only one hyphen, yet are more than one character long. By putting them
2246      separately here, they will not get displayed as part of the help() output,
2247      but I don't think Jeffrey will care about that. */
2248    
2249    #ifdef JFRIEDL_DEBUG
2250      else if (strcmp(argv[i], "-pre") == 0) {
2251              jfriedl_prefix = argv[++i];
2252              continue;
2253      } else if (strcmp(argv[i], "-post") == 0) {
2254              jfriedl_postfix = argv[++i];
2255              continue;
2256      } else if (strcmp(argv[i], "-XT") == 0) {
2257              sscanf(argv[++i], "%d", &jfriedl_XT);
2258              continue;
2259      } else if (strcmp(argv[i], "-XR") == 0) {
2260              sscanf(argv[++i], "%d", &jfriedl_XR);
2261              continue;
2262      }
2263    #endif
2264    
2265    
2266    /* One-char options; many that have no data may be in a single argument; we    /* One-char options; many that have no data may be in a single argument; we
2267    continue till we hit the last one or one that needs data. */    continue till we hit the last one or one that needs data. */
2268    
# Line 1304  for (i = 1; i < argc; i++) Line 2273  for (i = 1; i < argc; i++)
2273      while (*s != 0)      while (*s != 0)
2274        {        {
2275        for (op = optionlist; op->one_char != 0; op++)        for (op = optionlist; op->one_char != 0; op++)
2276          { if (*s == op->one_char) break; }          {
2277            if (*s == op->one_char) break;
2278            }
2279        if (op->one_char == 0)        if (op->one_char == 0)
2280          {          {
2281          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2282            *s, argv[i]);            *s, argv[i]);
2283          exit(usage(2));          pcregrep_exit(usage(2));
2284          }          }
2285        if (op->type != OP_NODATA || s[1] == 0)  
2286          /* Check for a single-character option that has data: OP_OP_NUMBER
2287          is used for one that either has a numerical value or defaults, i.e. the
2288          data is optional. If a digit follows, there is data; if not, carry on
2289          with other single-character options in the same string. */
2290    
2291          option_data = s+1;
2292          if (op->type == OP_OP_NUMBER)
2293          {          {
2294          option_data = s+1;          if (isdigit((unsigned char)s[1])) break;
         break;  
2295          }          }
2296          else   /* Check for end or a dataless option */
2297            {
2298            if (op->type != OP_NODATA || s[1] == 0) break;
2299            }
2300    
2301          /* Handle a single-character option with no data, then loop for the
2302          next character in the string. */
2303    
2304        pcre_options = handle_option(*s++, pcre_options);        pcre_options = handle_option(*s++, pcre_options);
2305        }        }
2306      }      }
# Line 1332  for (i = 1; i < argc; i++) Line 2317  for (i = 1; i < argc; i++)
2317    
2318    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2319    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
2320    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r",
2321    Jeffrey Friedl's special debugging option. */    "only-matching", and Jeffrey Friedl's special -S debugging option. */
2322    
2323    if (*option_data == 0 &&    if (*option_data == 0 &&
2324        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
# Line 1343  for (i = 1; i < argc; i++) Line 2328  for (i = 1; i < argc; i++)
2328        case N_COLOUR:        case N_COLOUR:
2329        colour_option = (char *)"auto";        colour_option = (char *)"auto";
2330        break;        break;
2331    
2332          case 'o':
2333          only_matching = 0;
2334          break;
2335    
2336  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
2337        case 'S':        case 'S':
2338        S_arg = 0;        S_arg = 0;
# Line 1359  for (i = 1; i < argc; i++) Line 2349  for (i = 1; i < argc; i++)
2349      if (i >= argc - 1 || longopwasequals)      if (i >= argc - 1 || longopwasequals)
2350        {        {
2351        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2352        exit(usage(2));        pcregrep_exit(usage(2));
2353        }        }
2354      option_data = argv[++i];      option_data = argv[++i];
2355      }      }
2356    
2357    /* If the option type is OP_PATLIST, it's the -e option, which can be called    /* If the option type is OP_PATLIST, it's the -e option, which can be called
2358    multiple times to create a list of patterns. */    multiple times to create a list of patterns. */
2359    
2360    if (op->type == OP_PATLIST)    if (op->type == OP_PATLIST)
2361      {      {
2362      if (cmd_pattern_count >= MAX_PATTERN_COUNT)      if (cmd_pattern_count >= MAX_PATTERN_COUNT)
# Line 1377  for (i = 1; i < argc; i++) Line 2367  for (i = 1; i < argc; i++)
2367        }        }
2368      patterns[cmd_pattern_count++] = option_data;      patterns[cmd_pattern_count++] = option_data;
2369      }      }
2370    
2371      /* Handle OP_BINFILES */
2372    
2373      else if (op->type == OP_BINFILES)
2374        {
2375        if (strcmp(option_data, "binary") == 0)
2376          binary_files = BIN_BINARY;
2377        else if (strcmp(option_data, "without-match") == 0)
2378          binary_files = BIN_NOMATCH;
2379        else if (strcmp(option_data, "text") == 0)
2380          binary_files = BIN_TEXT;
2381        else
2382          {
2383          fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2384            option_data);
2385          pcregrep_exit(usage(2));
2386          }
2387        }
2388    
2389    /* Otherwise, deal with single string or numeric data values. */    /* Otherwise, deal with single string or numeric data values. */
2390    
2391    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)    else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2392               op->type != OP_OP_NUMBER)
2393      {      {
2394      *((char **)op->dataptr) = option_data;      *((char **)op->dataptr) = option_data;
2395      }      }
2396    
2397      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2398      only for unpicking arguments, so just keep it simple. */
2399    
2400    else    else
2401      {      {
2402      char *endptr;      unsigned long int n = 0;
2403      int n = strtoul(option_data, &endptr, 10);      char *endptr = option_data;
2404        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2405        while (isdigit((unsigned char)(*endptr)))
2406          n = n * 10 + (int)(*endptr++ - '0');
2407        if (toupper(*endptr) == 'K')
2408          {
2409          n *= 1024;
2410          endptr++;
2411          }
2412        else if (toupper(*endptr) == 'M')
2413          {
2414          n *= 1024*1024;
2415          endptr++;
2416          }
2417      if (*endptr != 0)      if (*endptr != 0)
2418        {        {
2419        if (longop)        if (longop)
2420          {          {
2421          char *equals = strchr(op->long_name, '=');          char *equals = strchr(op->long_name, '=');
2422          int nlen = (equals == NULL)? (int)strlen(op->long_name) :          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2423            equals - op->long_name;            (int)(equals - op->long_name);
2424          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2425            option_data, nlen, op->long_name);            option_data, nlen, op->long_name);
2426          }          }
2427        else        else
2428          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2429            option_data, op->one_char);            option_data, op->one_char);
2430        exit(usage(2));        pcregrep_exit(usage(2));
2431        }        }
2432      *((int *)op->dataptr) = n;      if (op->type == OP_LONGNUMBER)
2433            *((unsigned long int *)op->dataptr) = n;
2434        else
2435            *((int *)op->dataptr) = n;
2436      }      }
2437    }    }
2438    
# Line 1416  if (both_context > 0) Line 2445  if (both_context > 0)
2445    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2446    }    }
2447    
2448    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2449    However, the latter two set only_matching. */
2450    
2451    if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2452        (file_offsets && line_offsets))
2453      {
2454      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2455        "and/or --line-offsets\n");
2456      pcregrep_exit(usage(2));
2457      }
2458    
2459    if (file_offsets || line_offsets) only_matching = 0;
2460    
2461  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2462  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
2463    
# Line 1465  if (colour_option != NULL && strcmp(colo Line 2507  if (colour_option != NULL && strcmp(colo
2507      }      }
2508    }    }
2509    
2510    /* Interpret the newline type; the default settings are Unix-like. */
2511    
2512    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2513      {
2514      pcre_options |= PCRE_NEWLINE_CR;
2515      endlinetype = EL_CR;
2516      }
2517    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2518      {
2519      pcre_options |= PCRE_NEWLINE_LF;
2520      endlinetype = EL_LF;
2521      }
2522    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2523      {
2524      pcre_options |= PCRE_NEWLINE_CRLF;
2525      endlinetype = EL_CRLF;
2526      }
2527    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2528      {
2529      pcre_options |= PCRE_NEWLINE_ANY;
2530      endlinetype = EL_ANY;
2531      }
2532    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2533      {
2534      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2535      endlinetype = EL_ANYCRLF;
2536      }
2537    else
2538      {
2539      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2540      return 2;
2541      }
2542    
2543  /* Interpret the text values for -d and -D */  /* Interpret the text values for -d and -D */
2544    
2545  if (dee_option != NULL)  if (dee_option != NULL)
# Line 1490  if (DEE_option != NULL) Line 2565  if (DEE_option != NULL)
2565      }      }
2566    }    }
2567    
2568  /* Check the value for Jeff Friedl's debugging option. */  /* Check the values for Jeffrey Friedl's debugging options. */
2569    
2570  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
2571  if (S_arg > 9)  if (S_arg > 9)
# Line 1498  if (S_arg > 9) Line 2573  if (S_arg > 9)
2573    fprintf(stderr, "pcregrep: bad value for -S option\n");    fprintf(stderr, "pcregrep: bad value for -S option\n");
2574    return 2;    return 2;
2575    }    }
2576    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2577      {
2578      if (jfriedl_XT == 0) jfriedl_XT = 1;
2579      if (jfriedl_XR == 0) jfriedl_XR = 1;
2580      }
2581  #endif  #endif
2582    
2583  /* Get memory to store the pattern and hints lists. */  /* Get memory for the main buffer, and to store the pattern and hints lists. */
2584    
2585    bufsize = 3*bufthird;
2586    main_buffer = (char *)malloc(bufsize);
2587  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2588  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2589    
2590  if (pattern_list == NULL || hints_list == NULL)  if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
2591    {    {
2592    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
2593    return 2;    goto EXIT2;
2594    }    }
2595    
2596  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there is no file provided by -f,
# Line 1527  for (j = 0; j < cmd_pattern_count; j++) Line 2609  for (j = 0; j < cmd_pattern_count; j++)
2609    {    {
2610    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(patterns[j], pcre_options, NULL,
2611         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2612      return 2;      goto EXIT2;
2613    }    }
2614    
2615  /* Compile the regular expressions that are provided in a file. */  /* Compile the regular expressions that are provided in a file. */
# Line 1537  if (pattern_filename != NULL) Line 2619  if (pattern_filename != NULL)
2619    int linenumber = 0;    int linenumber = 0;
2620    FILE *f;    FILE *f;
2621    char *filename;    char *filename;
2622    char buffer[MBUFTHIRD];    char buffer[PATBUFSIZE];
2623    
2624    if (strcmp(pattern_filename, "-") == 0)    if (strcmp(pattern_filename, "-") == 0)
2625      {      {
# Line 1551  if (pattern_filename != NULL) Line 2633  if (pattern_filename != NULL)
2633        {        {
2634        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2635          strerror(errno));          strerror(errno));
2636        return 2;        goto EXIT2;
2637        }        }
2638      filename = pattern_filename;      filename = pattern_filename;
2639      }      }
2640    
2641    while (fgets(buffer, MBUFTHIRD, f) != NULL)    while (fgets(buffer, PATBUFSIZE, f) != NULL)
2642      {      {
2643      char *s = buffer + (int)strlen(buffer);      char *s = buffer + (int)strlen(buffer);
2644      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
# Line 1564  if (pattern_filename != NULL) Line 2646  if (pattern_filename != NULL)
2646      linenumber++;      linenumber++;
2647      if (buffer[0] == 0) continue;   /* Skip blank lines */      if (buffer[0] == 0) continue;   /* Skip blank lines */
2648      if (!compile_pattern(buffer, pcre_options, filename, linenumber))      if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2649        return 2;        goto EXIT2;
2650      }      }
2651    
2652    if (f != stdin) fclose(f);    if (f != stdin) fclose(f);
2653    }    }
2654    
2655  /* Study the regular expressions, as we will be running them many times */  /* Study the regular expressions, as we will be running them many times. Unless
2656    JIT has been explicitly disabled, arrange a stack for it to use. */
2657    
2658    #ifdef SUPPORT_PCREGREP_JIT
2659    if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2660      jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
2661    #endif
2662    
2663  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
2664    {    {
2665    hints_list[j] = pcre_study(pattern_list[j], 0, &error);    hints_list[j] = pcre_study(pattern_list[j], study_options, &error);
2666    if (error != NULL)    if (error != NULL)
2667      {      {
2668      char s[16];      char s[16];
2669      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2670      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2671      return 2;      goto EXIT2;
2672        }
2673      hint_count++;
2674    #ifdef SUPPORT_PCREGREP_JIT
2675      if (jit_stack != NULL && hints_list[j] != NULL)
2676        pcre_assign_jit_stack(hints_list[j], NULL, jit_stack);
2677    #endif
2678      }
2679    
2680    /* If --match-limit or --recursion-limit was set, put the value(s) into the
2681    pcre_extra block for each pattern. */
2682    
2683    if (match_limit > 0 || match_limit_recursion > 0)
2684      {
2685      for (j = 0; j < pattern_count; j++)
2686        {
2687        if (hints_list[j] == NULL)
2688          {
2689          hints_list[j] = malloc(sizeof(pcre_extra));
2690          if (hints_list[j] == NULL)
2691            {
2692            fprintf(stderr, "pcregrep: malloc failed\n");
2693            pcregrep_exit(2);
2694            }
2695          }
2696        if (match_limit > 0)
2697          {
2698          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2699          hints_list[j]->match_limit = match_limit;
2700          }
2701        if (match_limit_recursion > 0)
2702          {
2703          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2704          hints_list[j]->match_limit_recursion = match_limit_recursion;
2705          }
2706      }      }
2707    }    }
2708    
# Line 1594  if (exclude_pattern != NULL) Line 2716  if (exclude_pattern != NULL)
2716      {      {
2717      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2718        errptr, error);        errptr, error);
2719      return 2;      goto EXIT2;
2720      }      }
2721    }    }
2722    
# Line 1606  if (include_pattern != NULL) Line 2728  if (include_pattern != NULL)
2728      {      {
2729      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2730        errptr, error);        errptr, error);
2731      return 2;      goto EXIT2;
2732      }      }
2733    }    }
2734    
2735  /* If there are no further arguments, do the business on stdin and exit. */  if (exclude_dir_pattern != NULL)
2736      {
2737      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2738        pcretables);
2739      if (exclude_dir_compiled == NULL)
2740        {
2741        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2742          errptr, error);
2743        goto EXIT2;
2744        }
2745      }
2746    
2747  if (i >= argc)  if (include_dir_pattern != NULL)
2748    return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    {
2749      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2750        pcretables);
2751      if (include_dir_compiled == NULL)
2752        {
2753        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2754          errptr, error);
2755        goto EXIT2;
2756        }
2757      }
2758    
2759    /* If a file that contains a list of files to search has been specified, read
2760    it line by line and search the given files. Otherwise, if there are no further
2761    arguments, do the business on stdin and exit. */
2762    
2763    if (file_list != NULL)
2764      {
2765      char buffer[PATBUFSIZE];
2766      FILE *fl;
2767      if (strcmp(file_list, "-") == 0) fl = stdin; else
2768        {
2769        fl = fopen(file_list, "rb");
2770        if (fl == NULL)
2771          {
2772          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", file_list,
2773            strerror(errno));
2774          goto EXIT2;
2775          }
2776        }
2777      while (fgets(buffer, PATBUFSIZE, fl) != NULL)
2778        {
2779        int frc;
2780        char *end = buffer + (int)strlen(buffer);
2781        while (end > buffer && isspace(end[-1])) end--;
2782        *end = 0;
2783        if (*buffer != 0)
2784          {
2785          frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
2786          if (frc > 1) rc = frc;
2787            else if (frc == 0 && rc == 1) rc = 0;
2788          }
2789        }
2790      if (fl != stdin) fclose (fl);
2791      }
2792    
2793    /* Do this only if there was no file list (and no file arguments). */
2794    
2795  /* Otherwise, work through the remaining arguments as files or directories.  else if (i >= argc)
2796  Pass in the fact that there is only one argument at top level - this suppresses    {
2797  the file name if the argument is not a directory and filenames are not    rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2798  otherwise forced. */      (filenames > FN_DEFAULT)? stdin_name : NULL);
2799      goto EXIT;
2800      }
2801    
2802  only_one_at_top = i == argc - 1;   /* Catch initial value of i */  /* After handling file-list or if there are remaining arguments, work through
2803    them as files or directories. Pass in the fact that there is only one argument
2804    at top level - this suppresses the file name if the argument is not a directory
2805    and filenames are not otherwise forced. */
2806    
2807    only_one_at_top = i == argc - 1 && file_list == NULL;
2808    
2809  for (; i < argc; i++)  for (; i < argc; i++)
2810    {    {
# Line 1630  for (; i < argc; i++) Line 2814  for (; i < argc; i++)
2814      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2815    }    }
2816    
2817  return rc;  EXIT:
2818    #ifdef SUPPORT_PCREGREP_JIT
2819    if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2820    #endif
2821    if (main_buffer != NULL) free(main_buffer);
2822    if (pattern_list != NULL)
2823      {
2824      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2825      free(pattern_list);
2826      }
2827    if (hints_list != NULL)
2828      {
2829      for (i = 0; i < hint_count; i++)
2830        {
2831        if (hints_list[i] != NULL) pcre_free_study(hints_list[i]);
2832        }
2833      free(hints_list);
2834      }
2835    pcregrep_exit(rc);
2836    
2837    EXIT2:
2838    rc = 2;
2839    goto EXIT;
2840  }  }
2841    
2842  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.87  
changed lines
  Added in v.947

  ViewVC Help
Powered by ViewVC 1.1.5