/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 236 by ph10, Tue Sep 11 12:57:06 2007 UTC revision 947 by ph10, Sun Mar 4 16:51:13 2012 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 55  POSSIBILITY OF SUCH DAMAGE. Line 55  POSSIBILITY OF SUCH DAMAGE.
55  #include <unistd.h>  #include <unistd.h>
56  #endif  #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 63  POSSIBILITY OF SUCH DAMAGE. Line 71  POSSIBILITY OF SUCH DAMAGE.
71  typedef int BOOL;  typedef int BOOL;
72    
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define PATBUFSIZE BUFSIZ
78  #else  #else
79  #define MBUFTHIRD 8192  #define PATBUFSIZE 8192
80  #endif  #endif
81    
82  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
83  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
84  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
85    
86  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92  /* Actions for the -d and -D options */  /* Actions for the -d and -D options */
93    
# Line 91  enum { DEE_READ, DEE_SKIP }; Line 104  enum { DEE_READ, DEE_SKIP };
104    
105  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* Binary file options */
108    
109    enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
110    
111    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
112    environments), a warning is issued if the value of fwrite() is ignored.
113    Unfortunately, casting to (void) does not suppress the warning. To get round
114    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
115    apply to fprintf(). */
116    
117    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
118    
119    
120    
121  /*************************************************  /*************************************************
# Line 114  static char *colour_string = (char *)"1; Line 139  static char *colour_string = (char *)"1;
139  static char *colour_option = NULL;  static char *colour_option = NULL;
140  static char *dee_option = NULL;  static char *dee_option = NULL;
141  static char *DEE_option = NULL;  static char *DEE_option = NULL;
142    static char *main_buffer = NULL;
143  static char *newline = NULL;  static char *newline = NULL;
144  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
145  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
# Line 125  static int  pattern_count = 0; Line 151  static int  pattern_count = 0;
151  static pcre **pattern_list = NULL;  static pcre **pattern_list = NULL;
152  static pcre_extra **hints_list = NULL;  static pcre_extra **hints_list = NULL;
153    
154    static char *file_list = NULL;
155  static char *include_pattern = NULL;  static char *include_pattern = NULL;
156  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
157    static char *include_dir_pattern = NULL;
158    static char *exclude_dir_pattern = NULL;
159    
160  static pcre *include_compiled = NULL;  static pcre *include_compiled = NULL;
161  static pcre *exclude_compiled = NULL;  static pcre *exclude_compiled = NULL;
162    static pcre *include_dir_compiled = NULL;
163    static pcre *exclude_dir_compiled = NULL;
164    
165  static int after_context = 0;  static int after_context = 0;
166  static int before_context = 0;  static int before_context = 0;
167    static int binary_files = BIN_BINARY;
168  static int both_context = 0;  static int both_context = 0;
169    static int bufthird = PCREGREP_BUFSIZE;
170    static int bufsize = 3*PCREGREP_BUFSIZE;
171  static int dee_action = dee_READ;  static int dee_action = dee_READ;
172  static int DEE_action = DEE_READ;  static int DEE_action = DEE_READ;
173  static int error_count = 0;  static int error_count = 0;
174  static int filenames = FN_DEFAULT;  static int filenames = FN_DEFAULT;
175    static int only_matching = -1;
176  static int process_options = 0;  static int process_options = 0;
177    
178    #ifdef SUPPORT_PCREGREP_JIT
179    static int study_options = PCRE_STUDY_JIT_COMPILE;
180    #else
181    static int study_options = 0;
182    #endif
183    
184    static unsigned long int match_limit = 0;
185    static unsigned long int match_limit_recursion = 0;
186    
187  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
188  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
189    static BOOL file_offsets = FALSE;
190  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
191  static BOOL invert = FALSE;  static BOOL invert = FALSE;
192    static BOOL line_buffered = FALSE;
193    static BOOL line_offsets = FALSE;
194  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
195  static BOOL number = FALSE;  static BOOL number = FALSE;
196  static BOOL only_matching = FALSE;  static BOOL omit_zero_count = FALSE;
197    static BOOL resource_error = FALSE;
198  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
199  static BOOL silent = FALSE;  static BOOL silent = FALSE;
200  static BOOL utf8 = FALSE;  static BOOL utf8 = FALSE;
201    
202  /* Structure for options and list of them */  /* Structure for options and list of them */
203    
204  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
205         OP_PATLIST };         OP_OP_NUMBER, OP_PATLIST, OP_BINFILES };
206    
207  typedef struct option_item {  typedef struct option_item {
208    int type;    int type;
# Line 167  typedef struct option_item { Line 215  typedef struct option_item {
215  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
216  used to identify them. */  used to identify them. */
217    
218  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
219  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
220  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
221  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
222  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
223  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
224  #define N_NULL      (-7)  #define N_LABEL        (-7)
225    #define N_LOCALE       (-8)
226    #define N_NULL         (-9)
227    #define N_LOFFSETS     (-10)
228    #define N_FOFFSETS     (-11)
229    #define N_LBUFFER      (-12)
230    #define N_M_LIMIT      (-13)
231    #define N_M_LIMIT_REC  (-14)
232    #define N_BUFSIZE      (-15)
233    #define N_NOJIT        (-16)
234    #define N_FILE_LIST    (-17)
235    #define N_BINARY_FILES (-18)
236    
237  static option_item optionlist[] = {  static option_item optionlist[] = {
238    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
239    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
240    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
241    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
242    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
243    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },    { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
244    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
245    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
246    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
247    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
248    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
249    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
250    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
251    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_PATLIST,    'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
252    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
253    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_STRING,     'f',      &pattern_filename, "file=path",     "read patterns from file" },
254    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_STRING,     N_FILE_LIST, &file_list,     "file-list=path","read files to search from file" },
255    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
256    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
257    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
258    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
259    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },    { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
260    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },  #ifdef SUPPORT_PCREGREP_JIT
261    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
262    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },  #else
263    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
264    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },  #endif
265    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
266      { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
267      { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
268      { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
269      { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
270      { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
271      { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
272      { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
273      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
274      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
275      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
276      { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
277      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
278      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
279      { OP_STRING,     N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
280      { OP_STRING,     N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
281      { OP_STRING,     N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
282      { OP_STRING,     N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
283    
284      /* These two were accidentally implemented with underscores instead of
285      hyphens in the option names. As this was not discovered for several releases,
286      the incorrect versions are left in the table for compatibility. However, the
287      --help function misses out any option that has an underscore in its name. */
288    
289      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
290      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
291    
292  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
293    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
294  #endif  #endif
# Line 240  const char utf8_table4[] = { Line 325  const char utf8_table4[] = {
325    
326    
327  /*************************************************  /*************************************************
328    *         Exit from the program                  *
329    *************************************************/
330    
331    /* If there has been a resource error, give a suitable message.
332    
333    Argument:  the return code
334    Returns:   does not return
335    */
336    
337    static void
338    pcregrep_exit(int rc)
339    {
340    if (resource_error)
341      {
342      fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
343        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
344        PCRE_ERROR_JIT_STACKLIMIT);
345      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
346      }
347    
348    exit(rc);
349    }
350    
351    
352    /*************************************************
353  *            OS-specific functions               *  *            OS-specific functions               *
354  *************************************************/  *************************************************/
355    
# Line 303  return (statbuf.st_mode & S_IFMT) == S_I Line 413  return (statbuf.st_mode & S_IFMT) == S_I
413  }  }
414    
415    
416  /************* Test stdout for being a terminal in Unix **********/  /************* Test for a terminal in Unix **********/
417    
418  static BOOL  static BOOL
419  is_stdout_tty(void)  is_stdout_tty(void)
# Line 311  is_stdout_tty(void) Line 421  is_stdout_tty(void)
421  return isatty(fileno(stdout));  return isatty(fileno(stdout));
422  }  }
423    
424    static BOOL
425    is_file_tty(FILE *f)
426    {
427    return isatty(fileno(f));
428    }
429    
430    
431  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
432    
433  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
434  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
435  when it did not exist. */  when it did not exist. David Byron added a patch that moved the #include of
436    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
437    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
438    undefined when it is indeed undefined. */
439    
440  #elif HAVE_WINDOWS_H  #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
441    
442  #ifndef STRICT  #ifndef STRICT
443  # define STRICT  # define STRICT
# Line 327  when it did not exist. */ Line 445  when it did not exist. */
445  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
446  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
447  #endif  #endif
448    
449    #include <windows.h>
450    
451  #ifndef INVALID_FILE_ATTRIBUTES  #ifndef INVALID_FILE_ATTRIBUTES
452  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
453  #endif  #endif
454    
 #include <windows.h>  
   
455  typedef struct directory_type  typedef struct directory_type
456  {  {
457  HANDLE handle;  HANDLE handle;
# Line 362  dir = (directory_type *) malloc(sizeof(* Line 481  dir = (directory_type *) malloc(sizeof(*
481  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
482    {    {
483    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
484    exit(2);    pcregrep_exit(2);
485    }    }
486  memcpy(pattern, filename, len);  memcpy(pattern, filename, len);
487  memcpy(&(pattern[len]), "\\*", 3);  memcpy(&(pattern[len]), "\\*", 3);
# Line 417  regular if they are not directories. */ Line 536  regular if they are not directories. */
536    
537  int isregfile(char *filename)  int isregfile(char *filename)
538  {  {
539  return !isdirectory(filename)  return !isdirectory(filename);
540  }  }
541    
542    
543  /************* Test stdout for being a terminal in Win32 **********/  /************* Test for a terminal in Win32 **********/
544    
545  /* I don't know how to do this; assume never */  /* I don't know how to do this; assume never */
546    
547  static BOOL  static BOOL
548  is_stdout_tty(void)  is_stdout_tty(void)
549  {  {
550  FALSE;  return FALSE;
551    }
552    
553    static BOOL
554    is_file_tty(FILE *f)
555    {
556    return FALSE;
557  }  }
558    
559    
# Line 453  void closedirectory(directory_type *dir) Line 578  void closedirectory(directory_type *dir)
578  int isregfile(char *filename) { return 1; }  int isregfile(char *filename) { return 1; }
579    
580    
581  /************* Test stdout for being a terminal when we can't do it **********/  /************* Test for a terminal when we can't do it **********/
582    
583  static BOOL  static BOOL
584  is_stdout_tty(void)  is_stdout_tty(void)
# Line 461  is_stdout_tty(void) Line 586  is_stdout_tty(void)
586  return FALSE;  return FALSE;
587  }  }
588    
589    static BOOL
590    is_file_tty(FILE *f)
591    {
592    return FALSE;
593    }
594    
595  #endif  #endif
596    
# Line 489  return sys_errlist[n]; Line 619  return sys_errlist[n];
619    
620    
621  /*************************************************  /*************************************************
622    *            Read one line of input              *
623    *************************************************/
624    
625    /* Normally, input is read using fread() into a large buffer, so many lines may
626    be read at once. However, doing this for tty input means that no output appears
627    until a lot of input has been typed. Instead, tty input is handled line by
628    line. We cannot use fgets() for this, because it does not stop at a binary
629    zero, and therefore there is no way of telling how many characters it has read,
630    because there may be binary zeros embedded in the data.
631    
632    Arguments:
633      buffer     the buffer to read into
634      length     the maximum number of characters to read
635      f          the file
636    
637    Returns:     the number of characters read, zero at end of file
638    */
639    
640    static unsigned int
641    read_one_line(char *buffer, int length, FILE *f)
642    {
643    int c;
644    int yield = 0;
645    while ((c = fgetc(f)) != EOF)
646      {
647      buffer[yield++] = c;
648      if (c == '\n' || yield >= length) break;
649      }
650    return yield;
651    }
652    
653    
654    
655    /*************************************************
656  *             Find end of line                   *  *             Find end of line                   *
657  *************************************************/  *************************************************/
658    
# Line 500  Arguments: Line 664  Arguments:
664    endptr    end of available data    endptr    end of available data
665    lenptr    where to put the length of the eol sequence    lenptr    where to put the length of the eol sequence
666    
667  Returns:    pointer to the last byte of the line  Returns:    pointer after the last byte of the line,
668                including the newline byte(s)
669  */  */
670    
671  static char *  static char *
# Line 783  if (after_context > 0 && lastmatchnumber Line 948  if (after_context > 0 && lastmatchnumber
948      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
949      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
950      pp = end_of_line(pp, endptr, &ellength);      pp = end_of_line(pp, endptr, &ellength);
951      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
952      lastmatchrestart = pp;      lastmatchrestart = pp;
953      }      }
954    hyphenpending = TRUE;    hyphenpending = TRUE;
# Line 793  if (after_context > 0 && lastmatchnumber Line 958  if (after_context > 0 && lastmatchnumber
958    
959    
960  /*************************************************  /*************************************************
961    *   Apply patterns to subject till one matches   *
962    *************************************************/
963    
964    /* This function is called to run through all patterns, looking for a match. It
965    is used multiple times for the same subject when colouring is enabled, in order
966    to find all possible matches.
967    
968    Arguments:
969      matchptr     the start of the subject
970      length       the length of the subject to match
971      startoffset  where to start matching
972      offsets      the offsets vector to fill in
973      mrc          address of where to put the result of pcre_exec()
974    
975    Returns:      TRUE if there was a match
976                  FALSE if there was no match
977                  invert if there was a non-fatal error
978    */
979    
980    static BOOL
981    match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
982      int *mrc)
983    {
984    int i;
985    size_t slen = length;
986    const char *msg = "this text:\n\n";
987    if (slen > 200)
988      {
989      slen = 200;
990      msg = "text that starts:\n\n";
991      }
992    for (i = 0; i < pattern_count; i++)
993      {
994      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
995        startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
996      if (*mrc >= 0) return TRUE;
997      if (*mrc == PCRE_ERROR_NOMATCH) continue;
998      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
999      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
1000      fprintf(stderr, "%s", msg);
1001      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
1002      fprintf(stderr, "\n\n");
1003      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1004          *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1005        resource_error = TRUE;
1006      if (error_count++ > 20)
1007        {
1008        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1009        pcregrep_exit(2);
1010        }
1011      return invert;    /* No more matching; don't show the line again */
1012      }
1013    
1014    return FALSE;  /* No match, no errors */
1015    }
1016    
1017    
1018    
1019    /*************************************************
1020  *            Grep an individual file             *  *            Grep an individual file             *
1021  *************************************************/  *************************************************/
1022    
1023  /* This is called from grep_or_recurse() below. It uses a buffer that is three  /* This is called from grep_or_recurse() below. It uses a buffer that is three
1024  times the value of MBUFTHIRD. The matching point is never allowed to stray into  times the value of bufthird. The matching point is never allowed to stray into
1025  the top third of the buffer, thus keeping more of the file available for  the top third of the buffer, thus keeping more of the file available for
1026  context printing or for multiline scanning. For large files, the pointer will  context printing or for multiline scanning. For large files, the pointer will
1027  be in the middle third most of the time, so the bottom third is available for  be in the middle third most of the time, so the bottom third is available for
1028  "before" context printing.  "before" context printing.
1029    
1030  Arguments:  Arguments:
1031    in           the fopened FILE stream    handle       the fopened FILE stream for a normal file
1032                   the gzFile pointer when reading is via libz
1033                   the BZFILE pointer when reading is via libbz2
1034      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1035      filename     the file name or NULL (for errors)
1036    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
1037                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
1038                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
1039    
1040  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
1041                 1 otherwise (no matches)                 1 otherwise (no matches)
1042                   2 if an overlong line is encountered
1043                   3 if there is a read error on a .bz2 file
1044  */  */
1045    
1046  static int  static int
1047  pcregrep(FILE *in, char *printname)  pcregrep(void *handle, int frtype, char *filename, char *printname)
1048  {  {
1049  int rc = 1;  int rc = 1;
1050  int linenumber = 1;  int linenumber = 1;
1051  int lastmatchnumber = 0;  int lastmatchnumber = 0;
1052  int count = 0;  int count = 0;
1053  int offsets[99];  int filepos = 0;
1054    int offsets[OFFSET_SIZE];
1055  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
1056  char buffer[3*MBUFTHIRD];  char *ptr = main_buffer;
 char *ptr = buffer;  
1057  char *endptr;  char *endptr;
1058  size_t bufflength;  size_t bufflength;
1059    BOOL binary = FALSE;
1060  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
1061    BOOL input_line_buffered = line_buffered;
1062    FILE *in = NULL;                    /* Ensure initialized */
1063    
1064    #ifdef SUPPORT_LIBZ
1065    gzFile ingz = NULL;
1066    #endif
1067    
1068    #ifdef SUPPORT_LIBBZ2
1069    BZFILE *inbz2 = NULL;
1070    #endif
1071    
1072    
1073    /* Do the first read into the start of the buffer and set up the pointer to end
1074    of what we have. In the case of libz, a non-zipped .gz file will be read as a
1075    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1076    fail. */
1077    
1078    #ifdef SUPPORT_LIBZ
1079    if (frtype == FR_LIBZ)
1080      {
1081      ingz = (gzFile)handle;
1082      bufflength = gzread (ingz, main_buffer, bufsize);
1083      }
1084    else
1085    #endif
1086    
1087    #ifdef SUPPORT_LIBBZ2
1088    if (frtype == FR_LIBBZ2)
1089      {
1090      inbz2 = (BZFILE *)handle;
1091      bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1092      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1093      }                                    /* without the cast it is unsigned. */
1094    else
1095    #endif
1096    
1097      {
1098      in = (FILE *)handle;
1099      if (is_file_tty(in)) input_line_buffered = TRUE;
1100      bufflength = input_line_buffered?
1101        read_one_line(main_buffer, bufsize, in) :
1102        fread(main_buffer, 1, bufsize, in);
1103      }
1104    
1105    endptr = main_buffer + bufflength;
1106    
1107  /* Do the first read into the start of the buffer and set up the pointer to  /* Unless binary-files=text, see if we have a binary file. This uses the same
1108  end of what we have. */  rule as GNU grep, namely, a search for a binary zero byte near the start of the
1109    file. */
1110    
1111  bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  if (binary_files != BIN_TEXT)
1112  endptr = buffer + bufflength;    {
1113      binary =
1114        memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1115      if (binary && binary_files == BIN_NOMATCH) return 1;
1116      }
1117    
1118  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
1119  files, endptr will be at the end of the buffer when we are in the middle of the  files, endptr will be at the end of the buffer when we are in the middle of the
# Line 841  way, the buffer is shifted left and re-f Line 1122  way, the buffer is shifted left and re-f
1122    
1123  while (ptr < endptr)  while (ptr < endptr)
1124    {    {
1125    int i, endlinelength;    int endlinelength;
1126    int mrc = 0;    int mrc = 0;
1127    BOOL match = FALSE;    int startoffset = 0;
1128      BOOL match;
1129      char *matchptr = ptr;
1130    char *t = ptr;    char *t = ptr;
1131    size_t length, linelength;    size_t length, linelength;
1132    
1133    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
1134    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
1135    length remainder of the data in the buffer. Otherwise, it is the length of    length remainder of the data in the buffer. Otherwise, it is the length of
1136    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
1137    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1138    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
1139      first line. */
1140    
1141    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
1142    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
1143    length = multiline? (size_t)(endptr - ptr) : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
1144    
1145      /* Check to see if the line we are looking at extends right to the very end
1146      of the buffer without a line terminator. This means the line is too long to
1147      handle. */
1148    
1149      if (endlinelength == 0 && t == main_buffer + bufsize)
1150        {
1151        fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1152                        "pcregrep: check the --buffer-size option\n",
1153                        linenumber,
1154                        (filename == NULL)? "" : " of file ",
1155                        (filename == NULL)? "" : filename);
1156        return 2;
1157        }
1158    
1159    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
1160    
1161  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
# Line 867  while (ptr < endptr) Line 1165  while (ptr < endptr)
1165        #include <time.h>        #include <time.h>
1166        struct timeval start_time, end_time;        struct timeval start_time, end_time;
1167        struct timezone dummy;        struct timezone dummy;
1168          int i;
1169    
1170        if (jfriedl_XT)        if (jfriedl_XT)
1171        {        {
# Line 875  while (ptr < endptr) Line 1174  while (ptr < endptr)
1174            ptr = malloc(newlen + 1);            ptr = malloc(newlen + 1);
1175            if (!ptr) {            if (!ptr) {
1176                    printf("out of memory");                    printf("out of memory");
1177                    exit(2);                    pcregrep_exit(2);
1178            }            }
1179            endptr = ptr;            endptr = ptr;
1180            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
# Line 892  while (ptr < endptr) Line 1191  while (ptr < endptr)
1191    
1192    
1193        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1194            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1195                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1196    
1197        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
1198                perror("bad gettimeofday");                perror("bad gettimeofday");
# Line 906  while (ptr < endptr) Line 1206  while (ptr < endptr)
1206    }    }
1207  #endif  #endif
1208    
1209      /* We come back here after a match when the -o option (only_matching) is set,
1210      in order to find any further matches in the same line. */
1211    
1212    /* Run through all the patterns until one matches. Note that we don't include    ONLY_MATCHING_RESTART:
   the final newline in the subject string. */  
1213    
1214    for (i = 0; i < pattern_count; i++)    /* Run through all the patterns until one matches or there is an error other
1215      {    than NOMATCH. This code is in a subroutine so that it can be re-used for
1216      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,    finding subsequent matches when colouring matched lines. */
1217        offsets, 99);  
1218      if (mrc >= 0) { match = TRUE; break; }    match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
     if (mrc != PCRE_ERROR_NOMATCH)  
       {  
       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);  
       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
       fprintf(stderr, "this line:\n");  
       fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */  
       fprintf(stderr, "\n");  
       if (error_count == 0 &&  
           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))  
         {  
         fprintf(stderr, "pcregrep: error %d means that a resource limit "  
           "was exceeded\n", mrc);  
         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
         }  
       if (error_count++ > 20)  
         {  
         fprintf(stderr, "pcregrep: too many errors - abandoned\n");  
         exit(2);  
         }  
       match = invert;    /* No more matching; don't show the line again */  
       break;  
       }  
     }  
1219    
1220    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1221    
# Line 952  while (ptr < endptr) Line 1230  while (ptr < endptr)
1230      /* Just count if just counting is wanted. */      /* Just count if just counting is wanted. */
1231    
1232      if (count_only) count++;      if (count_only) count++;
1233    
1234        /* When handling a binary file and binary-files==binary, the "binary"
1235        variable will be set true (it's false in all other cases). In this
1236        situation we just want to output the file name. No need to scan further. */
1237    
1238        else if (binary)
1239          {
1240          fprintf(stdout, "Binary file %s matches\n", filename);
1241          return 0;
1242          }
1243    
1244      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1245      in the file. */      in the file. */
1246    
1247      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1248        {        {
1249        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1250        return 0;        return 0;
# Line 966  while (ptr < endptr) Line 1254  while (ptr < endptr)
1254    
1255      else if (quiet) return 0;      else if (quiet) return 0;
1256    
1257      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, or a
1258      does not pring any context. */      captured portion of it, as long as this string is not empty, and the
1259        --file-offsets and --line-offsets options output offsets for the matching
1260        substring (they both force --only-matching = 0). None of these options
1261        prints any context. Afterwards, adjust the start and then jump back to look
1262        for further matches in the same line. If we are in invert mode, however,
1263        nothing is printed and we do not restart - this could still be useful
1264        because the return code is set. */
1265    
1266      else if (only_matching)      else if (only_matching >= 0)
1267        {        {
1268        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (!invert)
1269        if (number) fprintf(stdout, "%d:", linenumber);          {
1270        fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1271        fprintf(stdout, "\n");          if (number) fprintf(stdout, "%d:", linenumber);
1272            if (line_offsets)
1273              fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1274                offsets[1] - offsets[0]);
1275            else if (file_offsets)
1276              fprintf(stdout, "%d,%d\n",
1277                (int)(filepos + matchptr + offsets[0] - ptr),
1278                offsets[1] - offsets[0]);
1279            else if (only_matching < mrc)
1280              {
1281              int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1282              if (plen > 0)
1283                {
1284                if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1285                FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1286                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1287                fprintf(stdout, "\n");
1288                }
1289              }
1290            else if (printname != NULL || number) fprintf(stdout, "\n");
1291            match = FALSE;
1292            if (line_buffered) fflush(stdout);
1293            rc = 0;                      /* Had some success */
1294            startoffset = offsets[1];    /* Restart after the match */
1295            goto ONLY_MATCHING_RESTART;
1296            }
1297        }        }
1298    
1299      /* This is the default case when none of the above options is set. We print      /* This is the default case when none of the above options is set. We print
# Line 1008  while (ptr < endptr) Line 1327  while (ptr < endptr)
1327            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1328            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1329            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1330            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1331            lastmatchrestart = pp;            lastmatchrestart = pp;
1332            }            }
1333          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
# Line 1031  while (ptr < endptr) Line 1350  while (ptr < endptr)
1350          int linecount = 0;          int linecount = 0;
1351          char *p = ptr;          char *p = ptr;
1352    
1353          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1354                 linecount < before_context)                 linecount < before_context)
1355            {            {
1356            linecount++;            linecount++;
1357            p = previous_line(p, buffer);            p = previous_line(p, main_buffer);
1358            }            }
1359    
1360          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 1048  while (ptr < endptr) Line 1367  while (ptr < endptr)
1367            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1368            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1369            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1370            fwrite(p, 1, pp - p, stdout);            FWRITE(p, 1, pp - p, stdout);
1371            p = pp;            p = pp;
1372            }            }
1373          }          }
# Line 1068  while (ptr < endptr) Line 1387  while (ptr < endptr)
1387        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1388        the match will always be before the first newline sequence. */        the match will always be before the first newline sequence. */
1389    
1390        if (multiline)        if (multiline & !invert)
1391          {          {
1392          int ellength;          char *endmatch = ptr + offsets[1];
1393          char *endmatch = ptr;          t = ptr;
1394          if (!invert)          while (t < endmatch)
1395            {            {
1396            endmatch += offsets[1];            t = end_of_line(t, endptr, &endlinelength);
1397            t = ptr;            if (t < endmatch) linenumber++; else break;
           while (t < endmatch)  
             {  
             t = end_of_line(t, endptr, &ellength);  
             if (t <= endmatch) linenumber++; else break;  
             }  
1398            }            }
1399          endmatch = end_of_line(endmatch, endptr, &ellength);          linelength = t - ptr - endlinelength;
         linelength = endmatch - ptr - ellength;  
1400          }          }
1401    
1402        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 1098  while (ptr < endptr) Line 1411  while (ptr < endptr)
1411          {          {
1412          int first = S_arg * 2;          int first = S_arg * 2;
1413          int last  = first + 1;          int last  = first + 1;
1414          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1415          fprintf(stdout, "X");          fprintf(stdout, "X");
1416          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1417          }          }
1418        else        else
1419  #endif  #endif
1420    
1421        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1422          matches, but not of course if the line is a non-match. */
1423    
1424        if (do_colour)        if (do_colour && !invert)
1425          {          {
1426          fwrite(ptr, 1, offsets[0], stdout);          int plength;
1427            FWRITE(ptr, 1, offsets[0], stdout);
1428          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1429          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1430          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1431          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          for (;;)
1432              {
1433              startoffset = offsets[1];
1434              if (startoffset >= (int)linelength + endlinelength ||
1435                  !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1436                break;
1437              FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1438              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1439              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1440              fprintf(stdout, "%c[00m", 0x1b);
1441              }
1442    
1443            /* In multiline mode, we may have already printed the complete line
1444            and its line-ending characters (if they matched the pattern), so there
1445            may be no more to print. */
1446    
1447            plength = (int)((linelength + endlinelength) - startoffset);
1448            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1449          }          }
1450        else fwrite(ptr, 1, linelength + endlinelength, stdout);  
1451          /* Not colouring; no need to search for further matches */
1452    
1453          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1454        }        }
1455    
1456      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match. If --line-buffered was
1457        given, flush the output. */
1458    
1459        if (line_buffered) fflush(stdout);
1460      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1461    
1462      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
# Line 1147  while (ptr < endptr) Line 1484  while (ptr < endptr)
1484      linelength = endmatch - ptr - ellength;      linelength = endmatch - ptr - ellength;
1485      }      }
1486    
1487    /* Advance to after the newline and increment the line number. */    /* Advance to after the newline and increment the line number. The file
1488      offset to the current line is maintained in filepos. */
1489    
1490    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
1491      filepos += (int)(linelength + endlinelength);
1492    linenumber++;    linenumber++;
1493    
1494      /* If input is line buffered, and the buffer is not yet full, read another
1495      line and add it into the buffer. */
1496    
1497      if (input_line_buffered && bufflength < (size_t)bufsize)
1498        {
1499        int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1500        bufflength += add;
1501        endptr += add;
1502        }
1503    
1504    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1505    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1506    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
1507    about to be lost, print them. */    about to be lost, print them. */
1508    
1509    if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)    if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1510      {      {
1511      if (after_context > 0 &&      if (after_context > 0 &&
1512          lastmatchnumber > 0 &&          lastmatchnumber > 0 &&
1513          lastmatchrestart < buffer + MBUFTHIRD)          lastmatchrestart < main_buffer + bufthird)
1514        {        {
1515        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1516        lastmatchnumber = 0;        lastmatchnumber = 0;
# Line 1169  while (ptr < endptr) Line 1518  while (ptr < endptr)
1518    
1519      /* Now do the shuffle */      /* Now do the shuffle */
1520    
1521      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1522      ptr -= MBUFTHIRD;      ptr -= bufthird;
1523      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);  
1524      endptr = buffer + bufflength;  #ifdef SUPPORT_LIBZ
1525        if (frtype == FR_LIBZ)
1526          bufflength = 2*bufthird +
1527            gzread (ingz, main_buffer + 2*bufthird, bufthird);
1528        else
1529    #endif
1530    
1531    #ifdef SUPPORT_LIBBZ2
1532        if (frtype == FR_LIBBZ2)
1533          bufflength = 2*bufthird +
1534            BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1535        else
1536    #endif
1537    
1538        bufflength = 2*bufthird +
1539          (input_line_buffered?
1540           read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1541           fread(main_buffer + 2*bufthird, 1, bufthird, in));
1542        endptr = main_buffer + bufflength;
1543    
1544      /* Adjust any last match point */      /* Adjust any last match point */
1545    
1546      if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;      if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1547      }      }
1548    }     /* Loop through the whole file */    }     /* Loop through the whole file */
1549    
1550  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
1551  hyphenpending if it prints something. */  hyphenpending if it prints something. */
1552    
1553  if (!only_matching && !count_only)  if (only_matching < 0 && !count_only)
1554    {    {
1555    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1556    hyphenpending |= endhyphenpending;    hyphenpending |= endhyphenpending;
# Line 1202  if (filenames == FN_NOMATCH_ONLY) Line 1569  if (filenames == FN_NOMATCH_ONLY)
1569    
1570  if (count_only)  if (count_only)
1571    {    {
1572    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1573    fprintf(stdout, "%d\n", count);      {
1574        if (printname != NULL && filenames != FN_NONE)
1575          fprintf(stdout, "%s:", printname);
1576        fprintf(stdout, "%d\n", count);
1577        }
1578    }    }
1579    
1580  return rc;  return rc;
# Line 1235  grep_or_recurse(char *pathname, BOOL dir Line 1606  grep_or_recurse(char *pathname, BOOL dir
1606  {  {
1607  int rc = 1;  int rc = 1;
1608  int sep;  int sep;
1609  FILE *in;  int frtype;
1610    void *handle;
1611    FILE *in = NULL;           /* Ensure initialized */
1612    
1613    #ifdef SUPPORT_LIBZ
1614    gzFile ingz = NULL;
1615    #endif
1616    
1617    #ifdef SUPPORT_LIBBZ2
1618    BZFILE *inbz2 = NULL;
1619    #endif
1620    
1621    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBZ2
1622    int pathlen;
1623    #endif
1624    
1625  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1626    
1627  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1628    {    {
1629    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN, stdin_name,
1630      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1631        stdin_name : NULL);        stdin_name : NULL);
1632    }    }
1633    
   
1634  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* If the file is a directory, skip if skipping or if we are recursing, scan
1635  each file within it, subject to any include or exclude patterns that were set.  each file and directory within it, subject to any include or exclude patterns
1636  The scanning code is localized so it can be made system-specific. */  that were set. The scanning code is localized so it can be made
1637    system-specific. */
1638    
1639  if ((sep = isdirectory(pathname)) != 0)  if ((sep = isdirectory(pathname)) != 0)
1640    {    {
# Line 1270  if ((sep = isdirectory(pathname)) != 0) Line 1655  if ((sep = isdirectory(pathname)) != 0)
1655    
1656      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
1657        {        {
1658        int frc, blen;        int frc, nflen;
1659        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1660        blen = strlen(buffer);        nflen = (int)(strlen(nextfile));
1661    
1662        if (exclude_compiled != NULL &&        if (isdirectory(buffer))
1663            pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)          {
1664          continue;          if (exclude_dir_compiled != NULL &&
1665                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1666        if (include_compiled != NULL &&            continue;
1667            pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  
1668          continue;          if (include_dir_compiled != NULL &&
1669                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1670              continue;
1671            }
1672          else
1673            {
1674            if (exclude_compiled != NULL &&
1675                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1676              continue;
1677    
1678            if (include_compiled != NULL &&
1679                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1680              continue;
1681            }
1682    
1683        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1684        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
# Line 1303  skipping was not requested. The scan pro Line 1701  skipping was not requested. The scan pro
1701  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1702  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1703    
1704  in = fopen(pathname, "r");  #if defined SUPPORT_LIBZ || defined SUPPORT_LIBZ2
1705  if (in == NULL)  pathlen = (int)(strlen(pathname));
1706    #endif
1707    
1708    /* Open using zlib if it is supported and the file name ends with .gz. */
1709    
1710    #ifdef SUPPORT_LIBZ
1711    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1712      {
1713      ingz = gzopen(pathname, "rb");
1714      if (ingz == NULL)
1715        {
1716        if (!silent)
1717          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1718            strerror(errno));
1719        return 2;
1720        }
1721      handle = (void *)ingz;
1722      frtype = FR_LIBZ;
1723      }
1724    else
1725    #endif
1726    
1727    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1728    
1729    #ifdef SUPPORT_LIBBZ2
1730    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1731      {
1732      inbz2 = BZ2_bzopen(pathname, "rb");
1733      handle = (void *)inbz2;
1734      frtype = FR_LIBBZ2;
1735      }
1736    else
1737    #endif
1738    
1739    /* Otherwise use plain fopen(). The label is so that we can come back here if
1740    an attempt to read a .bz2 file indicates that it really is a plain file. */
1741    
1742    #ifdef SUPPORT_LIBBZ2
1743    PLAIN_FILE:
1744    #endif
1745      {
1746      in = fopen(pathname, "rb");
1747      handle = (void *)in;
1748      frtype = FR_PLAIN;
1749      }
1750    
1751    /* All the opening methods return errno when they fail. */
1752    
1753    if (handle == NULL)
1754    {    {
1755    if (!silent)    if (!silent)
1756      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 1312  if (in == NULL) Line 1758  if (in == NULL)
1758    return 2;    return 2;
1759    }    }
1760    
1761  rc = pcregrep(in, (filenames > FN_DEFAULT ||  /* Now grep the file */
1762    
1763    rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1764    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1765    
1766    /* Close in an appropriate manner. */
1767    
1768    #ifdef SUPPORT_LIBZ
1769    if (frtype == FR_LIBZ)
1770      gzclose(ingz);
1771    else
1772    #endif
1773    
1774    /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1775    read failed. If the error indicates that the file isn't in fact bzipped, try
1776    again as a normal file. */
1777    
1778    #ifdef SUPPORT_LIBBZ2
1779    if (frtype == FR_LIBBZ2)
1780      {
1781      if (rc == 3)
1782        {
1783        int errnum;
1784        const char *err = BZ2_bzerror(inbz2, &errnum);
1785        if (errnum == BZ_DATA_ERROR_MAGIC)
1786          {
1787          BZ2_bzclose(inbz2);
1788          goto PLAIN_FILE;
1789          }
1790        else if (!silent)
1791          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1792            pathname, err);
1793        rc = 2;    /* The normal "something went wrong" code */
1794        }
1795      BZ2_bzclose(inbz2);
1796      }
1797    else
1798    #endif
1799    
1800    /* Normal file close */
1801    
1802  fclose(in);  fclose(in);
1803    
1804    /* Pass back the yield from pcregrep(). */
1805    
1806  return rc;  return rc;
1807  }  }
1808    
# Line 1336  for (op = optionlist; op->one_char != 0; Line 1823  for (op = optionlist; op->one_char != 0;
1823    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1824    }    }
1825  fprintf(stderr, "] [long options] [pattern] [files]\n");  fprintf(stderr, "] [long options] [pattern] [files]\n");
1826  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1827      "options.\n");
1828  return rc;  return rc;
1829  }  }
1830    
# Line 1355  option_item *op; Line 1843  option_item *op;
1843  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1844  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1845  printf("PATTERN must be present if neither -e nor -f is used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1846  printf("\"-\" can be used as a file name to mean STDIN.\n\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
1847  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
1848    #ifdef SUPPORT_LIBZ
1849    printf("Files whose names end in .gz are read using zlib.\n");
1850    #endif
1851    
1852    #ifdef SUPPORT_LIBBZ2
1853    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1854    #endif
1855    
1856    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1857    printf("Other files and the standard input are read as plain files.\n\n");
1858    #else
1859    printf("All files are read as plain files, without any interpretation.\n\n");
1860    #endif
1861    
1862    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1863  printf("Options:\n");  printf("Options:\n");
1864    
1865  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
1866    {    {
1867    int n;    int n;
1868    char s[4];    char s[4];
1869    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");  
1870    printf("  %s --%s%n", s, op->long_name, &n);    /* Two options were accidentally implemented and documented with underscores
1871    n = 30 - n;    instead of hyphens in their names, something that was not noticed for quite a
1872      few releases. When fixing this, I left the underscored versions in the list
1873      in case people were using them. However, we don't want to display them in the
1874      help data. There are no other options that contain underscores, and we do not
1875      expect ever to implement such options. Therefore, just omit any option that
1876      contains an underscore. */
1877    
1878      if (strchr(op->long_name, '_') != NULL) continue;
1879    
1880      if (op->one_char > 0 && (op->long_name)[0] == 0)
1881        n = 31 - printf("  -%c", op->one_char);
1882      else
1883        {
1884        if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
1885          else strcpy(s, "   ");
1886        n = 31 - printf("  %s --%s", s, op->long_name);
1887        }
1888    
1889    if (n < 1) n = 1;    if (n < 1) n = 1;
1890    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                           ", op->help_text);
1891    }    }
1892    
1893  printf("\nWhen reading patterns from a file instead of using a command line option,\n");  printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
1894  printf("trailing white space is removed and blank lines are ignored.\n");  printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
1895  printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  printf("When reading patterns or file names from a file, trailing white\n");
1896    printf("space is removed and blank lines are ignored.\n");
1897    printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
1898      MAX_PATTERN_COUNT, PATBUFSIZE);
1899    
1900  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1901  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
# Line 1391  handle_option(int letter, int options) Line 1913  handle_option(int letter, int options)
1913  {  {
1914  switch(letter)  switch(letter)
1915    {    {
1916    case N_HELP: help(); exit(0);    case N_FOFFSETS: file_offsets = TRUE; break;
1917      case N_HELP: help(); pcregrep_exit(0);
1918      case N_LBUFFER: line_buffered = TRUE; break;
1919      case N_LOFFSETS: line_offsets = number = TRUE; break;
1920      case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
1921      case 'a': binary_files = BIN_TEXT; break;
1922    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1923    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1924    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
1925      case 'I': binary_files = BIN_NOMATCH; break;
1926    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
1927    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1928    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1929    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1930    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1931    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1932    case 'o': only_matching = TRUE; break;    case 'o': only_matching = 0; break;
1933    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1934    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
1935    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
# Line 1412  switch(letter) Line 1940  switch(letter)
1940    
1941    case 'V':    case 'V':
1942    fprintf(stderr, "pcregrep version %s\n", pcre_version());    fprintf(stderr, "pcregrep version %s\n", pcre_version());
1943    exit(0);    pcregrep_exit(0);
1944    break;    break;
1945    
1946    default:    default:
1947    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1948    exit(usage(2));    pcregrep_exit(usage(2));
1949    }    }
1950    
1951  return options;  return options;
# Line 1472  Returns:         TRUE on success, FALSE Line 2000  Returns:         TRUE on success, FALSE
2000  static BOOL  static BOOL
2001  compile_single_pattern(char *pattern, int options, char *filename, int count)  compile_single_pattern(char *pattern, int options, char *filename, int count)
2002  {  {
2003  char buffer[MBUFTHIRD + 16];  char buffer[PATBUFSIZE];
2004  const char *error;  const char *error;
2005  int errptr;  int errptr;
2006    
# Line 1483  if (pattern_count >= MAX_PATTERN_COUNT) Line 2011  if (pattern_count >= MAX_PATTERN_COUNT)
2011    return FALSE;    return FALSE;
2012    }    }
2013    
2014  sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,  sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
2015    suffix[process_options]);    suffix[process_options]);
2016  pattern_list[pattern_count] =  pattern_list[pattern_count] =
2017    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
# Line 1542  compile_pattern(char *pattern, int optio Line 2070  compile_pattern(char *pattern, int optio
2070  if ((process_options & PO_FIXED_STRINGS) != 0)  if ((process_options & PO_FIXED_STRINGS) != 0)
2071    {    {
2072    char *eop = pattern + strlen(pattern);    char *eop = pattern + strlen(pattern);
2073    char buffer[MBUFTHIRD];    char buffer[PATBUFSIZE];
2074    for(;;)    for(;;)
2075      {      {
2076      int ellength;      int ellength;
# Line 1580  char *patterns[MAX_PATTERN_COUNT]; Line 2108  char *patterns[MAX_PATTERN_COUNT];
2108  const char *locale_from = "--locale";  const char *locale_from = "--locale";
2109  const char *error;  const char *error;
2110    
2111    #ifdef SUPPORT_PCREGREP_JIT
2112    pcre_jit_stack *jit_stack = NULL;
2113    #endif
2114    
2115  /* Set the default line ending value from the default in the PCRE library;  /* Set the default line ending value from the default in the PCRE library;
2116  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2117  */  Note that the return values from pcre_config(), though derived from the ASCII
2118    codes, are the same in EBCDIC environments, so we must use the actual values
2119    rather than escapes such as '\r'. */
2120    
2121  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2122  switch(i)  switch(i)
2123    {    {
2124    default:                 newline = (char *)"lf"; break;    default:               newline = (char *)"lf"; break;
2125    case '\r':               newline = (char *)"cr"; break;    case 13:               newline = (char *)"cr"; break;
2126    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2127    case -1:                 newline = (char *)"any"; break;    case -1:               newline = (char *)"any"; break;
2128    case -2:                 newline = (char *)"anycrlf"; break;    case -2:               newline = (char *)"anycrlf"; break;
2129    }    }
2130    
2131  /* Process the options */  /* Process the options */
# Line 1611  for (i = 1; i < argc; i++) Line 2145  for (i = 1; i < argc; i++)
2145    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2146      {      {
2147      if (pattern_filename != NULL || pattern_count > 0) break;      if (pattern_filename != NULL || pattern_count > 0) break;
2148        else exit(usage(2));        else pcregrep_exit(usage(2));
2149      }      }
2150    
2151    /* Handle a long name option, or -- to terminate the options */    /* Handle a long name option, or -- to terminate the options */
# Line 1633  for (i = 1; i < argc; i++) Line 2167  for (i = 1; i < argc; i++)
2167      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2168      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2169      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2170      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
2171      these categories, fortunately. */      both these categories. */
2172    
2173      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2174        {        {
2175        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2176        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2177        if (opbra == NULL)     /* Not a (p) case */  
2178          /* Handle options with only one spelling of the name */
2179    
2180          if (opbra == NULL)     /* Does not contain '(' */
2181          {          {
2182          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2183            {            {
# Line 1648  for (i = 1; i < argc; i++) Line 2185  for (i = 1; i < argc; i++)
2185            }            }
2186          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2187            {            {
2188            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2189            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2190                (int)strlen(arg) : (int)(argequals - arg);
2191            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2192              {              {
2193              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1662  for (i = 1; i < argc; i++) Line 2200  for (i = 1; i < argc; i++)
2200              }              }
2201            }            }
2202          }          }
2203        else                   /* Special case xxxx(p) */  
2204          /* Handle options with an alternate spelling of the name */
2205    
2206          else
2207          {          {
2208          char buff1[24];          char buff1[24];
2209          char buff2[24];          char buff2[24];
2210          int baselen = opbra - op->long_name;  
2211            int baselen = (int)(opbra - op->long_name);
2212            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2213            int arglen = (argequals == NULL || equals == NULL)?
2214              (int)strlen(arg) : (int)(argequals - arg);
2215    
2216          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2217          sprintf(buff2, "%s%.*s", buff1,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2218            (int)strlen(op->long_name) - baselen - 2, opbra + 1);  
2219          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2220               strncmp(arg, buff2, arglen) == 0)
2221              {
2222              if (equals != NULL && argequals != NULL)
2223                {
2224                option_data = argequals;
2225                if (*option_data == '=')
2226                  {
2227                  option_data++;
2228                  longopwasequals = TRUE;
2229                  }
2230                }
2231            break;            break;
2232              }
2233          }          }
2234        }        }
2235    
2236      if (op->one_char == 0)      if (op->one_char == 0)
2237        {        {
2238        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2239        exit(usage(2));        pcregrep_exit(usage(2));
2240        }        }
2241      }      }
2242    
   
2243    /* Jeffrey Friedl's debugging harness uses these additional options which    /* Jeffrey Friedl's debugging harness uses these additional options which
2244    are not in the right form for putting in the option table because they use    are not in the right form for putting in the option table because they use
2245    only one hyphen, yet are more than one character long. By putting them    only one hyphen, yet are more than one character long. By putting them
# Line 1716  for (i = 1; i < argc; i++) Line 2273  for (i = 1; i < argc; i++)
2273      while (*s != 0)      while (*s != 0)
2274        {        {
2275        for (op = optionlist; op->one_char != 0; op++)        for (op = optionlist; op->one_char != 0; op++)
2276          { if (*s == op->one_char) break; }          {
2277            if (*s == op->one_char) break;
2278            }
2279        if (op->one_char == 0)        if (op->one_char == 0)
2280          {          {
2281          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2282            *s, argv[i]);            *s, argv[i]);
2283          exit(usage(2));          pcregrep_exit(usage(2));
2284            }
2285    
2286          /* Check for a single-character option that has data: OP_OP_NUMBER
2287          is used for one that either has a numerical value or defaults, i.e. the
2288          data is optional. If a digit follows, there is data; if not, carry on
2289          with other single-character options in the same string. */
2290    
2291          option_data = s+1;
2292          if (op->type == OP_OP_NUMBER)
2293            {
2294            if (isdigit((unsigned char)s[1])) break;
2295          }          }
2296        if (op->type != OP_NODATA || s[1] == 0)        else   /* Check for end or a dataless option */
2297          {          {
2298          option_data = s+1;          if (op->type != OP_NODATA || s[1] == 0) break;
         break;  
2299          }          }
2300    
2301          /* Handle a single-character option with no data, then loop for the
2302          next character in the string. */
2303    
2304        pcre_options = handle_option(*s++, pcre_options);        pcre_options = handle_option(*s++, pcre_options);
2305        }        }
2306      }      }
# Line 1744  for (i = 1; i < argc; i++) Line 2317  for (i = 1; i < argc; i++)
2317    
2318    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2319    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
2320    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r",
2321    Jeffrey Friedl's special -S debugging option. */    "only-matching", and Jeffrey Friedl's special -S debugging option. */
2322    
2323    if (*option_data == 0 &&    if (*option_data == 0 &&
2324        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
# Line 1755  for (i = 1; i < argc; i++) Line 2328  for (i = 1; i < argc; i++)
2328        case N_COLOUR:        case N_COLOUR:
2329        colour_option = (char *)"auto";        colour_option = (char *)"auto";
2330        break;        break;
2331    
2332          case 'o':
2333          only_matching = 0;
2334          break;
2335    
2336  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
2337        case 'S':        case 'S':
2338        S_arg = 0;        S_arg = 0;
# Line 1771  for (i = 1; i < argc; i++) Line 2349  for (i = 1; i < argc; i++)
2349      if (i >= argc - 1 || longopwasequals)      if (i >= argc - 1 || longopwasequals)
2350        {        {
2351        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2352        exit(usage(2));        pcregrep_exit(usage(2));
2353        }        }
2354      option_data = argv[++i];      option_data = argv[++i];
2355      }      }
2356    
2357    /* If the option type is OP_PATLIST, it's the -e option, which can be called    /* If the option type is OP_PATLIST, it's the -e option, which can be called
2358    multiple times to create a list of patterns. */    multiple times to create a list of patterns. */
2359    
2360    if (op->type == OP_PATLIST)    if (op->type == OP_PATLIST)
2361      {      {
2362      if (cmd_pattern_count >= MAX_PATTERN_COUNT)      if (cmd_pattern_count >= MAX_PATTERN_COUNT)
# Line 1789  for (i = 1; i < argc; i++) Line 2367  for (i = 1; i < argc; i++)
2367        }        }
2368      patterns[cmd_pattern_count++] = option_data;      patterns[cmd_pattern_count++] = option_data;
2369      }      }
2370    
2371      /* Handle OP_BINFILES */
2372    
2373      else if (op->type == OP_BINFILES)
2374        {
2375        if (strcmp(option_data, "binary") == 0)
2376          binary_files = BIN_BINARY;
2377        else if (strcmp(option_data, "without-match") == 0)
2378          binary_files = BIN_NOMATCH;
2379        else if (strcmp(option_data, "text") == 0)
2380          binary_files = BIN_TEXT;
2381        else
2382          {
2383          fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2384            option_data);
2385          pcregrep_exit(usage(2));
2386          }
2387        }
2388    
2389    /* Otherwise, deal with single string or numeric data values. */    /* Otherwise, deal with single string or numeric data values. */
2390    
2391    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)    else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2392               op->type != OP_OP_NUMBER)
2393      {      {
2394      *((char **)op->dataptr) = option_data;      *((char **)op->dataptr) = option_data;
2395      }      }
2396    
2397      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2398      only for unpicking arguments, so just keep it simple. */
2399    
2400    else    else
2401      {      {
2402      char *endptr;      unsigned long int n = 0;
2403      int n = strtoul(option_data, &endptr, 10);      char *endptr = option_data;
2404        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2405        while (isdigit((unsigned char)(*endptr)))
2406          n = n * 10 + (int)(*endptr++ - '0');
2407        if (toupper(*endptr) == 'K')
2408          {
2409          n *= 1024;
2410          endptr++;
2411          }
2412        else if (toupper(*endptr) == 'M')
2413          {
2414          n *= 1024*1024;
2415          endptr++;
2416          }
2417      if (*endptr != 0)      if (*endptr != 0)
2418        {        {
2419        if (longop)        if (longop)
2420          {          {
2421          char *equals = strchr(op->long_name, '=');          char *equals = strchr(op->long_name, '=');
2422          int nlen = (equals == NULL)? (int)strlen(op->long_name) :          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2423            equals - op->long_name;            (int)(equals - op->long_name);
2424          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2425            option_data, nlen, op->long_name);            option_data, nlen, op->long_name);
2426          }          }
2427        else        else
2428          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2429            option_data, op->one_char);            option_data, op->one_char);
2430        exit(usage(2));        pcregrep_exit(usage(2));
2431        }        }
2432      *((int *)op->dataptr) = n;      if (op->type == OP_LONGNUMBER)
2433            *((unsigned long int *)op->dataptr) = n;
2434        else
2435            *((int *)op->dataptr) = n;
2436      }      }
2437    }    }
2438    
# Line 1828  if (both_context > 0) Line 2445  if (both_context > 0)
2445    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2446    }    }
2447    
2448    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2449    However, the latter two set only_matching. */
2450    
2451    if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2452        (file_offsets && line_offsets))
2453      {
2454      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2455        "and/or --line-offsets\n");
2456      pcregrep_exit(usage(2));
2457      }
2458    
2459    if (file_offsets || line_offsets) only_matching = 0;
2460    
2461  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2462  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
2463    
# Line 1950  if (jfriedl_XT != 0 || jfriedl_XR != 0) Line 2580  if (jfriedl_XT != 0 || jfriedl_XR != 0)
2580    }    }
2581  #endif  #endif
2582    
2583  /* Get memory to store the pattern and hints lists. */  /* Get memory for the main buffer, and to store the pattern and hints lists. */
2584    
2585    bufsize = 3*bufthird;
2586    main_buffer = (char *)malloc(bufsize);
2587  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2588  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2589    
2590  if (pattern_list == NULL || hints_list == NULL)  if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
2591    {    {
2592    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
2593    goto EXIT2;    goto EXIT2;
# Line 1987  if (pattern_filename != NULL) Line 2619  if (pattern_filename != NULL)
2619    int linenumber = 0;    int linenumber = 0;
2620    FILE *f;    FILE *f;
2621    char *filename;    char *filename;
2622    char buffer[MBUFTHIRD];    char buffer[PATBUFSIZE];
2623    
2624    if (strcmp(pattern_filename, "-") == 0)    if (strcmp(pattern_filename, "-") == 0)
2625      {      {
# Line 2006  if (pattern_filename != NULL) Line 2638  if (pattern_filename != NULL)
2638      filename = pattern_filename;      filename = pattern_filename;
2639      }      }
2640    
2641    while (fgets(buffer, MBUFTHIRD, f) != NULL)    while (fgets(buffer, PATBUFSIZE, f) != NULL)
2642      {      {
2643      char *s = buffer + (int)strlen(buffer);      char *s = buffer + (int)strlen(buffer);
2644      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
# Line 2020  if (pattern_filename != NULL) Line 2652  if (pattern_filename != NULL)
2652    if (f != stdin) fclose(f);    if (f != stdin) fclose(f);
2653    }    }
2654    
2655  /* Study the regular expressions, as we will be running them many times */  /* Study the regular expressions, as we will be running them many times. Unless
2656    JIT has been explicitly disabled, arrange a stack for it to use. */
2657    
2658    #ifdef SUPPORT_PCREGREP_JIT
2659    if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2660      jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
2661    #endif
2662    
2663  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
2664    {    {
2665    hints_list[j] = pcre_study(pattern_list[j], 0, &error);    hints_list[j] = pcre_study(pattern_list[j], study_options, &error);
2666    if (error != NULL)    if (error != NULL)
2667      {      {
2668      char s[16];      char s[16];
# Line 2033  for (j = 0; j < pattern_count; j++) Line 2671  for (j = 0; j < pattern_count; j++)
2671      goto EXIT2;      goto EXIT2;
2672      }      }
2673    hint_count++;    hint_count++;
2674    #ifdef SUPPORT_PCREGREP_JIT
2675      if (jit_stack != NULL && hints_list[j] != NULL)
2676        pcre_assign_jit_stack(hints_list[j], NULL, jit_stack);
2677    #endif
2678      }
2679    
2680    /* If --match-limit or --recursion-limit was set, put the value(s) into the
2681    pcre_extra block for each pattern. */
2682    
2683    if (match_limit > 0 || match_limit_recursion > 0)
2684      {
2685      for (j = 0; j < pattern_count; j++)
2686        {
2687        if (hints_list[j] == NULL)
2688          {
2689          hints_list[j] = malloc(sizeof(pcre_extra));
2690          if (hints_list[j] == NULL)
2691            {
2692            fprintf(stderr, "pcregrep: malloc failed\n");
2693            pcregrep_exit(2);
2694            }
2695          }
2696        if (match_limit > 0)
2697          {
2698          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2699          hints_list[j]->match_limit = match_limit;
2700          }
2701        if (match_limit_recursion > 0)
2702          {
2703          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2704          hints_list[j]->match_limit_recursion = match_limit_recursion;
2705          }
2706        }
2707    }    }
2708    
2709  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
# Line 2061  if (include_pattern != NULL) Line 2732  if (include_pattern != NULL)
2732      }      }
2733    }    }
2734    
2735  /* If there are no further arguments, do the business on stdin and exit. */  if (exclude_dir_pattern != NULL)
2736      {
2737      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2738        pcretables);
2739      if (exclude_dir_compiled == NULL)
2740        {
2741        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2742          errptr, error);
2743        goto EXIT2;
2744        }
2745      }
2746    
2747    if (include_dir_pattern != NULL)
2748      {
2749      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2750        pcretables);
2751      if (include_dir_compiled == NULL)
2752        {
2753        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2754          errptr, error);
2755        goto EXIT2;
2756        }
2757      }
2758    
2759    /* If a file that contains a list of files to search has been specified, read
2760    it line by line and search the given files. Otherwise, if there are no further
2761    arguments, do the business on stdin and exit. */
2762    
2763    if (file_list != NULL)
2764      {
2765      char buffer[PATBUFSIZE];
2766      FILE *fl;
2767      if (strcmp(file_list, "-") == 0) fl = stdin; else
2768        {
2769        fl = fopen(file_list, "rb");
2770        if (fl == NULL)
2771          {
2772          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", file_list,
2773            strerror(errno));
2774          goto EXIT2;
2775          }
2776        }
2777      while (fgets(buffer, PATBUFSIZE, fl) != NULL)
2778        {
2779        int frc;
2780        char *end = buffer + (int)strlen(buffer);
2781        while (end > buffer && isspace(end[-1])) end--;
2782        *end = 0;
2783        if (*buffer != 0)
2784          {
2785          frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
2786          if (frc > 1) rc = frc;
2787            else if (frc == 0 && rc == 1) rc = 0;
2788          }
2789        }
2790      if (fl != stdin) fclose (fl);
2791      }
2792    
2793    /* Do this only if there was no file list (and no file arguments). */
2794    
2795  if (i >= argc)  else if (i >= argc)
2796    {    {
2797    rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2798        (filenames > FN_DEFAULT)? stdin_name : NULL);
2799    goto EXIT;    goto EXIT;
2800    }    }
2801    
2802  /* Otherwise, work through the remaining arguments as files or directories.  /* After handling file-list or if there are remaining arguments, work through
2803  Pass in the fact that there is only one argument at top level - this suppresses  them as files or directories. Pass in the fact that there is only one argument
2804  the file name if the argument is not a directory and filenames are not  at top level - this suppresses the file name if the argument is not a directory
2805  otherwise forced. */  and filenames are not otherwise forced. */
2806    
2807  only_one_at_top = i == argc - 1;   /* Catch initial value of i */  only_one_at_top = i == argc - 1 && file_list == NULL;
2808    
2809  for (; i < argc; i++)  for (; i < argc; i++)
2810    {    {
# Line 2085  for (; i < argc; i++) Line 2815  for (; i < argc; i++)
2815    }    }
2816    
2817  EXIT:  EXIT:
2818    #ifdef SUPPORT_PCREGREP_JIT
2819    if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2820    #endif
2821    if (main_buffer != NULL) free(main_buffer);
2822  if (pattern_list != NULL)  if (pattern_list != NULL)
2823    {    {
2824    for (i = 0; i < pattern_count; i++) free(pattern_list[i]);    for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
# Line 2092  if (pattern_list != NULL) Line 2826  if (pattern_list != NULL)
2826    }    }
2827  if (hints_list != NULL)  if (hints_list != NULL)
2828    {    {
2829    for (i = 0; i < hint_count; i++) free(hints_list[i]);    for (i = 0; i < hint_count; i++)
2830        {
2831        if (hints_list[i] != NULL) pcre_free_study(hints_list[i]);
2832        }
2833    free(hints_list);    free(hints_list);
2834    }    }
2835  return rc;  pcregrep_exit(rc);
2836    
2837  EXIT2:  EXIT2:
2838  rc = 2;  rc = 2;

Legend:
Removed from v.236  
changed lines
  Added in v.947

  ViewVC Help
Powered by ViewVC 1.1.5