/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 199 by ph10, Tue Jul 31 14:39:09 2007 UTC revision 644 by ph10, Sat Jul 30 17:13:00 2011 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2011 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  */  */
39    
40  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
41  #include <config.h>  #include "config.h"
42  #endif  #endif
43    
44  #include <ctype.h>  #include <ctype.h>
# Line 55  POSSIBILITY OF SUCH DAMAGE. Line 55  POSSIBILITY OF SUCH DAMAGE.
55  #include <unistd.h>  #include <unistd.h>
56  #endif  #endif
57    
58  #include <pcre.h>  #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66    #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
69  #define TRUE 1  #define TRUE 1
# Line 63  POSSIBILITY OF SUCH DAMAGE. Line 71  POSSIBILITY OF SUCH DAMAGE.
71  typedef int BOOL;  typedef int BOOL;
72    
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define PATBUFSIZE BUFSIZ
78  #else  #else
79  #define MBUFTHIRD 8192  #define PATBUFSIZE 8192
80  #endif  #endif
81    
82  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
83  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
84  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
85    
86  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92  /* Actions for the -d and -D options */  /* Actions for the -d and -D options */
93    
# Line 91  enum { DEE_READ, DEE_SKIP }; Line 104  enum { DEE_READ, DEE_SKIP };
104    
105  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
113    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
114    
115    
116    
117  /*************************************************  /*************************************************
# Line 114  static char *colour_string = (char *)"1; Line 135  static char *colour_string = (char *)"1;
135  static char *colour_option = NULL;  static char *colour_option = NULL;
136  static char *dee_option = NULL;  static char *dee_option = NULL;
137  static char *DEE_option = NULL;  static char *DEE_option = NULL;
138    static char *main_buffer = NULL;
139  static char *newline = NULL;  static char *newline = NULL;
140  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
141  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
# Line 127  static pcre_extra **hints_list = NULL; Line 149  static pcre_extra **hints_list = NULL;
149    
150  static char *include_pattern = NULL;  static char *include_pattern = NULL;
151  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
152    static char *include_dir_pattern = NULL;
153    static char *exclude_dir_pattern = NULL;
154    
155  static pcre *include_compiled = NULL;  static pcre *include_compiled = NULL;
156  static pcre *exclude_compiled = NULL;  static pcre *exclude_compiled = NULL;
157    static pcre *include_dir_compiled = NULL;
158    static pcre *exclude_dir_compiled = NULL;
159    
160  static int after_context = 0;  static int after_context = 0;
161  static int before_context = 0;  static int before_context = 0;
162  static int both_context = 0;  static int both_context = 0;
163    static int bufthird = PCREGREP_BUFSIZE;
164    static int bufsize = 3*PCREGREP_BUFSIZE;
165  static int dee_action = dee_READ;  static int dee_action = dee_READ;
166  static int DEE_action = DEE_READ;  static int DEE_action = DEE_READ;
167  static int error_count = 0;  static int error_count = 0;
168  static int filenames = FN_DEFAULT;  static int filenames = FN_DEFAULT;
169    static int only_matching = -1;
170  static int process_options = 0;  static int process_options = 0;
171    
172    static unsigned long int match_limit = 0;
173    static unsigned long int match_limit_recursion = 0;
174    
175  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
176  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
177    static BOOL file_offsets = FALSE;
178  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
179  static BOOL invert = FALSE;  static BOOL invert = FALSE;
180    static BOOL line_buffered = FALSE;
181    static BOOL line_offsets = FALSE;
182  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
183  static BOOL number = FALSE;  static BOOL number = FALSE;
184  static BOOL only_matching = FALSE;  static BOOL omit_zero_count = FALSE;
185    static BOOL resource_error = FALSE;
186  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
187  static BOOL silent = FALSE;  static BOOL silent = FALSE;
188  static BOOL utf8 = FALSE;  static BOOL utf8 = FALSE;
189    
190  /* Structure for options and list of them */  /* Structure for options and list of them */
191    
192  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
193         OP_PATLIST };         OP_OP_NUMBER, OP_PATLIST };
194    
195  typedef struct option_item {  typedef struct option_item {
196    int type;    int type;
# Line 167  typedef struct option_item { Line 203  typedef struct option_item {
203  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
204  used to identify them. */  used to identify them. */
205    
206  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
207  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
208  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
209  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
210  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
211  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
212  #define N_NULL      (-7)  #define N_LABEL        (-7)
213    #define N_LOCALE       (-8)
214    #define N_NULL         (-9)
215    #define N_LOFFSETS     (-10)
216    #define N_FOFFSETS     (-11)
217    #define N_LBUFFER      (-12)
218    #define N_M_LIMIT      (-13)
219    #define N_M_LIMIT_REC  (-14)
220    #define N_BUFSIZE      (-15)
221    
222  static option_item optionlist[] = {  static option_item optionlist[] = {
223    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,     N_NULL,   NULL,              "",              "  terminate options" },
224    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
225    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
226    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
227    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
228    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
229    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
230    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
231    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
232    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
233    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
234    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_PATLIST,    'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
235    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
236    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_STRING,     'f',      &pattern_filename, "file=path",     "read patterns from file" },
237    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
238    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
239    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
240    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
241    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
242    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
243    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
244    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },    { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
245    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
246    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
247    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
248    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
249    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
250    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
251      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
252      { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
253      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
254      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
255      { OP_STRING,     N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
256      { OP_STRING,     N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
257      { OP_STRING,     N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
258      { OP_STRING,     N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
259    
260      /* These two were accidentally implemented with underscores instead of
261      hyphens in the option names. As this was not discovered for several releases,
262      the incorrect versions are left in the table for compatibility. However, the
263      --help function misses out any option that has an underscore in its name. */
264    
265      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
266      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
267    
268  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
269    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
270  #endif  #endif
# Line 240  const char utf8_table4[] = { Line 301  const char utf8_table4[] = {
301    
302    
303  /*************************************************  /*************************************************
304    *         Exit from the program                  *
305    *************************************************/
306    
307    /* If there has been a resource error, give a suitable message.
308    
309    Argument:  the return code
310    Returns:   does not return
311    */
312    
313    static void
314    pcregrep_exit(int rc)
315    {
316    if (resource_error)
317      {
318      fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
319        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
320      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
321      }
322    
323    exit(rc);
324    }
325    
326    
327    /*************************************************
328  *            OS-specific functions               *  *            OS-specific functions               *
329  *************************************************/  *************************************************/
330    
# Line 303  return (statbuf.st_mode & S_IFMT) == S_I Line 388  return (statbuf.st_mode & S_IFMT) == S_I
388  }  }
389    
390    
391  /************* Test stdout for being a terminal in Unix **********/  /************* Test for a terminal in Unix **********/
392    
393  static BOOL  static BOOL
394  is_stdout_tty(void)  is_stdout_tty(void)
# Line 311  is_stdout_tty(void) Line 396  is_stdout_tty(void)
396  return isatty(fileno(stdout));  return isatty(fileno(stdout));
397  }  }
398    
399    static BOOL
400    is_file_tty(FILE *f)
401    {
402    return isatty(fileno(f));
403    }
404    
405    
406  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
407    
408  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
409  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
410  when it did not exist. */  when it did not exist. David Byron added a patch that moved the #include of
411    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
412    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
413    undefined when it is indeed undefined. */
414    
415    #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
 #elif HAVE_WINDOWS_H  
416    
417  #ifndef STRICT  #ifndef STRICT
418  # define STRICT  # define STRICT
# Line 327  when it did not exist. */ Line 420  when it did not exist. */
420  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
421  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
422  #endif  #endif
423    
424    #include <windows.h>
425    
426  #ifndef INVALID_FILE_ATTRIBUTES  #ifndef INVALID_FILE_ATTRIBUTES
427  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
428  #endif  #endif
429    
 #include <windows.h>  
   
430  typedef struct directory_type  typedef struct directory_type
431  {  {
432  HANDLE handle;  HANDLE handle;
# Line 362  dir = (directory_type *) malloc(sizeof(* Line 456  dir = (directory_type *) malloc(sizeof(*
456  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
457    {    {
458    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
459    exit(2);    pcregrep_exit(2);
460    }    }
461  memcpy(pattern, filename, len);  memcpy(pattern, filename, len);
462  memcpy(&(pattern[len]), "\\*", 3);  memcpy(&(pattern[len]), "\\*", 3);
# Line 417  regular if they are not directories. */ Line 511  regular if they are not directories. */
511    
512  int isregfile(char *filename)  int isregfile(char *filename)
513  {  {
514  return !isdirectory(filename)  return !isdirectory(filename);
515  }  }
516    
517    
518  /************* Test stdout for being a terminal in Win32 **********/  /************* Test for a terminal in Win32 **********/
519    
520  /* I don't know how to do this; assume never */  /* I don't know how to do this; assume never */
521    
522  static BOOL  static BOOL
523  is_stdout_tty(void)  is_stdout_tty(void)
524  {  {
525  FALSE;  return FALSE;
526    }
527    
528    static BOOL
529    is_file_tty(FILE *f)
530    {
531    return FALSE;
532  }  }
533    
534    
# Line 453  void closedirectory(directory_type *dir) Line 553  void closedirectory(directory_type *dir)
553  int isregfile(char *filename) { return 1; }  int isregfile(char *filename) { return 1; }
554    
555    
556  /************* Test stdout for being a terminal when we can't do it **********/  /************* Test for a terminal when we can't do it **********/
557    
558  static BOOL  static BOOL
559  is_stdout_tty(void)  is_stdout_tty(void)
# Line 461  is_stdout_tty(void) Line 561  is_stdout_tty(void)
561  return FALSE;  return FALSE;
562  }  }
563    
564    static BOOL
565    is_file_tty(FILE *f)
566    {
567    return FALSE;
568    }
569    
570  #endif  #endif
571    
# Line 489  return sys_errlist[n]; Line 594  return sys_errlist[n];
594    
595    
596  /*************************************************  /*************************************************
597    *            Read one line of input              *
598    *************************************************/
599    
600    /* Normally, input is read using fread() into a large buffer, so many lines may
601    be read at once. However, doing this for tty input means that no output appears
602    until a lot of input has been typed. Instead, tty input is handled line by
603    line. We cannot use fgets() for this: it does not stop at a binary zero, so
604    when binary zeros are embedded in the data there is no way of telling how
605    many characters it has read.
606    
607    Arguments:
608      buffer     the buffer to read into
609      length     the maximum number of characters to read
610      f          the file
611    
612    Returns:     the number of characters read, zero at end of file
613    */
614    
615    static int
616    read_one_line(char *buffer, int length, FILE *f)
617    {
618    int c;
619    int yield = 0;
620    while ((c = fgetc(f)) != EOF)
621      {
622      buffer[yield++] = c;
623      if (c == '\n' || yield >= length) break;
624      }
625    return yield;
626    }
627    
628    
629    
630    /*************************************************
631  *             Find end of line                   *  *             Find end of line                   *
632  *************************************************/  *************************************************/
633    
# Line 500  Arguments: Line 639  Arguments:
639    endptr    end of available data    endptr    end of available data
640    lenptr    where to put the length of the eol sequence    lenptr    where to put the length of the eol sequence
641    
642  Returns:    pointer to the last byte of the line  Returns:    pointer after the last byte of the line,
643                including the newline byte(s)
644  */  */
645    
646  static char *  static char *
# Line 783  if (after_context > 0 && lastmatchnumber Line 923  if (after_context > 0 && lastmatchnumber
923      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
924      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
925      pp = end_of_line(pp, endptr, &ellength);      pp = end_of_line(pp, endptr, &ellength);
926      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
927      lastmatchrestart = pp;      lastmatchrestart = pp;
928      }      }
929    hyphenpending = TRUE;    hyphenpending = TRUE;
# Line 793  if (after_context > 0 && lastmatchnumber Line 933  if (after_context > 0 && lastmatchnumber
933    
934    
935  /*************************************************  /*************************************************
936    *   Apply patterns to subject till one matches   *
937    *************************************************/
938    
939    /* This function is called to run through all patterns, looking for a match. It
940    is used multiple times for the same subject when colouring is enabled, in order
941    to find all possible matches.
942    
943    Arguments:
944      matchptr     the start of the subject
945      length       the length of the subject to match
946      startoffset  where to start matching
947      offsets      the offsets vector to fill in
948      mrc          address of where to put the result of pcre_exec()
949    
950    Returns:      TRUE if there was a match
951                  FALSE if there was no match
952                  invert if there was a non-fatal error
953    */
954    
955    static BOOL
956    match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
957      int *mrc)
958    {
959    int i;
960    size_t slen = length;
961    const char *msg = "this text:\n\n";
962    if (slen > 200)
963      {
964      slen = 200;
965      msg = "text that starts:\n\n";
966      }
967    for (i = 0; i < pattern_count; i++)
968      {
969      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
970        startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
971      if (*mrc >= 0) return TRUE;
972      if (*mrc == PCRE_ERROR_NOMATCH) continue;
973      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
974      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
975      fprintf(stderr, "%s", msg);
976      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
977      fprintf(stderr, "\n\n");
978      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
979        resource_error = TRUE;
980      if (error_count++ > 20)
981        {
982        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
983        pcregrep_exit(2);
984        }
985      return invert;    /* No more matching; don't show the line again */
986      }
987    
988    return FALSE;  /* No match, no errors */
989    }
990    
991    
992    
993    /*************************************************
994  *            Grep an individual file             *  *            Grep an individual file             *
995  *************************************************/  *************************************************/
996    
997  /* This is called from grep_or_recurse() below. It uses a buffer that is three  /* This is called from grep_or_recurse() below. It uses a buffer that is three
998  times the value of MBUFTHIRD. The matching point is never allowed to stray into  times the value of bufthird. The matching point is never allowed to stray into
999  the top third of the buffer, thus keeping more of the file available for  the top third of the buffer, thus keeping more of the file available for
1000  context printing or for multiline scanning. For large files, the pointer will  context printing or for multiline scanning. For large files, the pointer will
1001  be in the middle third most of the time, so the bottom third is available for  be in the middle third most of the time, so the bottom third is available for
1002  "before" context printing.  "before" context printing.
1003    
1004  Arguments:  Arguments:
1005    in           the fopened FILE stream    handle       the fopened FILE stream for a normal file
1006                   the gzFile pointer when reading is via libz
1007                   the BZFILE pointer when reading is via libbz2
1008      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1009      filename     the file name or NULL (for errors)
1010    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
1011                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
1012                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
1013    
1014  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
1015                 1 otherwise (no matches)                 1 otherwise (no matches)
1016                   2 if an overlong line is encountered
1017                   3 if there is a read error on a .bz2 file
1018  */  */
1019    
1020  static int  static int
1021  pcregrep(FILE *in, char *printname)  pcregrep(void *handle, int frtype, char *filename, char *printname)
1022  {  {
1023  int rc = 1;  int rc = 1;
1024  int linenumber = 1;  int linenumber = 1;
1025  int lastmatchnumber = 0;  int lastmatchnumber = 0;
1026  int count = 0;  int count = 0;
1027  int offsets[99];  int filepos = 0;
1028    int offsets[OFFSET_SIZE];
1029  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
1030  char buffer[3*MBUFTHIRD];  char *ptr = main_buffer;
 char *ptr = buffer;  
1031  char *endptr;  char *endptr;
1032  size_t bufflength;  size_t bufflength;
1033  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
1034    BOOL input_line_buffered = line_buffered;
1035    FILE *in = NULL;                    /* Ensure initialized */
1036    
1037    #ifdef SUPPORT_LIBZ
1038    gzFile ingz = NULL;
1039    #endif
1040    
1041    #ifdef SUPPORT_LIBBZ2
1042    BZFILE *inbz2 = NULL;
1043    #endif
1044    
 /* Do the first read into the start of the buffer and set up the pointer to  
 end of what we have. */  
1045    
1046  bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  /* Do the first read into the start of the buffer and set up the pointer to end
1047  endptr = buffer + bufflength;  of what we have. In the case of libz, a non-zipped .gz file will be read as a
1048    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1049    fail. */
1050    
1051    #ifdef SUPPORT_LIBZ
1052    if (frtype == FR_LIBZ)
1053      {
1054      ingz = (gzFile)handle;
1055      bufflength = gzread (ingz, main_buffer, bufsize);
1056      }
1057    else
1058    #endif
1059    
1060    #ifdef SUPPORT_LIBBZ2
1061    if (frtype == FR_LIBBZ2)
1062      {
1063      inbz2 = (BZFILE *)handle;
1064      bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1065      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1066      }                                    /* without the cast it is unsigned. */
1067    else
1068    #endif
1069    
1070      {
1071      in = (FILE *)handle;
1072      if (is_file_tty(in)) input_line_buffered = TRUE;
1073      bufflength = input_line_buffered?
1074        read_one_line(main_buffer, bufsize, in) :
1075        fread(main_buffer, 1, bufsize, in);
1076      }
1077    
1078    endptr = main_buffer + bufflength;
1079    
1080  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
1081  files, endptr will be at the end of the buffer when we are in the middle of the  files, endptr will be at the end of the buffer when we are in the middle of the
# Line 841  way, the buffer is shifted left and re-f Line 1084  way, the buffer is shifted left and re-f
1084    
1085  while (ptr < endptr)  while (ptr < endptr)
1086    {    {
1087    int i, endlinelength;    int endlinelength;
1088    int mrc = 0;    int mrc = 0;
1089    BOOL match = FALSE;    int startoffset = 0;
1090      BOOL match;
1091      char *matchptr = ptr;
1092    char *t = ptr;    char *t = ptr;
1093    size_t length, linelength;    size_t length, linelength;
1094    
1095    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
1096    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
1097    length remainder of the data in the buffer. Otherwise, it is the length of    length remainder of the data in the buffer. Otherwise, it is the length of
1098    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
1099    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1100    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
1101      first line. */
1102    
1103    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
1104    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
1105    length = multiline? (size_t)(endptr - ptr) : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
1106    
1107      /* Check to see if the line we are looking at extends right to the very end
1108      of the buffer without a line terminator. This means the line is too long to
1109      handle. */
1110    
1111      if (endlinelength == 0 && t == main_buffer + bufsize)
1112        {
1113        fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1114                        "pcregrep: check the --buffer_size option\n",
1115                        linenumber,
1116                        (filename == NULL)? "" : " of file ",
1117                        (filename == NULL)? "" : filename);
1118        return 2;
1119        }
1120    
1121    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
1122    
# Line 867  while (ptr < endptr) Line 1127  while (ptr < endptr)
1127        #include <time.h>        #include <time.h>
1128        struct timeval start_time, end_time;        struct timeval start_time, end_time;
1129        struct timezone dummy;        struct timezone dummy;
1130          int i;
1131    
1132        if (jfriedl_XT)        if (jfriedl_XT)
1133        {        {
# Line 875  while (ptr < endptr) Line 1136  while (ptr < endptr)
1136            ptr = malloc(newlen + 1);            ptr = malloc(newlen + 1);
1137            if (!ptr) {            if (!ptr) {
1138                    printf("out of memory");                    printf("out of memory");
1139                    exit(2);                    pcregrep_exit(2);
1140            }            }
1141            endptr = ptr;            endptr = ptr;
1142            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
# Line 892  while (ptr < endptr) Line 1153  while (ptr < endptr)
1153    
1154    
1155        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1156            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1157                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1158    
1159        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
1160                perror("bad gettimeofday");                perror("bad gettimeofday");
# Line 906  while (ptr < endptr) Line 1168  while (ptr < endptr)
1168    }    }
1169  #endif  #endif
1170    
1171      /* We come back here after a match when the -o option (only_matching) is set,
1172      in order to find any further matches in the same line. */
1173    
1174    /* Run through all the patterns until one matches. Note that we don't include    ONLY_MATCHING_RESTART:
   the final newline in the subject string. */  
1175    
1176    for (i = 0; i < pattern_count; i++)    /* Run through all the patterns until one matches or there is an error other
1177      {    than NOMATCH. This code is in a subroutine so that it can be re-used for
1178      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,    finding subsequent matches when colouring matched lines. */
1179        offsets, 99);  
1180      if (mrc >= 0) { match = TRUE; break; }    match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
     if (mrc != PCRE_ERROR_NOMATCH)  
       {  
       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);  
       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
       fprintf(stderr, "this line:\n");  
       fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */  
       fprintf(stderr, "\n");  
       if (error_count == 0 &&  
           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))  
         {  
         fprintf(stderr, "pcregrep: error %d means that a resource limit "  
           "was exceeded\n", mrc);  
         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
         }  
       if (error_count++ > 20)  
         {  
         fprintf(stderr, "pcregrep: too many errors - abandoned\n");  
         exit(2);  
         }  
       match = invert;    /* No more matching; don't show the line again */  
       break;  
       }  
     }  
1181    
1182    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1183    
# Line 956  while (ptr < endptr) Line 1196  while (ptr < endptr)
1196      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1197      in the file. */      in the file. */
1198    
1199      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1200        {        {
1201        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1202        return 0;        return 0;
# Line 966  while (ptr < endptr) Line 1206  while (ptr < endptr)
1206    
1207      else if (quiet) return 0;      else if (quiet) return 0;
1208    
1209      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, or a
1210      does not pring any context. */      captured portion of it, as long as this string is not empty, and the
1211        --file-offsets and --line-offsets options output offsets for the matching
1212        substring (they both force --only-matching = 0). None of these options
1213        prints any context. Afterwards, adjust the start and then jump back to look
1214        for further matches in the same line. If we are in invert mode, however,
1215        nothing is printed and we do not restart - this could still be useful
1216        because the return code is set. */
1217    
1218      else if (only_matching)      else if (only_matching >= 0)
1219        {        {
1220        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (!invert)
1221        if (number) fprintf(stdout, "%d:", linenumber);          {
1222        fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1223        fprintf(stdout, "\n");          if (number) fprintf(stdout, "%d:", linenumber);
1224            if (line_offsets)
1225              fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1226                offsets[1] - offsets[0]);
1227            else if (file_offsets)
1228              fprintf(stdout, "%d,%d\n",
1229                (int)(filepos + matchptr + offsets[0] - ptr),
1230                offsets[1] - offsets[0]);
1231            else if (only_matching < mrc)
1232              {
1233              int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1234              if (plen > 0)
1235                {
1236                if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1237                FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1238                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1239                fprintf(stdout, "\n");
1240                }
1241              }
1242            else if (printname != NULL || number) fprintf(stdout, "\n");
1243            match = FALSE;
1244            if (line_buffered) fflush(stdout);
1245            rc = 0;                      /* Had some success */
1246            startoffset = offsets[1];    /* Restart after the match */
1247            goto ONLY_MATCHING_RESTART;
1248            }
1249        }        }
1250    
1251      /* This is the default case when none of the above options is set. We print      /* This is the default case when none of the above options is set. We print
# Line 1008  while (ptr < endptr) Line 1279  while (ptr < endptr)
1279            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1280            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1281            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1282            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1283            lastmatchrestart = pp;            lastmatchrestart = pp;
1284            }            }
1285          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
# Line 1031  while (ptr < endptr) Line 1302  while (ptr < endptr)
1302          int linecount = 0;          int linecount = 0;
1303          char *p = ptr;          char *p = ptr;
1304    
1305          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1306                 linecount < before_context)                 linecount < before_context)
1307            {            {
1308            linecount++;            linecount++;
1309            p = previous_line(p, buffer);            p = previous_line(p, main_buffer);
1310            }            }
1311    
1312          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 1048  while (ptr < endptr) Line 1319  while (ptr < endptr)
1319            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1320            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1321            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1322            fwrite(p, 1, pp - p, stdout);            FWRITE(p, 1, pp - p, stdout);
1323            p = pp;            p = pp;
1324            }            }
1325          }          }
# Line 1064  while (ptr < endptr) Line 1335  while (ptr < endptr)
1335    
1336        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1337        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1338        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1339        start of the match will always be before the first newline sequence. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1340          the match will always be before the first newline sequence. */
1341    
1342        if (multiline)        if (multiline & !invert)
1343          {          {
         int ellength;  
1344          char *endmatch = ptr + offsets[1];          char *endmatch = ptr + offsets[1];
1345          t = ptr;          t = ptr;
1346          while (t < endmatch)          while (t < endmatch)
1347            {            {
1348            t = end_of_line(t, endptr, &ellength);            t = end_of_line(t, endptr, &endlinelength);
1349            if (t <= endmatch) linenumber++; else break;            if (t < endmatch) linenumber++; else break;
1350            }            }
1351          endmatch = end_of_line(endmatch, endptr, &ellength);          linelength = t - ptr - endlinelength;
         linelength = endmatch - ptr - ellength;  
1352          }          }
1353    
1354        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 1093  while (ptr < endptr) Line 1363  while (ptr < endptr)
1363          {          {
1364          int first = S_arg * 2;          int first = S_arg * 2;
1365          int last  = first + 1;          int last  = first + 1;
1366          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1367          fprintf(stdout, "X");          fprintf(stdout, "X");
1368          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1369          }          }
1370        else        else
1371  #endif  #endif
1372    
1373        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1374          matches, but not of course if the line is a non-match. */
1375    
1376        if (do_colour)        if (do_colour && !invert)
1377          {          {
1378          fwrite(ptr, 1, offsets[0], stdout);          int plength;
1379            FWRITE(ptr, 1, offsets[0], stdout);
1380          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1381          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1382          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1383          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          for (;;)
1384              {
1385              startoffset = offsets[1];
1386              if (startoffset >= linelength + endlinelength ||
1387                  !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1388                break;
1389              FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1390              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1391              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1392              fprintf(stdout, "%c[00m", 0x1b);
1393              }
1394    
1395            /* In multiline mode, we may have already printed the complete line
1396            and its line-ending characters (if they matched the pattern), so there
1397            may be no more to print. */
1398    
1399            plength = (linelength + endlinelength) - startoffset;
1400            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1401          }          }
1402        else fwrite(ptr, 1, linelength + endlinelength, stdout);  
1403          /* Not colouring; no need to search for further matches */
1404    
1405          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1406        }        }
1407    
1408      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match. If --line-buffered was
1409        given, flush the output. */
1410    
1411        if (line_buffered) fflush(stdout);
1412      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1413    
1414      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
# Line 1124  while (ptr < endptr) Line 1418  while (ptr < endptr)
1418      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1419      }      }
1420    
1421    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1422      anything to be printed), we have to move on to the end of the match before
1423      proceeding. */
1424    
1425      if (multiline && invert && match)
1426        {
1427        int ellength;
1428        char *endmatch = ptr + offsets[1];
1429        t = ptr;
1430        while (t < endmatch)
1431          {
1432          t = end_of_line(t, endptr, &ellength);
1433          if (t <= endmatch) linenumber++; else break;
1434          }
1435        endmatch = end_of_line(endmatch, endptr, &ellength);
1436        linelength = endmatch - ptr - ellength;
1437        }
1438    
1439      /* Advance to after the newline and increment the line number. The file
1440      offset to the current line is maintained in filepos. */
1441    
1442    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
1443      filepos += (int)(linelength + endlinelength);
1444    linenumber++;    linenumber++;
1445    
1446      /* If input is line buffered, and the buffer is not yet full, read another
1447      line and add it into the buffer. */
1448    
1449      if (input_line_buffered && bufflength < bufsize)
1450        {
1451        int add = read_one_line(ptr, bufsize - (ptr - main_buffer), in);
1452        bufflength += add;
1453        endptr += add;
1454        }
1455    
1456    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1457    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1458    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
1459    about to be lost, print them. */    about to be lost, print them. */
1460    
1461    if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)    if (bufflength >= bufsize && ptr > main_buffer + 2*bufthird)
1462      {      {
1463      if (after_context > 0 &&      if (after_context > 0 &&
1464          lastmatchnumber > 0 &&          lastmatchnumber > 0 &&
1465          lastmatchrestart < buffer + MBUFTHIRD)          lastmatchrestart < main_buffer + bufthird)
1466        {        {
1467        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1468        lastmatchnumber = 0;        lastmatchnumber = 0;
# Line 1146  while (ptr < endptr) Line 1470  while (ptr < endptr)
1470    
1471      /* Now do the shuffle */      /* Now do the shuffle */
1472    
1473      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1474      ptr -= MBUFTHIRD;      ptr -= bufthird;
1475      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);  
1476      endptr = buffer + bufflength;  #ifdef SUPPORT_LIBZ
1477        if (frtype == FR_LIBZ)
1478          bufflength = 2*bufthird +
1479            gzread (ingz, main_buffer + 2*bufthird, bufthird);
1480        else
1481    #endif
1482    
1483    #ifdef SUPPORT_LIBBZ2
1484        if (frtype == FR_LIBBZ2)
1485          bufflength = 2*bufthird +
1486            BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1487        else
1488    #endif
1489    
1490        bufflength = 2*bufthird +
1491          (input_line_buffered?
1492           read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1493           fread(main_buffer + 2*bufthird, 1, bufthird, in));
1494        endptr = main_buffer + bufflength;
1495    
1496      /* Adjust any last match point */      /* Adjust any last match point */
1497    
1498      if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;      if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1499      }      }
1500    }     /* Loop through the whole file */    }     /* Loop through the whole file */
1501    
1502  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
1503  hyphenpending if it prints something. */  hyphenpending if it prints something. */
1504    
1505  if (!only_matching && !count_only)  if (only_matching < 0 && !count_only)
1506    {    {
1507    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1508    hyphenpending |= endhyphenpending;    hyphenpending |= endhyphenpending;
# Line 1179  if (filenames == FN_NOMATCH_ONLY) Line 1521  if (filenames == FN_NOMATCH_ONLY)
1521    
1522  if (count_only)  if (count_only)
1523    {    {
1524    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1525    fprintf(stdout, "%d\n", count);      {
1526        if (printname != NULL && filenames != FN_NONE)
1527          fprintf(stdout, "%s:", printname);
1528        fprintf(stdout, "%d\n", count);
1529        }
1530    }    }
1531    
1532  return rc;  return rc;
# Line 1212  grep_or_recurse(char *pathname, BOOL dir Line 1558  grep_or_recurse(char *pathname, BOOL dir
1558  {  {
1559  int rc = 1;  int rc = 1;
1560  int sep;  int sep;
1561  FILE *in;  int frtype;
1562    int pathlen;
1563    void *handle;
1564    FILE *in = NULL;           /* Ensure initialized */
1565    
1566    #ifdef SUPPORT_LIBZ
1567    gzFile ingz = NULL;
1568    #endif
1569    
1570    #ifdef SUPPORT_LIBBZ2
1571    BZFILE *inbz2 = NULL;
1572    #endif
1573    
1574  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1575    
1576  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1577    {    {
1578    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN, stdin_name,
1579      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1580        stdin_name : NULL);        stdin_name : NULL);
1581    }    }
1582    
   
1583  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* If the file is a directory, skip if skipping or if we are recursing, scan
1584  each file within it, subject to any include or exclude patterns that were set.  each file and directory within it, subject to any include or exclude patterns
1585  The scanning code is localized so it can be made system-specific. */  that were set. The scanning code is localized so it can be made
1586    system-specific. */
1587    
1588  if ((sep = isdirectory(pathname)) != 0)  if ((sep = isdirectory(pathname)) != 0)
1589    {    {
# Line 1247  if ((sep = isdirectory(pathname)) != 0) Line 1604  if ((sep = isdirectory(pathname)) != 0)
1604    
1605      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
1606        {        {
1607        int frc, blen;        int frc, nflen;
1608        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1609        blen = strlen(buffer);        nflen = (int)(strlen(nextfile));
1610    
1611          if (isdirectory(buffer))
1612            {
1613            if (exclude_dir_compiled != NULL &&
1614                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1615              continue;
1616    
1617            if (include_dir_compiled != NULL &&
1618                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1619              continue;
1620            }
1621          else
1622            {
1623            if (exclude_compiled != NULL &&
1624                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1625              continue;
1626    
1627        if (exclude_compiled != NULL &&          if (include_compiled != NULL &&
1628            pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)              pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1629          continue;            continue;
1630            }
       if (include_compiled != NULL &&  
           pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  
         continue;  
1631    
1632        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1633        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
# Line 1280  skipping was not requested. The scan pro Line 1650  skipping was not requested. The scan pro
1650  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1651  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1652    
1653  in = fopen(pathname, "r");  pathlen = (int)(strlen(pathname));
1654  if (in == NULL)  
1655    /* Open using zlib if it is supported and the file name ends with .gz. */
1656    
1657    #ifdef SUPPORT_LIBZ
1658    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1659      {
1660      ingz = gzopen(pathname, "rb");
1661      if (ingz == NULL)
1662        {
1663        if (!silent)
1664          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1665            strerror(errno));
1666        return 2;
1667        }
1668      handle = (void *)ingz;
1669      frtype = FR_LIBZ;
1670      }
1671    else
1672    #endif
1673    
1674    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1675    
1676    #ifdef SUPPORT_LIBBZ2
1677    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1678      {
1679      inbz2 = BZ2_bzopen(pathname, "rb");
1680      handle = (void *)inbz2;
1681      frtype = FR_LIBBZ2;
1682      }
1683    else
1684    #endif
1685    
1686    /* Otherwise use plain fopen(). The label is so that we can come back here if
1687    an attempt to read a .bz2 file indicates that it really is a plain file. */
1688    
1689    #ifdef SUPPORT_LIBBZ2
1690    PLAIN_FILE:
1691    #endif
1692      {
1693      in = fopen(pathname, "rb");
1694      handle = (void *)in;
1695      frtype = FR_PLAIN;
1696      }
1697    
1698    /* All the opening methods return errno when they fail. */
1699    
1700    if (handle == NULL)
1701    {    {
1702    if (!silent)    if (!silent)
1703      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 1289  if (in == NULL) Line 1705  if (in == NULL)
1705    return 2;    return 2;
1706    }    }
1707    
1708  rc = pcregrep(in, (filenames > FN_DEFAULT ||  /* Now grep the file */
1709    
1710    rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1711    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1712    
1713    /* Close in an appropriate manner. */
1714    
1715    #ifdef SUPPORT_LIBZ
1716    if (frtype == FR_LIBZ)
1717      gzclose(ingz);
1718    else
1719    #endif
1720    
1721    /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1722    read failed. If the error indicates that the file isn't in fact bzipped, try
1723    again as a normal file. */
1724    
1725    #ifdef SUPPORT_LIBBZ2
1726    if (frtype == FR_LIBBZ2)
1727      {
1728      if (rc == 3)
1729        {
1730        int errnum;
1731        const char *err = BZ2_bzerror(inbz2, &errnum);
1732        if (errnum == BZ_DATA_ERROR_MAGIC)
1733          {
1734          BZ2_bzclose(inbz2);
1735          goto PLAIN_FILE;
1736          }
1737        else if (!silent)
1738          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1739            pathname, err);
1740        rc = 2;    /* The normal "something went wrong" code */
1741        }
1742      BZ2_bzclose(inbz2);
1743      }
1744    else
1745    #endif
1746    
1747    /* Normal file close */
1748    
1749  fclose(in);  fclose(in);
1750    
1751    /* Pass back the yield from pcregrep(). */
1752    
1753  return rc;  return rc;
1754  }  }
1755    
# Line 1313  for (op = optionlist; op->one_char != 0; Line 1770  for (op = optionlist; op->one_char != 0;
1770    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1771    }    }
1772  fprintf(stderr, "] [long options] [pattern] [files]\n");  fprintf(stderr, "] [long options] [pattern] [files]\n");
1773  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1774      "options.\n");
1775  return rc;  return rc;
1776  }  }
1777    
# Line 1332  option_item *op; Line 1790  option_item *op;
1790  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1791  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1792  printf("PATTERN must be present if neither -e nor -f is used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1793  printf("\"-\" can be used as a file name to mean STDIN.\n\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
1794  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
1795    #ifdef SUPPORT_LIBZ
1796    printf("Files whose names end in .gz are read using zlib.\n");
1797    #endif
1798    
1799    #ifdef SUPPORT_LIBBZ2
1800    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1801    #endif
1802    
1803    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1804    printf("Other files and the standard input are read as plain files.\n\n");
1805    #else
1806    printf("All files are read as plain files, without any interpretation.\n\n");
1807    #endif
1808    
1809    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1810  printf("Options:\n");  printf("Options:\n");
1811    
1812  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
1813    {    {
1814    int n;    int n;
1815    char s[4];    char s[4];
1816    
1817      /* Two options were accidentally implemented and documented with underscores
1818      instead of hyphens in their names, something that was not noticed for quite a
1819      few releases. When fixing this, I left the underscored versions in the list
1820      in case people were using them. However, we don't want to display them in the
1821      help data. There are no other options that contain underscores, and we do not
1822      expect ever to implement such options. Therefore, just omit any option that
1823      contains an underscore. */
1824    
1825      if (strchr(op->long_name, '_') != NULL) continue;
1826    
1827    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1828    printf("  %s --%s%n", s, op->long_name, &n);    n = 31 - printf("  %s --%s", s, op->long_name);
   n = 30 - n;  
1829    if (n < 1) n = 1;    if (n < 1) n = 1;
1830    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                     ", op->help_text);
1831    }    }
1832    
1833  printf("\nWhen reading patterns from a file instead of using a command line option,\n");  printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
1834    printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
1835    printf("When reading patterns from a file instead of using a command line option,\n");
1836  printf("trailing white space is removed and blank lines are ignored.\n");  printf("trailing white space is removed and blank lines are ignored.\n");
1837  printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
1838      MAX_PATTERN_COUNT, PATBUFSIZE);
1839    
1840  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1841  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
# Line 1368  handle_option(int letter, int options) Line 1853  handle_option(int letter, int options)
1853  {  {
1854  switch(letter)  switch(letter)
1855    {    {
1856    case N_HELP: help(); exit(0);    case N_FOFFSETS: file_offsets = TRUE; break;
1857      case N_HELP: help(); pcregrep_exit(0);
1858      case N_LOFFSETS: line_offsets = number = TRUE; break;
1859      case N_LBUFFER: line_buffered = TRUE; break;
1860    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1861    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1862    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
1863    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
1864    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1865    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1866    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1867    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1868    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1869    case 'o': only_matching = TRUE; break;    case 'o': only_matching = 0; break;
1870    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1871    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
1872    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
# Line 1389  switch(letter) Line 1877  switch(letter)
1877    
1878    case 'V':    case 'V':
1879    fprintf(stderr, "pcregrep version %s\n", pcre_version());    fprintf(stderr, "pcregrep version %s\n", pcre_version());
1880    exit(0);    pcregrep_exit(0);
1881    break;    break;
1882    
1883    default:    default:
1884    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1885    exit(usage(2));    pcregrep_exit(usage(2));
1886    }    }
1887    
1888  return options;  return options;
# Line 1449  Returns:         TRUE on success, FALSE Line 1937  Returns:         TRUE on success, FALSE
1937  static BOOL  static BOOL
1938  compile_single_pattern(char *pattern, int options, char *filename, int count)  compile_single_pattern(char *pattern, int options, char *filename, int count)
1939  {  {
1940  char buffer[MBUFTHIRD + 16];  char buffer[PATBUFSIZE];
1941  const char *error;  const char *error;
1942  int errptr;  int errptr;
1943    
# Line 1460  if (pattern_count >= MAX_PATTERN_COUNT) Line 1948  if (pattern_count >= MAX_PATTERN_COUNT)
1948    return FALSE;    return FALSE;
1949    }    }
1950    
1951  sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,  sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
1952    suffix[process_options]);    suffix[process_options]);
1953  pattern_list[pattern_count] =  pattern_list[pattern_count] =
1954    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
# Line 1519  compile_pattern(char *pattern, int optio Line 2007  compile_pattern(char *pattern, int optio
2007  if ((process_options & PO_FIXED_STRINGS) != 0)  if ((process_options & PO_FIXED_STRINGS) != 0)
2008    {    {
2009    char *eop = pattern + strlen(pattern);    char *eop = pattern + strlen(pattern);
2010    char buffer[MBUFTHIRD];    char buffer[PATBUFSIZE];
2011    for(;;)    for(;;)
2012      {      {
2013      int ellength;      int ellength;
# Line 1559  const char *error; Line 2047  const char *error;
2047    
2048  /* Set the default line ending value from the default in the PCRE library;  /* Set the default line ending value from the default in the PCRE library;
2049  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2050  */  Note that the return values from pcre_config(), though derived from the ASCII
2051    codes, are the same in EBCDIC environments, so we must use the actual values
2052    rather than escapes such as '\r'. */
2053    
2054  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2055  switch(i)  switch(i)
2056    {    {
2057    default:                 newline = (char *)"lf"; break;    default:               newline = (char *)"lf"; break;
2058    case '\r':               newline = (char *)"cr"; break;    case 13:               newline = (char *)"cr"; break;
2059    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2060    case -1:                 newline = (char *)"any"; break;    case -1:               newline = (char *)"any"; break;
2061    case -2:                 newline = (char *)"anycrlf"; break;    case -2:               newline = (char *)"anycrlf"; break;
2062    }    }
2063    
2064  /* Process the options */  /* Process the options */
# Line 1588  for (i = 1; i < argc; i++) Line 2078  for (i = 1; i < argc; i++)
2078    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2079      {      {
2080      if (pattern_filename != NULL || pattern_count > 0) break;      if (pattern_filename != NULL || pattern_count > 0) break;
2081        else exit(usage(2));        else pcregrep_exit(usage(2));
2082      }      }
2083    
2084    /* Handle a long name option, or -- to terminate the options */    /* Handle a long name option, or -- to terminate the options */
# Line 1610  for (i = 1; i < argc; i++) Line 2100  for (i = 1; i < argc; i++)
2100      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2101      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2102      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2103      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
2104      these categories, fortunately. */      both these categories. */
2105    
2106      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2107        {        {
2108        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2109        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2110        if (opbra == NULL)     /* Not a (p) case */  
2111          /* Handle options with only one spelling of the name */
2112    
2113          if (opbra == NULL)     /* Does not contain '(' */
2114          {          {
2115          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2116            {            {
# Line 1625  for (i = 1; i < argc; i++) Line 2118  for (i = 1; i < argc; i++)
2118            }            }
2119          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2120            {            {
2121            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2122            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2123                (int)strlen(arg) : (int)(argequals - arg);
2124            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2125              {              {
2126              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1639  for (i = 1; i < argc; i++) Line 2133  for (i = 1; i < argc; i++)
2133              }              }
2134            }            }
2135          }          }
2136        else                   /* Special case xxxx(p) */  
2137          /* Handle options with an alternate spelling of the name */
2138    
2139          else
2140          {          {
2141          char buff1[24];          char buff1[24];
2142          char buff2[24];          char buff2[24];
2143          int baselen = opbra - op->long_name;  
2144            int baselen = (int)(opbra - op->long_name);
2145            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2146            int arglen = (argequals == NULL || equals == NULL)?
2147              (int)strlen(arg) : (int)(argequals - arg);
2148    
2149          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2150          sprintf(buff2, "%s%.*s", buff1,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2151            (int)strlen(op->long_name) - baselen - 2, opbra + 1);  
2152          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2153               strncmp(arg, buff2, arglen) == 0)
2154              {
2155              if (equals != NULL && argequals != NULL)
2156                {
2157                option_data = argequals;
2158                if (*option_data == '=')
2159                  {
2160                  option_data++;
2161                  longopwasequals = TRUE;
2162                  }
2163                }
2164            break;            break;
2165              }
2166          }          }
2167        }        }
2168    
2169      if (op->one_char == 0)      if (op->one_char == 0)
2170        {        {
2171        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2172        exit(usage(2));        pcregrep_exit(usage(2));
2173        }        }
2174      }      }
2175    
   
2176    /* Jeffrey Friedl's debugging harness uses these additional options which    /* Jeffrey Friedl's debugging harness uses these additional options which
2177    are not in the right form for putting in the option table because they use    are not in the right form for putting in the option table because they use
2178    only one hyphen, yet are more than one character long. By putting them    only one hyphen, yet are more than one character long. By putting them
# Line 1693  for (i = 1; i < argc; i++) Line 2206  for (i = 1; i < argc; i++)
2206      while (*s != 0)      while (*s != 0)
2207        {        {
2208        for (op = optionlist; op->one_char != 0; op++)        for (op = optionlist; op->one_char != 0; op++)
2209          { if (*s == op->one_char) break; }          {
2210            if (*s == op->one_char) break;
2211            }
2212        if (op->one_char == 0)        if (op->one_char == 0)
2213          {          {
2214          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2215            *s, argv[i]);            *s, argv[i]);
2216          exit(usage(2));          pcregrep_exit(usage(2));
2217          }          }
2218        if (op->type != OP_NODATA || s[1] == 0)  
2219          /* Check for a single-character option that has data: OP_OP_NUMBER
2220          is used for one that either has a numeric value or defaults, i.e. the
2221          data is optional. If a digit follows, there is data; if not, carry on
2222          with other single-character options in the same string. */
2223    
2224          option_data = s+1;
2225          if (op->type == OP_OP_NUMBER)
2226          {          {
2227          option_data = s+1;          if (isdigit((unsigned char)s[1])) break;
         break;  
2228          }          }
2229          else   /* Check for end or a dataless option */
2230            {
2231            if (op->type != OP_NODATA || s[1] == 0) break;
2232            }
2233    
2234          /* Handle a single-character option with no data, then loop for the
2235          next character in the string. */
2236    
2237        pcre_options = handle_option(*s++, pcre_options);        pcre_options = handle_option(*s++, pcre_options);
2238        }        }
2239      }      }
# Line 1721  for (i = 1; i < argc; i++) Line 2250  for (i = 1; i < argc; i++)
2250    
2251    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2252    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
2253    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r",
2254    Jeffrey Friedl's special -S debugging option. */    "only-matching", and Jeffrey Friedl's special -S debugging option. */
2255    
2256    if (*option_data == 0 &&    if (*option_data == 0 &&
2257        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
# Line 1732  for (i = 1; i < argc; i++) Line 2261  for (i = 1; i < argc; i++)
2261        case N_COLOUR:        case N_COLOUR:
2262        colour_option = (char *)"auto";        colour_option = (char *)"auto";
2263        break;        break;
2264    
2265          case 'o':
2266          only_matching = 0;
2267          break;
2268    
2269  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
2270        case 'S':        case 'S':
2271        S_arg = 0;        S_arg = 0;
# Line 1748  for (i = 1; i < argc; i++) Line 2282  for (i = 1; i < argc; i++)
2282      if (i >= argc - 1 || longopwasequals)      if (i >= argc - 1 || longopwasequals)
2283        {        {
2284        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2285        exit(usage(2));        pcregrep_exit(usage(2));
2286        }        }
2287      option_data = argv[++i];      option_data = argv[++i];
2288      }      }
# Line 1769  for (i = 1; i < argc; i++) Line 2303  for (i = 1; i < argc; i++)
2303    
2304    /* Otherwise, deal with single string or numeric data values. */    /* Otherwise, deal with single string or numeric data values. */
2305    
2306    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)    else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2307               op->type != OP_OP_NUMBER)
2308      {      {
2309      *((char **)op->dataptr) = option_data;      *((char **)op->dataptr) = option_data;
2310      }      }
2311    
2312      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2313      only for unpicking arguments, so just keep it simple. */
2314    
2315    else    else
2316      {      {
2317      char *endptr;      unsigned long int n = 0;
2318      int n = strtoul(option_data, &endptr, 10);      char *endptr = option_data;
2319        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2320        while (isdigit((unsigned char)(*endptr)))
2321          n = n * 10 + (int)(*endptr++ - '0');
2322        if (toupper(*endptr) == 'K')
2323          {
2324          n *= 1024;
2325          endptr++;
2326          }
2327        else if (toupper(*endptr) == 'M')
2328          {
2329          n *= 1024*1024;
2330          endptr++;
2331          }
2332      if (*endptr != 0)      if (*endptr != 0)
2333        {        {
2334        if (longop)        if (longop)
2335          {          {
2336          char *equals = strchr(op->long_name, '=');          char *equals = strchr(op->long_name, '=');
2337          int nlen = (equals == NULL)? (int)strlen(op->long_name) :          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2338            equals - op->long_name;            (int)(equals - op->long_name);
2339          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2340            option_data, nlen, op->long_name);            option_data, nlen, op->long_name);
2341          }          }
2342        else        else
2343          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2344            option_data, op->one_char);            option_data, op->one_char);
2345        exit(usage(2));        pcregrep_exit(usage(2));
2346        }        }
2347      *((int *)op->dataptr) = n;      if (op->type == OP_LONGNUMBER)
2348            *((unsigned long int *)op->dataptr) = n;
2349        else
2350            *((int *)op->dataptr) = n;
2351      }      }
2352    }    }
2353    
# Line 1805  if (both_context > 0) Line 2360  if (both_context > 0)
2360    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2361    }    }
2362    
2363    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2364    However, the latter two set only_matching. */
2365    
2366    if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2367        (file_offsets && line_offsets))
2368      {
2369      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2370        "and/or --line-offsets\n");
2371      pcregrep_exit(usage(2));
2372      }
2373    
2374    if (file_offsets || line_offsets) only_matching = 0;
2375    
2376  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2377  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
2378    
# Line 1927  if (jfriedl_XT != 0 || jfriedl_XR != 0) Line 2495  if (jfriedl_XT != 0 || jfriedl_XR != 0)
2495    }    }
2496  #endif  #endif
2497    
2498  /* Get memory to store the pattern and hints lists. */  /* Get memory for the main buffer, and to store the pattern and hints lists. */
2499    
2500    bufsize = 3*bufthird;
2501    main_buffer = (char *)malloc(bufsize);
2502  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2503  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2504    
2505  if (pattern_list == NULL || hints_list == NULL)  if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
2506    {    {
2507    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
2508    goto EXIT2;    goto EXIT2;
# Line 1964  if (pattern_filename != NULL) Line 2534  if (pattern_filename != NULL)
2534    int linenumber = 0;    int linenumber = 0;
2535    FILE *f;    FILE *f;
2536    char *filename;    char *filename;
2537    char buffer[MBUFTHIRD];    char buffer[PATBUFSIZE];
2538    
2539    if (strcmp(pattern_filename, "-") == 0)    if (strcmp(pattern_filename, "-") == 0)
2540      {      {
# Line 1983  if (pattern_filename != NULL) Line 2553  if (pattern_filename != NULL)
2553      filename = pattern_filename;      filename = pattern_filename;
2554      }      }
2555    
2556    while (fgets(buffer, MBUFTHIRD, f) != NULL)    while (fgets(buffer, PATBUFSIZE, f) != NULL)
2557      {      {
2558      char *s = buffer + (int)strlen(buffer);      char *s = buffer + (int)strlen(buffer);
2559      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
# Line 2012  for (j = 0; j < pattern_count; j++) Line 2582  for (j = 0; j < pattern_count; j++)
2582    hint_count++;    hint_count++;
2583    }    }
2584    
2585    /* If --match-limit or --recursion-limit was set, put the value(s) into the
2586    pcre_extra block for each pattern. */
2587    
2588    if (match_limit > 0 || match_limit_recursion > 0)
2589      {
2590      for (j = 0; j < pattern_count; j++)
2591        {
2592        if (hints_list[j] == NULL)
2593          {
2594          hints_list[j] = malloc(sizeof(pcre_extra));
2595          if (hints_list[j] == NULL)
2596            {
2597            fprintf(stderr, "pcregrep: malloc failed\n");
2598            pcregrep_exit(2);
2599            }
2600          }
2601        if (match_limit > 0)
2602          {
2603          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2604          hints_list[j]->match_limit = match_limit;
2605          }
2606        if (match_limit_recursion > 0)
2607          {
2608          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2609          hints_list[j]->match_limit_recursion = match_limit_recursion;
2610          }
2611        }
2612      }
2613    
2614  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
2615    
2616  if (exclude_pattern != NULL)  if (exclude_pattern != NULL)
# Line 2038  if (include_pattern != NULL) Line 2637  if (include_pattern != NULL)
2637      }      }
2638    }    }
2639    
2640    if (exclude_dir_pattern != NULL)
2641      {
2642      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2643        pcretables);
2644      if (exclude_dir_compiled == NULL)
2645        {
2646        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2647          errptr, error);
2648        goto EXIT2;
2649        }
2650      }
2651    
2652    if (include_dir_pattern != NULL)
2653      {
2654      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2655        pcretables);
2656      if (include_dir_compiled == NULL)
2657        {
2658        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2659          errptr, error);
2660        goto EXIT2;
2661        }
2662      }
2663    
2664  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
2665    
2666  if (i >= argc)  if (i >= argc)
2667    {    {
2668    rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2669        (filenames > FN_DEFAULT)? stdin_name : NULL);
2670    goto EXIT;    goto EXIT;
2671    }    }
2672    
# Line 2062  for (; i < argc; i++) Line 2686  for (; i < argc; i++)
2686    }    }
2687    
2688  EXIT:  EXIT:
2689    if (main_buffer != NULL) free(main_buffer);
2690  if (pattern_list != NULL)  if (pattern_list != NULL)
2691    {    {
2692    for (i = 0; i < pattern_count; i++) free(pattern_list[i]);    for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
# Line 2069  if (pattern_list != NULL) Line 2694  if (pattern_list != NULL)
2694    }    }
2695  if (hints_list != NULL)  if (hints_list != NULL)
2696    {    {
2697    for (i = 0; i < hint_count; i++) free(hints_list[i]);    for (i = 0; i < hint_count; i++)
2698        {
2699        if (hints_list[i] != NULL) free(hints_list[i]);
2700        }
2701    free(hints_list);    free(hints_list);
2702    }    }
2703  return rc;  pcregrep_exit(rc);
2704    
2705  EXIT2:  EXIT2:
2706  rc = 2;  rc = 2;

Legend:
Removed from v.199  
changed lines
  Added in v.644

  ViewVC Help
Powered by ViewVC 1.1.5