/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 283 by ph10, Fri Dec 7 19:59:19 2007 UTC revision 667 by ph10, Mon Aug 22 14:57:32 2011 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2011 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 55  POSSIBILITY OF SUCH DAMAGE. Line 55  POSSIBILITY OF SUCH DAMAGE.
55  #include <unistd.h>  #include <unistd.h>
56  #endif  #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 63  POSSIBILITY OF SUCH DAMAGE. Line 71  POSSIBILITY OF SUCH DAMAGE.
71  typedef int BOOL;  typedef int BOOL;
72    
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define PATBUFSIZE BUFSIZ
78  #else  #else
79  #define MBUFTHIRD 8192  #define PATBUFSIZE 8192
80  #endif  #endif
81    
82  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
83  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
84  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
85    
86  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92  /* Actions for the -d and -D options */  /* Actions for the -d and -D options */
93    
# Line 91  enum { DEE_READ, DEE_SKIP }; Line 104  enum { DEE_READ, DEE_SKIP };
104    
105  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
113    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
114    
115    
116    
117  /*************************************************  /*************************************************
# Line 114  static char *colour_string = (char *)"1; Line 135  static char *colour_string = (char *)"1;
135  static char *colour_option = NULL;  static char *colour_option = NULL;
136  static char *dee_option = NULL;  static char *dee_option = NULL;
137  static char *DEE_option = NULL;  static char *DEE_option = NULL;
138    static char *main_buffer = NULL;
139  static char *newline = NULL;  static char *newline = NULL;
140  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
141  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
# Line 127  static pcre_extra **hints_list = NULL; Line 149  static pcre_extra **hints_list = NULL;
149    
150  static char *include_pattern = NULL;  static char *include_pattern = NULL;
151  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
152    static char *include_dir_pattern = NULL;
153    static char *exclude_dir_pattern = NULL;
154    
155  static pcre *include_compiled = NULL;  static pcre *include_compiled = NULL;
156  static pcre *exclude_compiled = NULL;  static pcre *exclude_compiled = NULL;
157    static pcre *include_dir_compiled = NULL;
158    static pcre *exclude_dir_compiled = NULL;
159    
160  static int after_context = 0;  static int after_context = 0;
161  static int before_context = 0;  static int before_context = 0;
162  static int both_context = 0;  static int both_context = 0;
163    static int bufthird = PCREGREP_BUFSIZE;
164    static int bufsize = 3*PCREGREP_BUFSIZE;
165  static int dee_action = dee_READ;  static int dee_action = dee_READ;
166  static int DEE_action = DEE_READ;  static int DEE_action = DEE_READ;
167  static int error_count = 0;  static int error_count = 0;
168  static int filenames = FN_DEFAULT;  static int filenames = FN_DEFAULT;
169    static int only_matching = -1;
170  static int process_options = 0;  static int process_options = 0;
171    static int study_options = 0;
172    
173    static unsigned long int match_limit = 0;
174    static unsigned long int match_limit_recursion = 0;
175    
176  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
177  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
178  static BOOL file_offsets = FALSE;  static BOOL file_offsets = FALSE;
179  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
180  static BOOL invert = FALSE;  static BOOL invert = FALSE;
181    static BOOL line_buffered = FALSE;
182  static BOOL line_offsets = FALSE;  static BOOL line_offsets = FALSE;
183  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
184  static BOOL number = FALSE;  static BOOL number = FALSE;
185  static BOOL only_matching = FALSE;  static BOOL omit_zero_count = FALSE;
186    static BOOL resource_error = FALSE;
187  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
188  static BOOL silent = FALSE;  static BOOL silent = FALSE;
189  static BOOL utf8 = FALSE;  static BOOL utf8 = FALSE;
190    
191  /* Structure for options and list of them */  /* Structure for options and list of them */
192    
193  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
194         OP_PATLIST };         OP_OP_NUMBER, OP_PATLIST };
195    
196  typedef struct option_item {  typedef struct option_item {
197    int type;    int type;
# Line 169  typedef struct option_item { Line 204  typedef struct option_item {
204  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
205  used to identify them. */  used to identify them. */
206    
207  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
208  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
209  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
210  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
211  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
212  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
213  #define N_NULL      (-7)  #define N_LABEL        (-7)
214  #define N_LOFFSETS  (-8)  #define N_LOCALE       (-8)
215  #define N_FOFFSETS  (-9)  #define N_NULL         (-9)
216    #define N_LOFFSETS     (-10)
217    #define N_FOFFSETS     (-11)
218    #define N_LBUFFER      (-12)
219    #define N_M_LIMIT      (-13)
220    #define N_M_LIMIT_REC  (-14)
221    #define N_BUFSIZE      (-15)
222    
223  static option_item optionlist[] = {  static option_item optionlist[] = {
224    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,     N_NULL,   NULL,              "",              "  terminate options" },
225    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
226    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
227    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
228    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
229    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
230    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
231    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
232    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
233    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
234    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
235    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_PATLIST,    'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
236    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
237    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },    { OP_STRING,     'f',      &pattern_filename, "file=path",     "read patterns from file" },
238    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
239    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
240    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
241    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
242    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,     'j',      NULL,              "jit",           "use JIT compiler if available" },
243    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
244    { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },    { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
245    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
246    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
247    { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },    { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
248    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
249    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
250    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
251    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
252    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
253    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
254      { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
255      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
256      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
257      { OP_STRING,     N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
258      { OP_STRING,     N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
259      { OP_STRING,     N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
260      { OP_STRING,     N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
261    
262      /* These two were accidentally implemented with underscores instead of
263      hyphens in the option names. As this was not discovered for several releases,
264      the incorrect versions are left in the table for compatibility. However, the
265      --help function misses out any option that has an underscore in its name. */
266    
267      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
268      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
269    
270  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
271    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
272  #endif  #endif
# Line 246  const char utf8_table4[] = { Line 303  const char utf8_table4[] = {
303    
304    
305  /*************************************************  /*************************************************
306    *         Exit from the program                  *
307    *************************************************/
308    
309    /* If there has been a resource error, give a suitable message.
310    
311    Argument:  the return code
312    Returns:   does not return
313    */
314    
315    static void
316    pcregrep_exit(int rc)
317    {
318    if (resource_error)
319      {
320      fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
321        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
322      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
323      }
324    
325    exit(rc);
326    }
327    
328    
329    /*************************************************
330  *            OS-specific functions               *  *            OS-specific functions               *
331  *************************************************/  *************************************************/
332    
# Line 309  return (statbuf.st_mode & S_IFMT) == S_I Line 390  return (statbuf.st_mode & S_IFMT) == S_I
390  }  }
391    
392    
393  /************* Test stdout for being a terminal in Unix **********/  /************* Test for a terminal in Unix **********/
394    
395  static BOOL  static BOOL
396  is_stdout_tty(void)  is_stdout_tty(void)
# Line 317  is_stdout_tty(void) Line 398  is_stdout_tty(void)
398  return isatty(fileno(stdout));  return isatty(fileno(stdout));
399  }  }
400    
401    static BOOL
402    is_file_tty(FILE *f)
403    {
404    return isatty(fileno(f));
405    }
406    
407    
408  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
409    
410  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
411  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
412  when it did not exist. David Byron added a patch that moved the #include of  when it did not exist. David Byron added a patch that moved the #include of
413  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
414  */  The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
415    undefined when it is indeed undefined. */
416    
417  #elif HAVE_WINDOWS_H  #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
418    
419  #ifndef STRICT  #ifndef STRICT
420  # define STRICT  # define STRICT
# Line 370  dir = (directory_type *) malloc(sizeof(* Line 458  dir = (directory_type *) malloc(sizeof(*
458  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
459    {    {
460    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
461    exit(2);    pcregrep_exit(2);
462    }    }
463  memcpy(pattern, filename, len);  memcpy(pattern, filename, len);
464  memcpy(&(pattern[len]), "\\*", 3);  memcpy(&(pattern[len]), "\\*", 3);
# Line 429  return !isdirectory(filename); Line 517  return !isdirectory(filename);
517  }  }
518    
519    
520  /************* Test stdout for being a terminal in Win32 **********/  /************* Test for a terminal in Win32 **********/
521    
522  /* I don't know how to do this; assume never */  /* I don't know how to do this; assume never */
523    
# Line 439  is_stdout_tty(void) Line 527  is_stdout_tty(void)
527  return FALSE;  return FALSE;
528  }  }
529    
530    static BOOL
531    is_file_tty(FILE *f)
532    {
533    return FALSE;
534    }
535    
536    
537  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
538    
# Line 461  void closedirectory(directory_type *dir) Line 555  void closedirectory(directory_type *dir)
555  int isregfile(char *filename) { return 1; }  int isregfile(char *filename) { return 1; }
556    
557    
558  /************* Test stdout for being a terminal when we can't do it **********/  /************* Test for a terminal when we can't do it **********/
559    
560  static BOOL  static BOOL
561  is_stdout_tty(void)  is_stdout_tty(void)
# Line 469  is_stdout_tty(void) Line 563  is_stdout_tty(void)
563  return FALSE;  return FALSE;
564  }  }
565    
566    static BOOL
567    is_file_tty(FILE *f)
568    {
569    return FALSE;
570    }
571    
572  #endif  #endif
573    
# Line 497  return sys_errlist[n]; Line 596  return sys_errlist[n];
596    
597    
598  /*************************************************  /*************************************************
599    *            Read one line of input              *
600    *************************************************/
601    
602    /* Normally, input is read using fread() into a large buffer, so many lines may
603    be read at once. However, doing this for tty input means that no output appears
604    until a lot of input has been typed. Instead, tty input is handled line by
605    line. We cannot use fgets() for this, because it gives no indication of how
606    many characters it has read, and the length cannot be recovered afterwards
607    because there may be binary zeros embedded in the data.
608    
609    Arguments:
610      buffer     the buffer to read into
611      length     the maximum number of characters to read
612      f          the file
613    
614    Returns:     the number of characters read, zero at end of file
615    */
616    
617    static int
618    read_one_line(char *buffer, int length, FILE *f)
619    {
620    int c;
621    int yield = 0;
622    while ((c = fgetc(f)) != EOF)
623      {
624      buffer[yield++] = c;
625      if (c == '\n' || yield >= length) break;
626      }
627    return yield;
628    }
629    
630    
631    
632    /*************************************************
633  *             Find end of line                   *  *             Find end of line                   *
634  *************************************************/  *************************************************/
635    
# Line 508  Arguments: Line 641  Arguments:
641    endptr    end of available data    endptr    end of available data
642    lenptr    where to put the length of the eol sequence    lenptr    where to put the length of the eol sequence
643    
644  Returns:    pointer to the last byte of the line  Returns:    pointer after the last byte of the line,
645                including the newline byte(s)
646  */  */
647    
648  static char *  static char *
# Line 791  if (after_context > 0 && lastmatchnumber Line 925  if (after_context > 0 && lastmatchnumber
925      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
926      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
927      pp = end_of_line(pp, endptr, &ellength);      pp = end_of_line(pp, endptr, &ellength);
928      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
929      lastmatchrestart = pp;      lastmatchrestart = pp;
930      }      }
931    hyphenpending = TRUE;    hyphenpending = TRUE;
# Line 801  if (after_context > 0 && lastmatchnumber Line 935  if (after_context > 0 && lastmatchnumber
935    
936    
937  /*************************************************  /*************************************************
938    *   Apply patterns to subject till one matches   *
939    *************************************************/
940    
941    /* This function is called to run through all patterns, looking for a match. It
942    is used multiple times for the same subject when colouring is enabled, in order
943    to find all possible matches.
944    
945    Arguments:
946      matchptr     the start of the subject
947      length       the length of the subject to match
948      startoffset  where to start matching
949      offsets      the offsets vector to fill in
950      mrc          address of where to put the result of pcre_exec()
951    
952    Returns:      TRUE if there was a match
953                  FALSE if there was no match
954                  invert if there was a non-fatal error
955    */
956    
957    static BOOL
958    match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
959      int *mrc)
960    {
961    int i;
962    size_t slen = length;
963    const char *msg = "this text:\n\n";
964    if (slen > 200)
965      {
966      slen = 200;
967      msg = "text that starts:\n\n";
968      }
969    for (i = 0; i < pattern_count; i++)
970      {
971      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
972        startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
973      if (*mrc >= 0) return TRUE;
974      if (*mrc == PCRE_ERROR_NOMATCH) continue;
975      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
976      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
977      fprintf(stderr, "%s", msg);
978      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
979      fprintf(stderr, "\n\n");
980      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
981        resource_error = TRUE;
982      if (error_count++ > 20)
983        {
984        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
985        pcregrep_exit(2);
986        }
987      return invert;    /* No more matching; don't show the line again */
988      }
989    
990    return FALSE;  /* No match, no errors */
991    }
992    
993    
994    
995    /*************************************************
996  *            Grep an individual file             *  *            Grep an individual file             *
997  *************************************************/  *************************************************/
998    
999  /* This is called from grep_or_recurse() below. It uses a buffer that is three  /* This is called from grep_or_recurse() below. It uses a buffer that is three
1000  times the value of MBUFTHIRD. The matching point is never allowed to stray into  times the value of bufthird. The matching point is never allowed to stray into
1001  the top third of the buffer, thus keeping more of the file available for  the top third of the buffer, thus keeping more of the file available for
1002  context printing or for multiline scanning. For large files, the pointer will  context printing or for multiline scanning. For large files, the pointer will
1003  be in the middle third most of the time, so the bottom third is available for  be in the middle third most of the time, so the bottom third is available for
1004  "before" context printing.  "before" context printing.
1005    
1006  Arguments:  Arguments:
1007    in           the fopened FILE stream    handle       the fopened FILE stream for a normal file
1008                   the gzFile pointer when reading is via libz
1009                   the BZFILE pointer when reading is via libbz2
1010      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1011      filename     the file name or NULL (for errors)
1012    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
1013                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
1014                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
1015    
1016  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
1017                 1 otherwise (no matches)                 1 otherwise (no matches)
1018                   2 if an overlong line is encountered
1019                   3 if there is a read error on a .bz2 file
1020  */  */
1021    
1022  static int  static int
1023  pcregrep(FILE *in, char *printname)  pcregrep(void *handle, int frtype, char *filename, char *printname)
1024  {  {
1025  int rc = 1;  int rc = 1;
1026  int linenumber = 1;  int linenumber = 1;
1027  int lastmatchnumber = 0;  int lastmatchnumber = 0;
1028  int count = 0;  int count = 0;
1029  int filepos = 0;  int filepos = 0;
1030  int offsets[99];  int offsets[OFFSET_SIZE];
1031  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
1032  char buffer[3*MBUFTHIRD];  char *ptr = main_buffer;
 char *ptr = buffer;  
1033  char *endptr;  char *endptr;
1034  size_t bufflength;  size_t bufflength;
1035  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
1036    BOOL input_line_buffered = line_buffered;
1037    FILE *in = NULL;                    /* Ensure initialized */
1038    
1039    #ifdef SUPPORT_LIBZ
1040    gzFile ingz = NULL;
1041    #endif
1042    
1043    #ifdef SUPPORT_LIBBZ2
1044    BZFILE *inbz2 = NULL;
1045    #endif
1046    
1047    
1048    /* Do the first read into the start of the buffer and set up the pointer to end
1049    of what we have. In the case of libz, a non-zipped .gz file will be read as a
1050    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1051    fail. */
1052    
1053    #ifdef SUPPORT_LIBZ
1054    if (frtype == FR_LIBZ)
1055      {
1056      ingz = (gzFile)handle;
1057      bufflength = gzread (ingz, main_buffer, bufsize);
1058      }
1059    else
1060    #endif
1061    
1062    #ifdef SUPPORT_LIBBZ2
1063    if (frtype == FR_LIBBZ2)
1064      {
1065      inbz2 = (BZFILE *)handle;
1066      bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1067      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1068      }                                    /* without the cast it is unsigned. */
1069    else
1070    #endif
1071    
1072  /* Do the first read into the start of the buffer and set up the pointer to    {
1073  end of what we have. */    in = (FILE *)handle;
1074      if (is_file_tty(in)) input_line_buffered = TRUE;
1075      bufflength = input_line_buffered?
1076        read_one_line(main_buffer, bufsize, in) :
1077        fread(main_buffer, 1, bufsize, in);
1078      }
1079    
1080  bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  endptr = main_buffer + bufflength;
 endptr = buffer + bufflength;  
1081    
1082  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
1083  files, endptr will be at the end of the buffer when we are in the middle of the  files, endptr will be at the end of the buffer when we are in the middle of the
# Line 850  way, the buffer is shifted left and re-f Line 1086  way, the buffer is shifted left and re-f
1086    
1087  while (ptr < endptr)  while (ptr < endptr)
1088    {    {
1089    int i, endlinelength;    int endlinelength;
1090    int mrc = 0;    int mrc = 0;
1091    BOOL match = FALSE;    int startoffset = 0;
1092    char *matchptr = ptr;    BOOL match;
1093      char *matchptr = ptr;
1094    char *t = ptr;    char *t = ptr;
1095    size_t length, linelength;    size_t length, linelength;
1096    
1097    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
1098    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
1099    length remainder of the data in the buffer. Otherwise, it is the length of    length remainder of the data in the buffer. Otherwise, it is the length of
1100    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
1101    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1102    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
1103      first line. */
1104    
1105    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
1106    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
1107    length = multiline? (size_t)(endptr - ptr) : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
1108    
1109      /* Check to see if the line we are looking at extends right to the very end
1110      of the buffer without a line terminator. This means the line is too long to
1111      handle. */
1112    
1113      if (endlinelength == 0 && t == main_buffer + bufsize)
1114        {
1115        fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1116                        "pcregrep: check the --buffer-size option\n",
1117                        linenumber,
1118                        (filename == NULL)? "" : " of file ",
1119                        (filename == NULL)? "" : filename);
1120        return 2;
1121        }
1122    
1123    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
1124    
1125  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
# Line 877  while (ptr < endptr) Line 1129  while (ptr < endptr)
1129        #include <time.h>        #include <time.h>
1130        struct timeval start_time, end_time;        struct timeval start_time, end_time;
1131        struct timezone dummy;        struct timezone dummy;
1132          int i;
1133    
1134        if (jfriedl_XT)        if (jfriedl_XT)
1135        {        {
# Line 885  while (ptr < endptr) Line 1138  while (ptr < endptr)
1138            ptr = malloc(newlen + 1);            ptr = malloc(newlen + 1);
1139            if (!ptr) {            if (!ptr) {
1140                    printf("out of memory");                    printf("out of memory");
1141                    exit(2);                    pcregrep_exit(2);
1142            }            }
1143            endptr = ptr;            endptr = ptr;
1144            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
# Line 902  while (ptr < endptr) Line 1155  while (ptr < endptr)
1155    
1156    
1157        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1158            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1159                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1160    
1161        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
1162                perror("bad gettimeofday");                perror("bad gettimeofday");
# Line 916  while (ptr < endptr) Line 1170  while (ptr < endptr)
1170    }    }
1171  #endif  #endif
1172    
1173    /* We come back here after a match when the -o option (only_matching) is set,    /* We come back here after a match when the -o option (only_matching) is set,
1174    in order to find any further matches in the same line. */    in order to find any further matches in the same line. */
   
   ONLY_MATCHING_RESTART:  
1175    
1176    /* Run through all the patterns until one matches. Note that we don't include    ONLY_MATCHING_RESTART:
   the final newline in the subject string. */  
1177    
1178    for (i = 0; i < pattern_count; i++)    /* Run through all the patterns until one matches or there is an error other
1179      {    than NOMATCH. This code is in a subroutine so that it can be re-used for
1180      mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,    finding subsequent matches when colouring matched lines. */
1181        offsets, 99);  
1182      if (mrc >= 0) { match = TRUE; break; }    match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
     if (mrc != PCRE_ERROR_NOMATCH)  
       {  
       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);  
       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
       fprintf(stderr, "this line:\n");  
       fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */  
       fprintf(stderr, "\n");  
       if (error_count == 0 &&  
           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))  
         {  
         fprintf(stderr, "pcregrep: error %d means that a resource limit "  
           "was exceeded\n", mrc);  
         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
         }  
       if (error_count++ > 20)  
         {  
         fprintf(stderr, "pcregrep: too many errors - abandoned\n");  
         exit(2);  
         }  
       match = invert;    /* No more matching; don't show the line again */  
       break;  
       }  
     }  
1183    
1184    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1185    
# Line 970  while (ptr < endptr) Line 1198  while (ptr < endptr)
1198      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1199      in the file. */      in the file. */
1200    
1201      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1202        {        {
1203        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1204        return 0;        return 0;
# Line 980  while (ptr < endptr) Line 1208  while (ptr < endptr)
1208    
1209      else if (quiet) return 0;      else if (quiet) return 0;
1210    
1211      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, or a
1212      the --file-offsets and --line-offsets options output offsets for the      captured portion of it, as long as this string is not empty, and the
1213      matching substring (they both force --only-matching). None of these options      --file-offsets and --line-offsets options output offsets for the matching
1214      prints any context. Afterwards, adjust the start and length, and then jump      substring (they both force --only-matching = 0). None of these options
1215      back to look for further matches in the same line. If we are in invert      prints any context. Afterwards, adjust the start and then jump back to look
1216      mode, however, nothing is printed - this could be still useful because the      for further matches in the same line. If we are in invert mode, however,
1217      return code is set. */      nothing is printed and we do not restart - this could still be useful
1218        because the return code is set. */
1219    
1220      else if (only_matching)      else if (only_matching >= 0)
1221        {        {
1222        if (!invert)        if (!invert)
1223          {          {
1224          if (printname != NULL) fprintf(stdout, "%s:", printname);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1225          if (number) fprintf(stdout, "%d:", linenumber);          if (number) fprintf(stdout, "%d:", linenumber);
1226          if (line_offsets)          if (line_offsets)
1227            fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1228              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1229          else if (file_offsets)          else if (file_offsets)
1230            fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d\n",
1231              offsets[1] - offsets[0]);              (int)(filepos + matchptr + offsets[0] - ptr),
1232          else              offsets[1] - offsets[0]);
1233            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          else if (only_matching < mrc)
1234          fprintf(stdout, "\n");            {
1235          matchptr += offsets[1];            int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1236          length -= offsets[1];            if (plen > 0)
1237          match = FALSE;              {
1238          goto ONLY_MATCHING_RESTART;              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1239          }              FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1240                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1241                fprintf(stdout, "\n");
1242                }
1243              }
1244            else if (printname != NULL || number) fprintf(stdout, "\n");
1245            match = FALSE;
1246            if (line_buffered) fflush(stdout);
1247            rc = 0;                      /* Had some success */
1248            startoffset = offsets[1];    /* Restart after the match */
1249            goto ONLY_MATCHING_RESTART;
1250            }
1251        }        }
1252    
1253      /* This is the default case when none of the above options is set. We print      /* This is the default case when none of the above options is set. We print
# Line 1041  while (ptr < endptr) Line 1281  while (ptr < endptr)
1281            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1282            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1283            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1284            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1285            lastmatchrestart = pp;            lastmatchrestart = pp;
1286            }            }
1287          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
# Line 1064  while (ptr < endptr) Line 1304  while (ptr < endptr)
1304          int linecount = 0;          int linecount = 0;
1305          char *p = ptr;          char *p = ptr;
1306    
1307          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1308                 linecount < before_context)                 linecount < before_context)
1309            {            {
1310            linecount++;            linecount++;
1311            p = previous_line(p, buffer);            p = previous_line(p, main_buffer);
1312            }            }
1313    
1314          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 1081  while (ptr < endptr) Line 1321  while (ptr < endptr)
1321            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1322            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1323            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1324            fwrite(p, 1, pp - p, stdout);            FWRITE(p, 1, pp - p, stdout);
1325            p = pp;            p = pp;
1326            }            }
1327          }          }
# Line 1101  while (ptr < endptr) Line 1341  while (ptr < endptr)
1341        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1342        the match will always be before the first newline sequence. */        the match will always be before the first newline sequence. */
1343    
1344        if (multiline)        if (multiline & !invert)
1345          {          {
1346          int ellength;          char *endmatch = ptr + offsets[1];
1347          char *endmatch = ptr;          t = ptr;
1348          if (!invert)          while (t < endmatch)
1349            {            {
1350            endmatch += offsets[1];            t = end_of_line(t, endptr, &endlinelength);
1351            t = ptr;            if (t < endmatch) linenumber++; else break;
           while (t < endmatch)  
             {  
             t = end_of_line(t, endptr, &ellength);  
             if (t <= endmatch) linenumber++; else break;  
             }  
1352            }            }
1353          endmatch = end_of_line(endmatch, endptr, &ellength);          linelength = t - ptr - endlinelength;
         linelength = endmatch - ptr - ellength;  
1354          }          }
1355    
1356        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 1131  while (ptr < endptr) Line 1365  while (ptr < endptr)
1365          {          {
1366          int first = S_arg * 2;          int first = S_arg * 2;
1367          int last  = first + 1;          int last  = first + 1;
1368          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1369          fprintf(stdout, "X");          fprintf(stdout, "X");
1370          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1371          }          }
1372        else        else
1373  #endif  #endif
1374    
1375        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1376          matches, but not of course if the line is a non-match. */
1377    
1378        if (do_colour)        if (do_colour && !invert)
1379          {          {
1380          fwrite(ptr, 1, offsets[0], stdout);          int plength;
1381            FWRITE(ptr, 1, offsets[0], stdout);
1382          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1383          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1384          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1385          fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],          for (;;)
1386            stdout);            {
1387              startoffset = offsets[1];
1388              if (startoffset >= linelength + endlinelength ||
1389                  !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1390                break;
1391              FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1392              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1393              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1394              fprintf(stdout, "%c[00m", 0x1b);
1395              }
1396    
1397            /* In multiline mode, we may have already printed the complete line
1398            and its line-ending characters (if they matched the pattern), so there
1399            may be no more to print. */
1400    
1401            plength = (linelength + endlinelength) - startoffset;
1402            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1403          }          }
1404        else fwrite(ptr, 1, linelength + endlinelength, stdout);  
1405          /* Not colouring; no need to search for further matches */
1406    
1407          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1408        }        }
1409    
1410      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match. If --line-buffered was
1411        given, flush the output. */
1412    
1413        if (line_buffered) fflush(stdout);
1414      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1415    
1416      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
# Line 1181  while (ptr < endptr) Line 1438  while (ptr < endptr)
1438      linelength = endmatch - ptr - ellength;      linelength = endmatch - ptr - ellength;
1439      }      }
1440    
1441    /* Advance to after the newline and increment the line number. The file    /* Advance to after the newline and increment the line number. The file
1442    offset to the current line is maintained in filepos. */    offset to the current line is maintained in filepos. */
1443    
1444    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
1445    filepos += linelength + endlinelength;    filepos += (int)(linelength + endlinelength);
1446    linenumber++;    linenumber++;
1447    
1448      /* If input is line buffered, and the buffer is not yet full, read another
1449      line and add it into the buffer. */
1450    
1451      if (input_line_buffered && bufflength < bufsize)
1452        {
1453        int add = read_one_line(ptr, bufsize - (ptr - main_buffer), in);
1454        bufflength += add;
1455        endptr += add;
1456        }
1457    
1458    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1459    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1460    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
1461    about to be lost, print them. */    about to be lost, print them. */
1462    
1463    if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)    if (bufflength >= bufsize && ptr > main_buffer + 2*bufthird)
1464      {      {
1465      if (after_context > 0 &&      if (after_context > 0 &&
1466          lastmatchnumber > 0 &&          lastmatchnumber > 0 &&
1467          lastmatchrestart < buffer + MBUFTHIRD)          lastmatchrestart < main_buffer + bufthird)
1468        {        {
1469        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1470        lastmatchnumber = 0;        lastmatchnumber = 0;
# Line 1205  while (ptr < endptr) Line 1472  while (ptr < endptr)
1472    
1473      /* Now do the shuffle */      /* Now do the shuffle */
1474    
1475      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1476      ptr -= MBUFTHIRD;      ptr -= bufthird;
1477      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);  
1478      endptr = buffer + bufflength;  #ifdef SUPPORT_LIBZ
1479        if (frtype == FR_LIBZ)
1480          bufflength = 2*bufthird +
1481            gzread (ingz, main_buffer + 2*bufthird, bufthird);
1482        else
1483    #endif
1484    
1485    #ifdef SUPPORT_LIBBZ2
1486        if (frtype == FR_LIBBZ2)
1487          bufflength = 2*bufthird +
1488            BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1489        else
1490    #endif
1491    
1492        bufflength = 2*bufthird +
1493          (input_line_buffered?
1494           read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1495           fread(main_buffer + 2*bufthird, 1, bufthird, in));
1496        endptr = main_buffer + bufflength;
1497    
1498      /* Adjust any last match point */      /* Adjust any last match point */
1499    
1500      if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;      if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1501      }      }
1502    }     /* Loop through the whole file */    }     /* Loop through the whole file */
1503    
1504  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
1505  hyphenpending if it prints something. */  hyphenpending if it prints something. */
1506    
1507  if (!only_matching && !count_only)  if (only_matching < 0 && !count_only)
1508    {    {
1509    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1510    hyphenpending |= endhyphenpending;    hyphenpending |= endhyphenpending;
# Line 1238  if (filenames == FN_NOMATCH_ONLY) Line 1523  if (filenames == FN_NOMATCH_ONLY)
1523    
1524  if (count_only)  if (count_only)
1525    {    {
1526    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1527    fprintf(stdout, "%d\n", count);      {
1528        if (printname != NULL && filenames != FN_NONE)
1529          fprintf(stdout, "%s:", printname);
1530        fprintf(stdout, "%d\n", count);
1531        }
1532    }    }
1533    
1534  return rc;  return rc;
# Line 1271  grep_or_recurse(char *pathname, BOOL dir Line 1560  grep_or_recurse(char *pathname, BOOL dir
1560  {  {
1561  int rc = 1;  int rc = 1;
1562  int sep;  int sep;
1563  FILE *in;  int frtype;
1564    int pathlen;
1565    void *handle;
1566    FILE *in = NULL;           /* Ensure initialized */
1567    
1568    #ifdef SUPPORT_LIBZ
1569    gzFile ingz = NULL;
1570    #endif
1571    
1572    #ifdef SUPPORT_LIBBZ2
1573    BZFILE *inbz2 = NULL;
1574    #endif
1575    
1576  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1577    
1578  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1579    {    {
1580    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN, stdin_name,
1581      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1582        stdin_name : NULL);        stdin_name : NULL);
1583    }    }
1584    
   
1585  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* If the file is a directory, skip if skipping or if we are recursing, scan
1586  each file within it, subject to any include or exclude patterns that were set.  each file and directory within it, subject to any include or exclude patterns
1587  The scanning code is localized so it can be made system-specific. */  that were set. The scanning code is localized so it can be made
1588    system-specific. */
1589    
1590  if ((sep = isdirectory(pathname)) != 0)  if ((sep = isdirectory(pathname)) != 0)
1591    {    {
# Line 1306  if ((sep = isdirectory(pathname)) != 0) Line 1606  if ((sep = isdirectory(pathname)) != 0)
1606    
1607      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
1608        {        {
1609        int frc, blen;        int frc, nflen;
1610        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1611        blen = strlen(buffer);        nflen = (int)(strlen(nextfile));
1612    
1613        if (exclude_compiled != NULL &&        if (isdirectory(buffer))
1614            pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)          {
1615          continue;          if (exclude_dir_compiled != NULL &&
1616                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1617        if (include_compiled != NULL &&            continue;
1618            pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  
1619          continue;          if (include_dir_compiled != NULL &&
1620                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1621              continue;
1622            }
1623          else
1624            {
1625            if (exclude_compiled != NULL &&
1626                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1627              continue;
1628    
1629            if (include_compiled != NULL &&
1630                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1631              continue;
1632            }
1633    
1634        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1635        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
# Line 1339  skipping was not requested. The scan pro Line 1652  skipping was not requested. The scan pro
1652  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1653  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1654    
1655  in = fopen(pathname, "r");  pathlen = (int)(strlen(pathname));
1656  if (in == NULL)  
1657    /* Open using zlib if it is supported and the file name ends with .gz. */
1658    
1659    #ifdef SUPPORT_LIBZ
1660    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1661      {
1662      ingz = gzopen(pathname, "rb");
1663      if (ingz == NULL)
1664        {
1665        if (!silent)
1666          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1667            strerror(errno));
1668        return 2;
1669        }
1670      handle = (void *)ingz;
1671      frtype = FR_LIBZ;
1672      }
1673    else
1674    #endif
1675    
1676    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1677    
1678    #ifdef SUPPORT_LIBBZ2
1679    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1680      {
1681      inbz2 = BZ2_bzopen(pathname, "rb");
1682      handle = (void *)inbz2;
1683      frtype = FR_LIBBZ2;
1684      }
1685    else
1686    #endif
1687    
1688    /* Otherwise use plain fopen(). The label is so that we can come back here if
1689    an attempt to read a .bz2 file indicates that it really is a plain file. */
1690    
1691    #ifdef SUPPORT_LIBBZ2
1692    PLAIN_FILE:
1693    #endif
1694      {
1695      in = fopen(pathname, "rb");
1696      handle = (void *)in;
1697      frtype = FR_PLAIN;
1698      }
1699    
1700    /* All the opening methods set errno when they fail. */
1701    
1702    if (handle == NULL)
1703    {    {
1704    if (!silent)    if (!silent)
1705      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 1348  if (in == NULL) Line 1707  if (in == NULL)
1707    return 2;    return 2;
1708    }    }
1709    
1710  rc = pcregrep(in, (filenames > FN_DEFAULT ||  /* Now grep the file */
1711    
1712    rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1713    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1714    
1715    /* Close in an appropriate manner. */
1716    
1717    #ifdef SUPPORT_LIBZ
1718    if (frtype == FR_LIBZ)
1719      gzclose(ingz);
1720    else
1721    #endif
1722    
1723    /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1724    read failed. If the error indicates that the file isn't in fact bzipped, try
1725    again as a normal file. */
1726    
1727    #ifdef SUPPORT_LIBBZ2
1728    if (frtype == FR_LIBBZ2)
1729      {
1730      if (rc == 3)
1731        {
1732        int errnum;
1733        const char *err = BZ2_bzerror(inbz2, &errnum);
1734        if (errnum == BZ_DATA_ERROR_MAGIC)
1735          {
1736          BZ2_bzclose(inbz2);
1737          goto PLAIN_FILE;
1738          }
1739        else if (!silent)
1740          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1741            pathname, err);
1742        rc = 2;    /* The normal "something went wrong" code */
1743        }
1744      BZ2_bzclose(inbz2);
1745      }
1746    else
1747    #endif
1748    
1749    /* Normal file close */
1750    
1751  fclose(in);  fclose(in);
1752    
1753    /* Pass back the yield from pcregrep(). */
1754    
1755  return rc;  return rc;
1756  }  }
1757    
# Line 1392  option_item *op; Line 1792  option_item *op;
1792  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1793  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1794  printf("PATTERN must be present if neither -e nor -f is used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1795  printf("\"-\" can be used as a file name to mean STDIN.\n\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
1796  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
1797    #ifdef SUPPORT_LIBZ
1798    printf("Files whose names end in .gz are read using zlib.\n");
1799    #endif
1800    
1801    #ifdef SUPPORT_LIBBZ2
1802    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1803    #endif
1804    
1805    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1806    printf("Other files and the standard input are read as plain files.\n\n");
1807    #else
1808    printf("All files are read as plain files, without any interpretation.\n\n");
1809    #endif
1810    
1811    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1812  printf("Options:\n");  printf("Options:\n");
1813    
1814  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
1815    {    {
1816    int n;    int n;
1817    char s[4];    char s[4];
1818    
1819      /* Two options were accidentally implemented and documented with underscores
1820      instead of hyphens in their names, something that was not noticed for quite a
1821      few releases. When fixing this, I left the underscored versions in the list
1822      in case people were using them. However, we don't want to display them in the
1823      help data. There are no other options that contain underscores, and we do not
1824      expect ever to implement such options. Therefore, just omit any option that
1825      contains an underscore. */
1826    
1827      if (strchr(op->long_name, '_') != NULL) continue;
1828    
1829    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1830    printf("  %s --%s%n", s, op->long_name, &n);    n = 31 - printf("  %s --%s", s, op->long_name);
   n = 30 - n;  
1831    if (n < 1) n = 1;    if (n < 1) n = 1;
1832    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                     ", op->help_text);
1833    }    }
1834    
1835  printf("\nWhen reading patterns from a file instead of using a command line option,\n");  printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
1836    printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
1837    printf("When reading patterns from a file instead of using a command line option,\n");
1838  printf("trailing white space is removed and blank lines are ignored.\n");  printf("trailing white space is removed and blank lines are ignored.\n");
1839  printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
1840      MAX_PATTERN_COUNT, PATBUFSIZE);
1841    
1842  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1843  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
# Line 1428  handle_option(int letter, int options) Line 1855  handle_option(int letter, int options)
1855  {  {
1856  switch(letter)  switch(letter)
1857    {    {
1858    case N_FOFFSETS: file_offsets = TRUE; break;    case N_FOFFSETS: file_offsets = TRUE; break;
1859    case N_HELP: help(); exit(0);    case N_HELP: help(); pcregrep_exit(0);
1860    case N_LOFFSETS: line_offsets = number = TRUE; break;    case N_LOFFSETS: line_offsets = number = TRUE; break;
1861      case N_LBUFFER: line_buffered = TRUE; break;
1862    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1863    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1864    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
1865    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
1866    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1867    case 'l': filenames = FN_ONLY; break;    case 'j': study_options |= PCRE_STUDY_JIT_COMPILE; break;
1868      case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1869    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1870    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1871    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1872    case 'o': only_matching = TRUE; break;    case 'o': only_matching = 0; break;
1873    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1874    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
1875    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
# Line 1451  switch(letter) Line 1880  switch(letter)
1880    
1881    case 'V':    case 'V':
1882    fprintf(stderr, "pcregrep version %s\n", pcre_version());    fprintf(stderr, "pcregrep version %s\n", pcre_version());
1883    exit(0);    pcregrep_exit(0);
1884    break;    break;
1885    
1886    default:    default:
1887    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1888    exit(usage(2));    pcregrep_exit(usage(2));
1889    }    }
1890    
1891  return options;  return options;
# Line 1511  Returns:         TRUE on success, FALSE Line 1940  Returns:         TRUE on success, FALSE
1940  static BOOL  static BOOL
1941  compile_single_pattern(char *pattern, int options, char *filename, int count)  compile_single_pattern(char *pattern, int options, char *filename, int count)
1942  {  {
1943  char buffer[MBUFTHIRD + 16];  char buffer[PATBUFSIZE];
1944  const char *error;  const char *error;
1945  int errptr;  int errptr;
1946    
# Line 1522  if (pattern_count >= MAX_PATTERN_COUNT) Line 1951  if (pattern_count >= MAX_PATTERN_COUNT)
1951    return FALSE;    return FALSE;
1952    }    }
1953    
1954  sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,  sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
1955    suffix[process_options]);    suffix[process_options]);
1956  pattern_list[pattern_count] =  pattern_list[pattern_count] =
1957    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
# Line 1581  compile_pattern(char *pattern, int optio Line 2010  compile_pattern(char *pattern, int optio
2010  if ((process_options & PO_FIXED_STRINGS) != 0)  if ((process_options & PO_FIXED_STRINGS) != 0)
2011    {    {
2012    char *eop = pattern + strlen(pattern);    char *eop = pattern + strlen(pattern);
2013    char buffer[MBUFTHIRD];    char buffer[PATBUFSIZE];
2014    for(;;)    for(;;)
2015      {      {
2016      int ellength;      int ellength;
# Line 1621  const char *error; Line 2050  const char *error;
2050    
2051  /* Set the default line ending value from the default in the PCRE library;  /* Set the default line ending value from the default in the PCRE library;
2052  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2053  */  Note that the return values from pcre_config(), though derived from the ASCII
2054    codes, are the same in EBCDIC environments, so we must use the actual values
2055    rather than escapes such as '\r'. */
2056    
2057  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2058  switch(i)  switch(i)
2059    {    {
2060    default:                 newline = (char *)"lf"; break;    default:               newline = (char *)"lf"; break;
2061    case '\r':               newline = (char *)"cr"; break;    case 13:               newline = (char *)"cr"; break;
2062    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2063    case -1:                 newline = (char *)"any"; break;    case -1:               newline = (char *)"any"; break;
2064    case -2:                 newline = (char *)"anycrlf"; break;    case -2:               newline = (char *)"anycrlf"; break;
2065    }    }
2066    
2067  /* Process the options */  /* Process the options */
# Line 1650  for (i = 1; i < argc; i++) Line 2081  for (i = 1; i < argc; i++)
2081    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2082      {      {
2083      if (pattern_filename != NULL || pattern_count > 0) break;      if (pattern_filename != NULL || pattern_count > 0) break;
2084        else exit(usage(2));        else pcregrep_exit(usage(2));
2085      }      }
2086    
2087    /* Handle a long name option, or -- to terminate the options */    /* Handle a long name option, or -- to terminate the options */
# Line 1672  for (i = 1; i < argc; i++) Line 2103  for (i = 1; i < argc; i++)
2103      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2104      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2105      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2106      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
2107      these categories, fortunately. */      both these categories. */
2108    
2109      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2110        {        {
2111        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2112        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2113        if (opbra == NULL)     /* Not a (p) case */  
2114          /* Handle options with only one spelling of the name */
2115    
2116          if (opbra == NULL)     /* Does not contain '(' */
2117          {          {
2118          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2119            {            {
# Line 1687  for (i = 1; i < argc; i++) Line 2121  for (i = 1; i < argc; i++)
2121            }            }
2122          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2123            {            {
2124            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2125            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2126                (int)strlen(arg) : (int)(argequals - arg);
2127            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2128              {              {
2129              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1701  for (i = 1; i < argc; i++) Line 2136  for (i = 1; i < argc; i++)
2136              }              }
2137            }            }
2138          }          }
2139        else                   /* Special case xxxx(p) */  
2140          /* Handle options with an alternate spelling of the name */
2141    
2142          else
2143          {          {
2144          char buff1[24];          char buff1[24];
2145          char buff2[24];          char buff2[24];
2146          int baselen = opbra - op->long_name;  
2147            int baselen = (int)(opbra - op->long_name);
2148            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2149            int arglen = (argequals == NULL || equals == NULL)?
2150              (int)strlen(arg) : (int)(argequals - arg);
2151    
2152          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2153          sprintf(buff2, "%s%.*s", buff1,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2154            (int)strlen(op->long_name) - baselen - 2, opbra + 1);  
2155          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2156               strncmp(arg, buff2, arglen) == 0)
2157              {
2158              if (equals != NULL && argequals != NULL)
2159                {
2160                option_data = argequals;
2161                if (*option_data == '=')
2162                  {
2163                  option_data++;
2164                  longopwasequals = TRUE;
2165                  }
2166                }
2167            break;            break;
2168              }
2169          }          }
2170        }        }
2171    
2172      if (op->one_char == 0)      if (op->one_char == 0)
2173        {        {
2174        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2175        exit(usage(2));        pcregrep_exit(usage(2));
2176        }        }
2177      }      }
2178    
   
2179    /* Jeffrey Friedl's debugging harness uses these additional options which    /* Jeffrey Friedl's debugging harness uses these additional options which
2180    are not in the right form for putting in the option table because they use    are not in the right form for putting in the option table because they use
2181    only one hyphen, yet are more than one character long. By putting them    only one hyphen, yet are more than one character long. By putting them
# Line 1755  for (i = 1; i < argc; i++) Line 2209  for (i = 1; i < argc; i++)
2209      while (*s != 0)      while (*s != 0)
2210        {        {
2211        for (op = optionlist; op->one_char != 0; op++)        for (op = optionlist; op->one_char != 0; op++)
2212          { if (*s == op->one_char) break; }          {
2213            if (*s == op->one_char) break;
2214            }
2215        if (op->one_char == 0)        if (op->one_char == 0)
2216          {          {
2217          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2218            *s, argv[i]);            *s, argv[i]);
2219          exit(usage(2));          pcregrep_exit(usage(2));
2220          }          }
2221        if (op->type != OP_NODATA || s[1] == 0)  
2222          /* Check for a single-character option that has data: OP_OP_NUMBER
2223          is used for one that either has a numeric value or defaults, i.e. the
2224          data is optional. If a digit follows, there is data; if not, carry on
2225          with other single-character options in the same string. */
2226    
2227          option_data = s+1;
2228          if (op->type == OP_OP_NUMBER)
2229          {          {
2230          option_data = s+1;          if (isdigit((unsigned char)s[1])) break;
         break;  
2231          }          }
2232          else   /* Check for end or a dataless option */
2233            {
2234            if (op->type != OP_NODATA || s[1] == 0) break;
2235            }
2236    
2237          /* Handle a single-character option with no data, then loop for the
2238          next character in the string. */
2239    
2240        pcre_options = handle_option(*s++, pcre_options);        pcre_options = handle_option(*s++, pcre_options);
2241        }        }
2242      }      }
# Line 1783  for (i = 1; i < argc; i++) Line 2253  for (i = 1; i < argc; i++)
2253    
2254    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2255    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
2256    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r",
2257    Jeffrey Friedl's special -S debugging option. */    "only-matching", and Jeffrey Friedl's special -S debugging option. */
2258    
2259    if (*option_data == 0 &&    if (*option_data == 0 &&
2260        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
# Line 1794  for (i = 1; i < argc; i++) Line 2264  for (i = 1; i < argc; i++)
2264        case N_COLOUR:        case N_COLOUR:
2265        colour_option = (char *)"auto";        colour_option = (char *)"auto";
2266        break;        break;
2267    
2268          case 'o':
2269          only_matching = 0;
2270          break;
2271    
2272  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
2273        case 'S':        case 'S':
2274        S_arg = 0;        S_arg = 0;
# Line 1810  for (i = 1; i < argc; i++) Line 2285  for (i = 1; i < argc; i++)
2285      if (i >= argc - 1 || longopwasequals)      if (i >= argc - 1 || longopwasequals)
2286        {        {
2287        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2288        exit(usage(2));        pcregrep_exit(usage(2));
2289        }        }
2290      option_data = argv[++i];      option_data = argv[++i];
2291      }      }
# Line 1831  for (i = 1; i < argc; i++) Line 2306  for (i = 1; i < argc; i++)
2306    
2307    /* Otherwise, deal with single string or numeric data values. */    /* Otherwise, deal with single string or numeric data values. */
2308    
2309    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)    else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2310               op->type != OP_OP_NUMBER)
2311      {      {
2312      *((char **)op->dataptr) = option_data;      *((char **)op->dataptr) = option_data;
2313      }      }
2314    
2315      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2316      only for unpicking arguments, so just keep it simple. */
2317    
2318    else    else
2319      {      {
2320      char *endptr;      unsigned long int n = 0;
2321      int n = strtoul(option_data, &endptr, 10);      char *endptr = option_data;
2322        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2323        while (isdigit((unsigned char)(*endptr)))
2324          n = n * 10 + (int)(*endptr++ - '0');
2325        if (toupper(*endptr) == 'K')
2326          {
2327          n *= 1024;
2328          endptr++;
2329          }
2330        else if (toupper(*endptr) == 'M')
2331          {
2332          n *= 1024*1024;
2333          endptr++;
2334          }
2335      if (*endptr != 0)      if (*endptr != 0)
2336        {        {
2337        if (longop)        if (longop)
2338          {          {
2339          char *equals = strchr(op->long_name, '=');          char *equals = strchr(op->long_name, '=');
2340          int nlen = (equals == NULL)? (int)strlen(op->long_name) :          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2341            equals - op->long_name;            (int)(equals - op->long_name);
2342          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2343            option_data, nlen, op->long_name);            option_data, nlen, op->long_name);
2344          }          }
2345        else        else
2346          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2347            option_data, op->one_char);            option_data, op->one_char);
2348        exit(usage(2));        pcregrep_exit(usage(2));
2349        }        }
2350      *((int *)op->dataptr) = n;      if (op->type == OP_LONGNUMBER)
2351            *((unsigned long int *)op->dataptr) = n;
2352        else
2353            *((int *)op->dataptr) = n;
2354      }      }
2355    }    }
2356    
# Line 1866  if (both_context > 0) Line 2362  if (both_context > 0)
2362    if (after_context == 0) after_context = both_context;    if (after_context == 0) after_context = both_context;
2363    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2364    }    }
   
 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.  
 However, the latter two set the only_matching flag. */  
2365    
2366  if ((only_matching && (file_offsets || line_offsets)) ||  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2367      (file_offsets && line_offsets))  However, the latter two set only_matching. */
2368    
2369    if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2370        (file_offsets && line_offsets))
2371    {    {
2372    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2373      "and/or --line-offsets\n");      "and/or --line-offsets\n");
2374    exit(usage(2));    pcregrep_exit(usage(2));
2375    }    }
2376    
2377  if (file_offsets || line_offsets) only_matching = TRUE;  if (file_offsets || line_offsets) only_matching = 0;
2378    
2379  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2380  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
# Line 2002  if (jfriedl_XT != 0 || jfriedl_XR != 0) Line 2498  if (jfriedl_XT != 0 || jfriedl_XR != 0)
2498    }    }
2499  #endif  #endif
2500    
2501  /* Get memory to store the pattern and hints lists. */  /* Get memory for the main buffer, and to store the pattern and hints lists. */
2502    
2503    bufsize = 3*bufthird;
2504    main_buffer = (char *)malloc(bufsize);
2505  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2506  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2507    
2508  if (pattern_list == NULL || hints_list == NULL)  if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
2509    {    {
2510    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
2511    goto EXIT2;    goto EXIT2;
# Line 2039  if (pattern_filename != NULL) Line 2537  if (pattern_filename != NULL)
2537    int linenumber = 0;    int linenumber = 0;
2538    FILE *f;    FILE *f;
2539    char *filename;    char *filename;
2540    char buffer[MBUFTHIRD];    char buffer[PATBUFSIZE];
2541    
2542    if (strcmp(pattern_filename, "-") == 0)    if (strcmp(pattern_filename, "-") == 0)
2543      {      {
# Line 2058  if (pattern_filename != NULL) Line 2556  if (pattern_filename != NULL)
2556      filename = pattern_filename;      filename = pattern_filename;
2557      }      }
2558    
2559    while (fgets(buffer, MBUFTHIRD, f) != NULL)    while (fgets(buffer, PATBUFSIZE, f) != NULL)
2560      {      {
2561      char *s = buffer + (int)strlen(buffer);      char *s = buffer + (int)strlen(buffer);
2562      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
# Line 2076  if (pattern_filename != NULL) Line 2574  if (pattern_filename != NULL)
2574    
2575  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
2576    {    {
2577    hints_list[j] = pcre_study(pattern_list[j], 0, &error);    hints_list[j] = pcre_study(pattern_list[j], study_options, &error);
2578    if (error != NULL)    if (error != NULL)
2579      {      {
2580      char s[16];      char s[16];
# Line 2087  for (j = 0; j < pattern_count; j++) Line 2585  for (j = 0; j < pattern_count; j++)
2585    hint_count++;    hint_count++;
2586    }    }
2587    
2588    /* If --match-limit or --recursion-limit was set, put the value(s) into the
2589    pcre_extra block for each pattern. */
2590    
2591    if (match_limit > 0 || match_limit_recursion > 0)
2592      {
2593      for (j = 0; j < pattern_count; j++)
2594        {
2595        if (hints_list[j] == NULL)
2596          {
2597          hints_list[j] = malloc(sizeof(pcre_extra));
2598          if (hints_list[j] == NULL)
2599            {
2600            fprintf(stderr, "pcregrep: malloc failed\n");
2601            pcregrep_exit(2);
2602            }
2603          }
2604        if (match_limit > 0)
2605          {
2606          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2607          hints_list[j]->match_limit = match_limit;
2608          }
2609        if (match_limit_recursion > 0)
2610          {
2611          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2612          hints_list[j]->match_limit_recursion = match_limit_recursion;
2613          }
2614        }
2615      }
2616    
2617  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
2618    
2619  if (exclude_pattern != NULL)  if (exclude_pattern != NULL)
# Line 2113  if (include_pattern != NULL) Line 2640  if (include_pattern != NULL)
2640      }      }
2641    }    }
2642    
2643    if (exclude_dir_pattern != NULL)
2644      {
2645      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2646        pcretables);
2647      if (exclude_dir_compiled == NULL)
2648        {
2649        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2650          errptr, error);
2651        goto EXIT2;
2652        }
2653      }
2654    
2655    if (include_dir_pattern != NULL)
2656      {
2657      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2658        pcretables);
2659      if (include_dir_compiled == NULL)
2660        {
2661        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2662          errptr, error);
2663        goto EXIT2;
2664        }
2665      }
2666    
2667  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
2668    
2669  if (i >= argc)  if (i >= argc)
2670    {    {
2671    rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2672        (filenames > FN_DEFAULT)? stdin_name : NULL);
2673    goto EXIT;    goto EXIT;
2674    }    }
2675    
# Line 2137  for (; i < argc; i++) Line 2689  for (; i < argc; i++)
2689    }    }
2690    
2691  EXIT:  EXIT:
2692    if (main_buffer != NULL) free(main_buffer);
2693  if (pattern_list != NULL)  if (pattern_list != NULL)
2694    {    {
2695    for (i = 0; i < pattern_count; i++) free(pattern_list[i]);    for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
# Line 2144  if (pattern_list != NULL) Line 2697  if (pattern_list != NULL)
2697    }    }
2698  if (hints_list != NULL)  if (hints_list != NULL)
2699    {    {
2700    for (i = 0; i < hint_count; i++) free(hints_list[i]);    for (i = 0; i < hint_count; i++)
2701        {
2702        if (hints_list[i] != NULL) pcre_free_study(hints_list[i]);
2703        }
2704    free(hints_list);    free(hints_list);
2705    }    }
2706  return rc;  pcregrep_exit(rc);
2707    
2708  EXIT2:  EXIT2:
2709  rc = 2;  rc = 2;

Legend:
Removed from v.283  
changed lines
  Added in v.667

  ViewVC Help
Powered by ViewVC 1.1.5