/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 345 by ph10, Mon Apr 28 15:10:02 2008 UTC revision 879 by ph10, Sun Jan 15 15:50:06 2012 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 71  POSSIBILITY OF SUCH DAMAGE. Line 71  POSSIBILITY OF SUCH DAMAGE.
71  typedef int BOOL;  typedef int BOOL;
72    
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define PATBUFSIZE BUFSIZ
78  #else  #else
79  #define MBUFTHIRD 8192  #define PATBUFSIZE 8192
80  #endif  #endif
81    
82  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
83  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
84  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
85    
86  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88  /* File reading styles */  /* File reading styles */
89    
# Line 103  enum { DEE_READ, DEE_SKIP }; Line 104  enum { DEE_READ, DEE_SKIP };
104    
105  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
113    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
114    
115    
116    
117  /*************************************************  /*************************************************
# Line 126  static char *colour_string = (char *)"1; Line 135  static char *colour_string = (char *)"1;
135  static char *colour_option = NULL;  static char *colour_option = NULL;
136  static char *dee_option = NULL;  static char *dee_option = NULL;
137  static char *DEE_option = NULL;  static char *DEE_option = NULL;
138    static char *main_buffer = NULL;
139  static char *newline = NULL;  static char *newline = NULL;
140  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
141  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
# Line 150  static pcre *exclude_dir_compiled = NULL Line 160  static pcre *exclude_dir_compiled = NULL
160  static int after_context = 0;  static int after_context = 0;
161  static int before_context = 0;  static int before_context = 0;
162  static int both_context = 0;  static int both_context = 0;
163    static int bufthird = PCREGREP_BUFSIZE;
164    static int bufsize = 3*PCREGREP_BUFSIZE;
165  static int dee_action = dee_READ;  static int dee_action = dee_READ;
166  static int DEE_action = DEE_READ;  static int DEE_action = DEE_READ;
167  static int error_count = 0;  static int error_count = 0;
168  static int filenames = FN_DEFAULT;  static int filenames = FN_DEFAULT;
169    static int only_matching = -1;
170  static int process_options = 0;  static int process_options = 0;
171    
172    #ifdef SUPPORT_PCREGREP_JIT
173    static int study_options = PCRE_STUDY_JIT_COMPILE;
174    #else
175    static int study_options = 0;
176    #endif
177    
178    static unsigned long int match_limit = 0;
179    static unsigned long int match_limit_recursion = 0;
180    
181  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
182  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
183  static BOOL file_offsets = FALSE;  static BOOL file_offsets = FALSE;
184  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
185  static BOOL invert = FALSE;  static BOOL invert = FALSE;
186    static BOOL line_buffered = FALSE;
187  static BOOL line_offsets = FALSE;  static BOOL line_offsets = FALSE;
188  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
189  static BOOL number = FALSE;  static BOOL number = FALSE;
190  static BOOL only_matching = FALSE;  static BOOL omit_zero_count = FALSE;
191    static BOOL resource_error = FALSE;
192  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
193  static BOOL silent = FALSE;  static BOOL silent = FALSE;
194  static BOOL utf8 = FALSE;  static BOOL utf8 = FALSE;
195    
196  /* Structure for options and list of them */  /* Structure for options and list of them */
197    
198  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
199         OP_PATLIST };         OP_OP_NUMBER, OP_PATLIST };
200    
201  typedef struct option_item {  typedef struct option_item {
202    int type;    int type;
# Line 196  used to identify them. */ Line 220  used to identify them. */
220  #define N_NULL         (-9)  #define N_NULL         (-9)
221  #define N_LOFFSETS     (-10)  #define N_LOFFSETS     (-10)
222  #define N_FOFFSETS     (-11)  #define N_FOFFSETS     (-11)
223    #define N_LBUFFER      (-12)
224    #define N_M_LIMIT      (-13)
225    #define N_M_LIMIT_REC  (-14)
226    #define N_BUFSIZE      (-15)
227    #define N_NOJIT        (-16)
228    
229  static option_item optionlist[] = {  static option_item optionlist[] = {
230    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,     N_NULL,   NULL,              "",              "  terminate options" },
231    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
232    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
233    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
234    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },    { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
235    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
236    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
237    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
238    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
239    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
240    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
241    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_PATLIST,    'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
242    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
243    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },    { OP_STRING,     'f',      &pattern_filename, "file=path",     "read patterns from file" },
244    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
245    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
246    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
247    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
248    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },  #ifdef SUPPORT_PCREGREP_JIT
249    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
250    { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },  #else
251    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
252    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },  #endif
253    { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },    { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
254    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
255    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
256    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
257    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
258    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
259    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
260      { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
261      { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
262      { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
263      { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
264      { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
265      { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
266      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
267      { OP_STRING,     N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
268      { OP_STRING,     N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
269      { OP_STRING,     N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
270      { OP_STRING,     N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
271    
272      /* These two were accidentally implemented with underscores instead of
273      hyphens in the option names. As this was not discovered for several releases,
274      the incorrect versions are left in the table for compatibility. However, the
275      --help function misses out any option that has an underscore in its name. */
276    
277    { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },    { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
278    { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },    { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
279    
280  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
281    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
282  #endif  #endif
# Line 266  const char utf8_table4[] = { Line 313  const char utf8_table4[] = {
313    
314    
315  /*************************************************  /*************************************************
316    *         Exit from the program                  *
317    *************************************************/
318    
319    /* If there has been a resource error, give a suitable message.
320    
321    Argument:  the return code
322    Returns:   does not return
323    */
324    
325    static void
326    pcregrep_exit(int rc)
327    {
328    if (resource_error)
329      {
330      fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
331        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
332        PCRE_ERROR_JIT_STACKLIMIT);
333      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
334      }
335    
336    exit(rc);
337    }
338    
339    
340    /*************************************************
341  *            OS-specific functions               *  *            OS-specific functions               *
342  *************************************************/  *************************************************/
343    
# Line 329  return (statbuf.st_mode & S_IFMT) == S_I Line 401  return (statbuf.st_mode & S_IFMT) == S_I
401  }  }
402    
403    
404  /************* Test stdout for being a terminal in Unix **********/  /************* Test for a terminal in Unix **********/
405    
406  static BOOL  static BOOL
407  is_stdout_tty(void)  is_stdout_tty(void)
# Line 337  is_stdout_tty(void) Line 409  is_stdout_tty(void)
409  return isatty(fileno(stdout));  return isatty(fileno(stdout));
410  }  }
411    
412    static BOOL
413    is_file_tty(FILE *f)
414    {
415    return isatty(fileno(f));
416    }
417    
418    
419  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
420    
# Line 344  return isatty(fileno(stdout)); Line 422  return isatty(fileno(stdout));
422  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
423  when it did not exist. David Byron added a patch that moved the #include of  when it did not exist. David Byron added a patch that moved the #include of
424  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
425  */  The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
426    undefined when it is indeed undefined. */
427    
428  #elif HAVE_WINDOWS_H  #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
429    
430  #ifndef STRICT  #ifndef STRICT
431  # define STRICT  # define STRICT
# Line 390  dir = (directory_type *) malloc(sizeof(* Line 469  dir = (directory_type *) malloc(sizeof(*
469  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
470    {    {
471    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
472    exit(2);    pcregrep_exit(2);
473    }    }
474  memcpy(pattern, filename, len);  memcpy(pattern, filename, len);
475  memcpy(&(pattern[len]), "\\*", 3);  memcpy(&(pattern[len]), "\\*", 3);
# Line 449  return !isdirectory(filename); Line 528  return !isdirectory(filename);
528  }  }
529    
530    
531  /************* Test stdout for being a terminal in Win32 **********/  /************* Test for a terminal in Win32 **********/
532    
533  /* I don't know how to do this; assume never */  /* I don't know how to do this; assume never */
534    
# Line 459  is_stdout_tty(void) Line 538  is_stdout_tty(void)
538  return FALSE;  return FALSE;
539  }  }
540    
541    static BOOL
542    is_file_tty(FILE *f)
543    {
544    return FALSE;
545    }
546    
547    
548  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
549    
# Line 481  void closedirectory(directory_type *dir) Line 566  void closedirectory(directory_type *dir)
566  int isregfile(char *filename) { return 1; }  int isregfile(char *filename) { return 1; }
567    
568    
569  /************* Test stdout for being a terminal when we can't do it **********/  /************* Test for a terminal when we can't do it **********/
570    
571  static BOOL  static BOOL
572  is_stdout_tty(void)  is_stdout_tty(void)
# Line 489  is_stdout_tty(void) Line 574  is_stdout_tty(void)
574  return FALSE;  return FALSE;
575  }  }
576    
577    static BOOL
578    is_file_tty(FILE *f)
579    {
580    return FALSE;
581    }
582    
583  #endif  #endif
584    
# Line 517  return sys_errlist[n]; Line 607  return sys_errlist[n];
607    
608    
609  /*************************************************  /*************************************************
610    *            Read one line of input              *
611    *************************************************/
612    
613    /* Normally, input is read using fread() into a large buffer, so many lines may
614    be read at once. However, doing this for tty input means that no output appears
615    until a lot of input has been typed. Instead, tty input is handled line by
616    line. We cannot use fgets() for this: it does not stop at a binary zero, and
617    because the data may contain embedded binary zeros, there is no way of telling
618    how many characters it has read.
619    
620    Arguments:
621      buffer     the buffer to read into
622      length     the maximum number of characters to read
623      f          the file
624    
625    Returns:     the number of characters read, zero at end of file
626    */
627    
628    static int
629    read_one_line(char *buffer, int length, FILE *f)
630    {
631    int c;
632    int yield = 0;
633    while ((c = fgetc(f)) != EOF)
634      {
635      buffer[yield++] = c;
636      if (c == '\n' || yield >= length) break;
637      }
638    return yield;
639    }
640    
641    
642    
643    /*************************************************
644  *             Find end of line                   *  *             Find end of line                   *
645  *************************************************/  *************************************************/
646    
# Line 528  Arguments: Line 652  Arguments:
652    endptr    end of available data    endptr    end of available data
653    lenptr    where to put the length of the eol sequence    lenptr    where to put the length of the eol sequence
654    
655  Returns:    pointer to the last byte of the line  Returns:    pointer after the last byte of the line,
656                including the newline byte(s)
657  */  */
658    
659  static char *  static char *
# Line 811  if (after_context > 0 && lastmatchnumber Line 936  if (after_context > 0 && lastmatchnumber
936      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
937      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
938      pp = end_of_line(pp, endptr, &ellength);      pp = end_of_line(pp, endptr, &ellength);
939      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
940      lastmatchrestart = pp;      lastmatchrestart = pp;
941      }      }
942    hyphenpending = TRUE;    hyphenpending = TRUE;
# Line 821  if (after_context > 0 && lastmatchnumber Line 946  if (after_context > 0 && lastmatchnumber
946    
947    
948  /*************************************************  /*************************************************
949    *   Apply patterns to subject till one matches   *
950    *************************************************/
951    
952    /* This function is called to run through all patterns, looking for a match. It
953    is used multiple times for the same subject when colouring is enabled, in order
954    to find all possible matches.
955    
956    Arguments:
957      matchptr     the start of the subject
958      length       the length of the subject to match
959      startoffset  where to start matching
960      offsets      the offsets vector to fill in
961      mrc          address of where to put the result of pcre_exec()
962    
963    Returns:      TRUE if there was a match
964                  FALSE if there was no match
965                  invert if there was a non-fatal error
966    */
967    
968    static BOOL
969    match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
970      int *mrc)
971    {
972    int i;
973    size_t slen = length;
974    const char *msg = "this text:\n\n";
975    if (slen > 200)
976      {
977      slen = 200;
978      msg = "text that starts:\n\n";
979      }
980    for (i = 0; i < pattern_count; i++)
981      {
982      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
983        startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
984      if (*mrc >= 0) return TRUE;
985      if (*mrc == PCRE_ERROR_NOMATCH) continue;
986      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
987      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
988      fprintf(stderr, "%s", msg);
989      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
990      fprintf(stderr, "\n\n");
991      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
992          *mrc == PCRE_ERROR_JIT_STACKLIMIT)
993        resource_error = TRUE;
994      if (error_count++ > 20)
995        {
996        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
997        pcregrep_exit(2);
998        }
999      return invert;    /* No more matching; don't show the line again */
1000      }
1001    
1002    return FALSE;  /* No match, no errors */
1003    }
1004    
1005    
1006    
1007    /*************************************************
1008  *            Grep an individual file             *  *            Grep an individual file             *
1009  *************************************************/  *************************************************/
1010    
1011  /* This is called from grep_or_recurse() below. It uses a buffer that is three  /* This is called from grep_or_recurse() below. It uses a buffer that is three
1012  times the value of MBUFTHIRD. The matching point is never allowed to stray into  times the value of bufthird. The matching point is never allowed to stray into
1013  the top third of the buffer, thus keeping more of the file available for  the top third of the buffer, thus keeping more of the file available for
1014  context printing or for multiline scanning. For large files, the pointer will  context printing or for multiline scanning. For large files, the pointer will
1015  be in the middle third most of the time, so the bottom third is available for  be in the middle third most of the time, so the bottom third is available for
# Line 836  Arguments: Line 1020  Arguments:
1020                 the gzFile pointer when reading is via libz                 the gzFile pointer when reading is via libz
1021                 the BZFILE pointer when reading is via libbz2                 the BZFILE pointer when reading is via libbz2
1022    frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2    frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1023      filename     the file name or NULL (for errors)
1024    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
1025                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
1026                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
1027    
1028  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
1029                 1 otherwise (no matches)                 1 otherwise (no matches)
1030                 2 if there is a read error on a .bz2 file                 2 if an overlong line is encountered
1031                   3 if there is a read error on a .bz2 file
1032  */  */
1033    
1034  static int  static int
1035  pcregrep(void *handle, int frtype, char *printname)  pcregrep(void *handle, int frtype, char *filename, char *printname)
1036  {  {
1037  int rc = 1;  int rc = 1;
1038  int linenumber = 1;  int linenumber = 1;
1039  int lastmatchnumber = 0;  int lastmatchnumber = 0;
1040  int count = 0;  int count = 0;
1041  int filepos = 0;  int filepos = 0;
1042  int offsets[99];  int offsets[OFFSET_SIZE];
1043  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
1044  char buffer[3*MBUFTHIRD];  char *ptr = main_buffer;
 char *ptr = buffer;  
1045  char *endptr;  char *endptr;
1046  size_t bufflength;  size_t bufflength;
1047  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
1048    BOOL input_line_buffered = line_buffered;
1049  FILE *in = NULL;                    /* Ensure initialized */  FILE *in = NULL;                    /* Ensure initialized */
1050    
1051  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
# Line 880  fail. */ Line 1066  fail. */
1066  if (frtype == FR_LIBZ)  if (frtype == FR_LIBZ)
1067    {    {
1068    ingz = (gzFile)handle;    ingz = (gzFile)handle;
1069    bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);    bufflength = gzread (ingz, main_buffer, bufsize);
1070    }    }
1071  else  else
1072  #endif  #endif
# Line 889  else Line 1075  else
1075  if (frtype == FR_LIBBZ2)  if (frtype == FR_LIBBZ2)
1076    {    {
1077    inbz2 = (BZFILE *)handle;    inbz2 = (BZFILE *)handle;
1078    bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);    bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1079    if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */    if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1080    }                                    /* without the cast it is unsigned. */    }                                    /* without the cast it is unsigned. */
1081  else  else
# Line 897  else Line 1083  else
1083    
1084    {    {
1085    in = (FILE *)handle;    in = (FILE *)handle;
1086    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);    if (is_file_tty(in)) input_line_buffered = TRUE;
1087      bufflength = input_line_buffered?
1088        read_one_line(main_buffer, bufsize, in) :
1089        fread(main_buffer, 1, bufsize, in);
1090    }    }
1091    
1092  endptr = buffer + bufflength;  endptr = main_buffer + bufflength;
1093    
1094  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
1095  files, endptr will be at the end of the buffer when we are in the middle of the  files, endptr will be at the end of the buffer when we are in the middle of the
# Line 909  way, the buffer is shifted left and re-f Line 1098  way, the buffer is shifted left and re-f
1098    
1099  while (ptr < endptr)  while (ptr < endptr)
1100    {    {
1101    int i, endlinelength;    int endlinelength;
1102    int mrc = 0;    int mrc = 0;
1103    BOOL match = FALSE;    int startoffset = 0;
1104      BOOL match;
1105    char *matchptr = ptr;    char *matchptr = ptr;
1106    char *t = ptr;    char *t = ptr;
1107    size_t length, linelength;    size_t length, linelength;
# Line 919  while (ptr < endptr) Line 1109  while (ptr < endptr)
1109    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
1110    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
1111    length of the remainder of the data in the buffer. Otherwise, it is the length of    length of the remainder of the data in the buffer. Otherwise, it is the length of
1112    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
1113    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1114    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
1115      first line. */
1116    
1117    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
1118    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
1119    length = multiline? (size_t)(endptr - ptr) : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
1120    
1121      /* Check to see if the line we are looking at extends right to the very end
1122      of the buffer without a line terminator. This means the line is too long to
1123      handle. */
1124    
1125      if (endlinelength == 0 && t == main_buffer + bufsize)
1126        {
1127        fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1128                        "pcregrep: check the --buffer-size option\n",
1129                        linenumber,
1130                        (filename == NULL)? "" : " of file ",
1131                        (filename == NULL)? "" : filename);
1132        return 2;
1133        }
1134    
1135    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
1136    
1137  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
# Line 936  while (ptr < endptr) Line 1141  while (ptr < endptr)
1141        #include <time.h>        #include <time.h>
1142        struct timeval start_time, end_time;        struct timeval start_time, end_time;
1143        struct timezone dummy;        struct timezone dummy;
1144          int i;
1145    
1146        if (jfriedl_XT)        if (jfriedl_XT)
1147        {        {
# Line 944  while (ptr < endptr) Line 1150  while (ptr < endptr)
1150            ptr = malloc(newlen + 1);            ptr = malloc(newlen + 1);
1151            if (!ptr) {            if (!ptr) {
1152                    printf("out of memory");                    printf("out of memory");
1153                    exit(2);                    pcregrep_exit(2);
1154            }            }
1155            endptr = ptr;            endptr = ptr;
1156            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
# Line 961  while (ptr < endptr) Line 1167  while (ptr < endptr)
1167    
1168    
1169        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1170            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1171                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1172    
1173        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
1174                perror("bad gettimeofday");                perror("bad gettimeofday");
# Line 980  while (ptr < endptr) Line 1187  while (ptr < endptr)
1187    
1188    ONLY_MATCHING_RESTART:    ONLY_MATCHING_RESTART:
1189    
1190    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches or there is an error other
1191    the final newline in the subject string. */    than NOMATCH. This code is in a subroutine so that it can be re-used for
1192      finding subsequent matches when colouring matched lines. */
1193    
1194    for (i = 0; i < pattern_count; i++)    match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
     {  
     mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,  
       offsets, 99);  
     if (mrc >= 0) { match = TRUE; break; }  
     if (mrc != PCRE_ERROR_NOMATCH)  
       {  
       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);  
       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
       fprintf(stderr, "this line:\n");  
       fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */  
       fprintf(stderr, "\n");  
       if (error_count == 0 &&  
           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))  
         {  
         fprintf(stderr, "pcregrep: error %d means that a resource limit "  
           "was exceeded\n", mrc);  
         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
         }  
       if (error_count++ > 20)  
         {  
         fprintf(stderr, "pcregrep: too many errors - abandoned\n");  
         exit(2);  
         }  
       match = invert;    /* No more matching; don't show the line again */  
       break;  
       }  
     }  
1195    
1196    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1197    
# Line 1029  while (ptr < endptr) Line 1210  while (ptr < endptr)
1210      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1211      in the file. */      in the file. */
1212    
1213      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1214        {        {
1215        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1216        return 0;        return 0;
# Line 1039  while (ptr < endptr) Line 1220  while (ptr < endptr)
1220    
1221      else if (quiet) return 0;      else if (quiet) return 0;
1222    
1223      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, or a
1224      the --file-offsets and --line-offsets options output offsets for the      captured portion of it, as long as this string is not empty, and the
1225      matching substring (they both force --only-matching). None of these options      --file-offsets and --line-offsets options output offsets for the matching
1226      prints any context. Afterwards, adjust the start and length, and then jump      substring (they both force --only-matching = 0). None of these options
1227      back to look for further matches in the same line. If we are in invert      prints any context. Afterwards, adjust the start and then jump back to look
1228      mode, however, nothing is printed - this could be still useful because the      for further matches in the same line. If we are in invert mode, however,
1229      return code is set. */      nothing is printed and we do not restart - this could still be useful
1230        because the return code is set. */
1231    
1232      else if (only_matching)      else if (only_matching >= 0)
1233        {        {
1234        if (!invert)        if (!invert)
1235          {          {
1236          if (printname != NULL) fprintf(stdout, "%s:", printname);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1237          if (number) fprintf(stdout, "%d:", linenumber);          if (number) fprintf(stdout, "%d:", linenumber);
1238          if (line_offsets)          if (line_offsets)
1239            fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1240              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1241          else if (file_offsets)          else if (file_offsets)
1242            fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d\n",
1243                (int)(filepos + matchptr + offsets[0] - ptr),
1244              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1245          else          else if (only_matching < mrc)
1246            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);            {
1247          fprintf(stdout, "\n");            int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1248          matchptr += offsets[1];            if (plen > 0)
1249          length -= offsets[1];              {
1250                if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1251                FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1252                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1253                fprintf(stdout, "\n");
1254                }
1255              }
1256            else if (printname != NULL || number) fprintf(stdout, "\n");
1257          match = FALSE;          match = FALSE;
1258            if (line_buffered) fflush(stdout);
1259            rc = 0;                      /* Had some success */
1260            startoffset = offsets[1];    /* Restart after the match */
1261          goto ONLY_MATCHING_RESTART;          goto ONLY_MATCHING_RESTART;
1262          }          }
1263        }        }
# Line 1100  while (ptr < endptr) Line 1293  while (ptr < endptr)
1293            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1294            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1295            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1296            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1297            lastmatchrestart = pp;            lastmatchrestart = pp;
1298            }            }
1299          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
# Line 1123  while (ptr < endptr) Line 1316  while (ptr < endptr)
1316          int linecount = 0;          int linecount = 0;
1317          char *p = ptr;          char *p = ptr;
1318    
1319          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1320                 linecount < before_context)                 linecount < before_context)
1321            {            {
1322            linecount++;            linecount++;
1323            p = previous_line(p, buffer);            p = previous_line(p, main_buffer);
1324            }            }
1325    
1326          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 1140  while (ptr < endptr) Line 1333  while (ptr < endptr)
1333            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1334            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1335            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1336            fwrite(p, 1, pp - p, stdout);            FWRITE(p, 1, pp - p, stdout);
1337            p = pp;            p = pp;
1338            }            }
1339          }          }
# Line 1160  while (ptr < endptr) Line 1353  while (ptr < endptr)
1353        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1354        the match will always be before the first newline sequence. */        the match will always be before the first newline sequence. */
1355    
1356        if (multiline)        if (multiline & !invert)
1357          {          {
1358          int ellength;          char *endmatch = ptr + offsets[1];
1359          char *endmatch = ptr;          t = ptr;
1360          if (!invert)          while (t < endmatch)
1361            {            {
1362            endmatch += offsets[1];            t = end_of_line(t, endptr, &endlinelength);
1363            t = ptr;            if (t < endmatch) linenumber++; else break;
           while (t < endmatch)  
             {  
             t = end_of_line(t, endptr, &ellength);  
             if (t <= endmatch) linenumber++; else break;  
             }  
1364            }            }
1365          endmatch = end_of_line(endmatch, endptr, &ellength);          linelength = t - ptr - endlinelength;
         linelength = endmatch - ptr - ellength;  
1366          }          }
1367    
1368        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 1190  while (ptr < endptr) Line 1377  while (ptr < endptr)
1377          {          {
1378          int first = S_arg * 2;          int first = S_arg * 2;
1379          int last  = first + 1;          int last  = first + 1;
1380          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1381          fprintf(stdout, "X");          fprintf(stdout, "X");
1382          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1383          }          }
1384        else        else
1385  #endif  #endif
1386    
1387        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1388          matches, but not of course if the line is a non-match. */
1389    
1390        if (do_colour)        if (do_colour && !invert)
1391          {          {
1392          fwrite(ptr, 1, offsets[0], stdout);          int plength;
1393            FWRITE(ptr, 1, offsets[0], stdout);
1394          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1395          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1396          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1397          fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],          for (;;)
1398            stdout);            {
1399              startoffset = offsets[1];
1400              if (startoffset >= (int)linelength + endlinelength ||
1401                  !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1402                break;
1403              FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1404              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1405              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1406              fprintf(stdout, "%c[00m", 0x1b);
1407              }
1408    
1409            /* In multiline mode, we may have already printed the complete line
1410            and its line-ending characters (if they matched the pattern), so there
1411            may be no more to print. */
1412    
1413            plength = (int)((linelength + endlinelength) - startoffset);
1414            if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1415          }          }
1416        else fwrite(ptr, 1, linelength + endlinelength, stdout);  
1417          /* Not colouring; no need to search for further matches */
1418    
1419          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1420        }        }
1421    
1422      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match. If --line-buffered was
1423        given, flush the output. */
1424    
1425        if (line_buffered) fflush(stdout);
1426      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1427    
1428      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
# Line 1244  while (ptr < endptr) Line 1454  while (ptr < endptr)
1454    offset to the current line is maintained in filepos. */    offset to the current line is maintained in filepos. */
1455    
1456    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
1457    filepos += linelength + endlinelength;    filepos += (int)(linelength + endlinelength);
1458    linenumber++;    linenumber++;
1459    
1460      /* If input is line buffered, and the buffer is not yet full, read another
1461      line and add it into the buffer. */
1462    
1463      if (input_line_buffered && bufflength < (size_t)bufsize)
1464        {
1465        int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1466        bufflength += add;
1467        endptr += add;
1468        }
1469    
1470    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1471    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1472    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
1473    about to be lost, print them. */    about to be lost, print them. */
1474    
1475    if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)    if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1476      {      {
1477      if (after_context > 0 &&      if (after_context > 0 &&
1478          lastmatchnumber > 0 &&          lastmatchnumber > 0 &&
1479          lastmatchrestart < buffer + MBUFTHIRD)          lastmatchrestart < main_buffer + bufthird)
1480        {        {
1481        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);        do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1482        lastmatchnumber = 0;        lastmatchnumber = 0;
# Line 1264  while (ptr < endptr) Line 1484  while (ptr < endptr)
1484    
1485      /* Now do the shuffle */      /* Now do the shuffle */
1486    
1487      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1488      ptr -= MBUFTHIRD;      ptr -= bufthird;
1489    
1490  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
1491      if (frtype == FR_LIBZ)      if (frtype == FR_LIBZ)
1492        bufflength = 2*MBUFTHIRD +        bufflength = 2*bufthird +
1493          gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);          gzread (ingz, main_buffer + 2*bufthird, bufthird);
1494      else      else
1495  #endif  #endif
1496    
1497  #ifdef SUPPORT_LIBBZ2  #ifdef SUPPORT_LIBBZ2
1498      if (frtype == FR_LIBBZ2)      if (frtype == FR_LIBBZ2)
1499        bufflength = 2*MBUFTHIRD +        bufflength = 2*bufthird +
1500          BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);          BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1501      else      else
1502  #endif  #endif
1503    
1504      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);      bufflength = 2*bufthird +
1505          (input_line_buffered?
1506      endptr = buffer + bufflength;         read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1507           fread(main_buffer + 2*bufthird, 1, bufthird, in));
1508        endptr = main_buffer + bufflength;
1509    
1510      /* Adjust any last match point */      /* Adjust any last match point */
1511    
1512      if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;      if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1513      }      }
1514    }     /* Loop through the whole file */    }     /* Loop through the whole file */
1515    
1516  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
1517  hyphenpending if it prints something. */  hyphenpending if it prints something. */
1518    
1519  if (!only_matching && !count_only)  if (only_matching < 0 && !count_only)
1520    {    {
1521    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1522    hyphenpending |= endhyphenpending;    hyphenpending |= endhyphenpending;
# Line 1313  if (filenames == FN_NOMATCH_ONLY) Line 1535  if (filenames == FN_NOMATCH_ONLY)
1535    
1536  if (count_only)  if (count_only)
1537    {    {
1538    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1539    fprintf(stdout, "%d\n", count);      {
1540        if (printname != NULL && filenames != FN_NONE)
1541          fprintf(stdout, "%s:", printname);
1542        fprintf(stdout, "%d\n", count);
1543        }
1544    }    }
1545    
1546  return rc;  return rc;
# Line 1347  grep_or_recurse(char *pathname, BOOL dir Line 1573  grep_or_recurse(char *pathname, BOOL dir
1573  int rc = 1;  int rc = 1;
1574  int sep;  int sep;
1575  int frtype;  int frtype;
 int pathlen;  
1576  void *handle;  void *handle;
1577  FILE *in = NULL;           /* Ensure initialized */  FILE *in = NULL;           /* Ensure initialized */
1578    
# Line 1359  gzFile ingz = NULL; Line 1584  gzFile ingz = NULL;
1584  BZFILE *inbz2 = NULL;  BZFILE *inbz2 = NULL;
1585  #endif  #endif
1586    
1587    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBZ2
1588    int pathlen;
1589    #endif
1590    
1591  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1592    
1593  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1594    {    {
1595    return pcregrep(stdin, FR_PLAIN,    return pcregrep(stdin, FR_PLAIN, stdin_name,
1596      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1597        stdin_name : NULL);        stdin_name : NULL);
1598    }    }
# Line 1394  if ((sep = isdirectory(pathname)) != 0) Line 1623  if ((sep = isdirectory(pathname)) != 0)
1623        {        {
1624        int frc, nflen;        int frc, nflen;
1625        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1626        nflen = strlen(nextfile);        nflen = (int)(strlen(nextfile));
1627    
1628        if (isdirectory(buffer))        if (isdirectory(buffer))
1629          {          {
# Line 1438  skipping was not requested. The scan pro Line 1667  skipping was not requested. The scan pro
1667  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1668  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1669    
1670  pathlen = strlen(pathname);  #if defined SUPPORT_LIBZ || defined SUPPORT_LIBZ2
1671    pathlen = (int)(strlen(pathname));
1672    #endif
1673    
1674  /* Open using zlib if it is supported and the file name ends with .gz. */  /* Open using zlib if it is supported and the file name ends with .gz. */
1675    
# Line 1478  an attempt to read a .bz2 file indicates Line 1709  an attempt to read a .bz2 file indicates
1709  PLAIN_FILE:  PLAIN_FILE:
1710  #endif  #endif
1711    {    {
1712    in = fopen(pathname, "r");    in = fopen(pathname, "rb");
1713    handle = (void *)in;    handle = (void *)in;
1714    frtype = FR_PLAIN;    frtype = FR_PLAIN;
1715    }    }
# Line 1495  if (handle == NULL) Line 1726  if (handle == NULL)
1726    
1727  /* Now grep the file */  /* Now grep the file */
1728    
1729  rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||  rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1730    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1731    
1732  /* Close in an appropriate manner. */  /* Close in an appropriate manner. */
# Line 1506  if (frtype == FR_LIBZ) Line 1737  if (frtype == FR_LIBZ)
1737  else  else
1738  #endif  #endif
1739    
1740  /* If it is a .bz2 file and the result is 2, it means that the first attempt to  /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1741  read failed. If the error indicates that the file isn't in fact bzipped, try  read failed. If the error indicates that the file isn't in fact bzipped, try
1742  again as a normal file. */  again as a normal file. */
1743    
1744  #ifdef SUPPORT_LIBBZ2  #ifdef SUPPORT_LIBBZ2
1745  if (frtype == FR_LIBBZ2)  if (frtype == FR_LIBBZ2)
1746    {    {
1747    if (rc == 2)    if (rc == 3)
1748      {      {
1749      int errnum;      int errnum;
1750      const char *err = BZ2_bzerror(inbz2, &errnum);      const char *err = BZ2_bzerror(inbz2, &errnum);
# Line 1525  if (frtype == FR_LIBBZ2) Line 1756  if (frtype == FR_LIBBZ2)
1756      else if (!silent)      else if (!silent)
1757        fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",        fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1758          pathname, err);          pathname, err);
1759        rc = 2;    /* The normal "something went wrong" code */
1760      }      }
1761    BZ2_bzclose(inbz2);    BZ2_bzclose(inbz2);
1762    }    }
# Line 1600  for (op = optionlist; op->one_char != 0; Line 1832  for (op = optionlist; op->one_char != 0;
1832    {    {
1833    int n;    int n;
1834    char s[4];    char s[4];
1835    
1836      /* Two options were accidentally implemented and documented with underscores
1837      instead of hyphens in their names, something that was not noticed for quite a
1838      few releases. When fixing this, I left the underscored versions in the list
1839      in case people were using them. However, we don't want to display them in the
1840      help data. There are no other options that contain underscores, and we do not
1841      expect ever to implement such options. Therefore, just omit any option that
1842      contains an underscore. */
1843    
1844      if (strchr(op->long_name, '_') != NULL) continue;
1845    
1846    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1847    n = 30 - printf("  %s --%s", s, op->long_name);    n = 31 - printf("  %s --%s", s, op->long_name);
1848    if (n < 1) n = 1;    if (n < 1) n = 1;
1849    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                     ", op->help_text);
1850    }    }
1851    
1852  printf("\nWhen reading patterns from a file instead of using a command line option,\n");  printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
1853    printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
1854    printf("When reading patterns from a file instead of using a command line option,\n");
1855  printf("trailing white space is removed and blank lines are ignored.\n");  printf("trailing white space is removed and blank lines are ignored.\n");
1856  printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
1857      MAX_PATTERN_COUNT, PATBUFSIZE);
1858    
1859  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1860  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
# Line 1627  handle_option(int letter, int options) Line 1873  handle_option(int letter, int options)
1873  switch(letter)  switch(letter)
1874    {    {
1875    case N_FOFFSETS: file_offsets = TRUE; break;    case N_FOFFSETS: file_offsets = TRUE; break;
1876    case N_HELP: help(); exit(0);    case N_HELP: help(); pcregrep_exit(0);
1877      case N_LBUFFER: line_buffered = TRUE; break;
1878    case N_LOFFSETS: line_offsets = number = TRUE; break;    case N_LOFFSETS: line_offsets = number = TRUE; break;
1879      case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
1880    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1881    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1882    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
1883    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
1884    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1885    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1886    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1887    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1888    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1889    case 'o': only_matching = TRUE; break;    case 'o': only_matching = 0; break;
1890    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1891    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
1892    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
# Line 1649  switch(letter) Line 1897  switch(letter)
1897    
1898    case 'V':    case 'V':
1899    fprintf(stderr, "pcregrep version %s\n", pcre_version());    fprintf(stderr, "pcregrep version %s\n", pcre_version());
1900    exit(0);    pcregrep_exit(0);
1901    break;    break;
1902    
1903    default:    default:
1904    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1905    exit(usage(2));    pcregrep_exit(usage(2));
1906    }    }
1907    
1908  return options;  return options;
# Line 1709  Returns:         TRUE on success, FALSE Line 1957  Returns:         TRUE on success, FALSE
1957  static BOOL  static BOOL
1958  compile_single_pattern(char *pattern, int options, char *filename, int count)  compile_single_pattern(char *pattern, int options, char *filename, int count)
1959  {  {
1960  char buffer[MBUFTHIRD + 16];  char buffer[PATBUFSIZE];
1961  const char *error;  const char *error;
1962  int errptr;  int errptr;
1963    
# Line 1720  if (pattern_count >= MAX_PATTERN_COUNT) Line 1968  if (pattern_count >= MAX_PATTERN_COUNT)
1968    return FALSE;    return FALSE;
1969    }    }
1970    
1971  sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,  sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
1972    suffix[process_options]);    suffix[process_options]);
1973  pattern_list[pattern_count] =  pattern_list[pattern_count] =
1974    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
# Line 1779  compile_pattern(char *pattern, int optio Line 2027  compile_pattern(char *pattern, int optio
2027  if ((process_options & PO_FIXED_STRINGS) != 0)  if ((process_options & PO_FIXED_STRINGS) != 0)
2028    {    {
2029    char *eop = pattern + strlen(pattern);    char *eop = pattern + strlen(pattern);
2030    char buffer[MBUFTHIRD];    char buffer[PATBUFSIZE];
2031    for(;;)    for(;;)
2032      {      {
2033      int ellength;      int ellength;
# Line 1817  char *patterns[MAX_PATTERN_COUNT]; Line 2065  char *patterns[MAX_PATTERN_COUNT];
2065  const char *locale_from = "--locale";  const char *locale_from = "--locale";
2066  const char *error;  const char *error;
2067    
2068    #ifdef SUPPORT_PCREGREP_JIT
2069    pcre_jit_stack *jit_stack = NULL;
2070    #endif
2071    
2072  /* Set the default line ending value from the default in the PCRE library;  /* Set the default line ending value from the default in the PCRE library;
2073  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2074  */  Note that the return values from pcre_config(), though derived from the ASCII
2075    codes, are the same in EBCDIC environments, so we must use the actual values
2076    rather than escapes such as '\r'. */
2077    
2078  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2079  switch(i)  switch(i)
2080    {    {
2081    default:                 newline = (char *)"lf"; break;    default:               newline = (char *)"lf"; break;
2082    case '\r':               newline = (char *)"cr"; break;    case 13:               newline = (char *)"cr"; break;
2083    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2084    case -1:                 newline = (char *)"any"; break;    case -1:               newline = (char *)"any"; break;
2085    case -2:                 newline = (char *)"anycrlf"; break;    case -2:               newline = (char *)"anycrlf"; break;
2086    }    }
2087    
2088  /* Process the options */  /* Process the options */
# Line 1848  for (i = 1; i < argc; i++) Line 2102  for (i = 1; i < argc; i++)
2102    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2103      {      {
2104      if (pattern_filename != NULL || pattern_count > 0) break;      if (pattern_filename != NULL || pattern_count > 0) break;
2105        else exit(usage(2));        else pcregrep_exit(usage(2));
2106      }      }
2107    
2108    /* Handle a long name option, or -- to terminate the options */    /* Handle a long name option, or -- to terminate the options */
# Line 1870  for (i = 1; i < argc; i++) Line 2124  for (i = 1; i < argc; i++)
2124      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2125      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2126      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2127      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
2128      these categories, fortunately. */      both these categories. */
2129    
2130      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2131        {        {
2132        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2133        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2134        if (opbra == NULL)     /* Not a (p) case */  
2135          /* Handle options with only one spelling of the name */
2136    
2137          if (opbra == NULL)     /* Does not contain '(' */
2138          {          {
2139          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2140            {            {
# Line 1885  for (i = 1; i < argc; i++) Line 2142  for (i = 1; i < argc; i++)
2142            }            }
2143          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2144            {            {
2145            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2146            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2147                (int)strlen(arg) : (int)(argequals - arg);
2148            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2149              {              {
2150              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1899  for (i = 1; i < argc; i++) Line 2157  for (i = 1; i < argc; i++)
2157              }              }
2158            }            }
2159          }          }
2160        else                   /* Special case xxxx(p) */  
2161          /* Handle options with an alternate spelling of the name */
2162    
2163          else
2164          {          {
2165          char buff1[24];          char buff1[24];
2166          char buff2[24];          char buff2[24];
2167          int baselen = opbra - op->long_name;  
2168            int baselen = (int)(opbra - op->long_name);
2169            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2170            int arglen = (argequals == NULL || equals == NULL)?
2171              (int)strlen(arg) : (int)(argequals - arg);
2172    
2173          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2174          sprintf(buff2, "%s%.*s", buff1,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2175            (int)strlen(op->long_name) - baselen - 2, opbra + 1);  
2176          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2177               strncmp(arg, buff2, arglen) == 0)
2178              {
2179              if (equals != NULL && argequals != NULL)
2180                {
2181                option_data = argequals;
2182                if (*option_data == '=')
2183                  {
2184                  option_data++;
2185                  longopwasequals = TRUE;
2186                  }
2187                }
2188            break;            break;
2189              }
2190          }          }
2191        }        }
2192    
2193      if (op->one_char == 0)      if (op->one_char == 0)
2194        {        {
2195        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2196        exit(usage(2));        pcregrep_exit(usage(2));
2197        }        }
2198      }      }
2199    
   
2200    /* Jeffrey Friedl's debugging harness uses these additional options which    /* Jeffrey Friedl's debugging harness uses these additional options which
2201    are not in the right form for putting in the option table because they use    are not in the right form for putting in the option table because they use
2202    only one hyphen, yet are more than one character long. By putting them    only one hyphen, yet are more than one character long. By putting them
# Line 1953  for (i = 1; i < argc; i++) Line 2230  for (i = 1; i < argc; i++)
2230      while (*s != 0)      while (*s != 0)
2231        {        {
2232        for (op = optionlist; op->one_char != 0; op++)        for (op = optionlist; op->one_char != 0; op++)
2233          { if (*s == op->one_char) break; }          {
2234            if (*s == op->one_char) break;
2235            }
2236        if (op->one_char == 0)        if (op->one_char == 0)
2237          {          {
2238          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2239            *s, argv[i]);            *s, argv[i]);
2240          exit(usage(2));          pcregrep_exit(usage(2));
2241          }          }
2242        if (op->type != OP_NODATA || s[1] == 0)  
2243          /* Check for a single-character option that has data: OP_OP_NUMBER
2244          is used for one that either has a numeric value or defaults, i.e. the
2245          data is optional. If a digit follows, there is data; if not, carry on
2246          with other single-character options in the same string. */
2247    
2248          option_data = s+1;
2249          if (op->type == OP_OP_NUMBER)
2250            {
2251            if (isdigit((unsigned char)s[1])) break;
2252            }
2253          else   /* Check for end or a dataless option */
2254          {          {
2255          option_data = s+1;          if (op->type != OP_NODATA || s[1] == 0) break;
         break;  
2256          }          }
2257    
2258          /* Handle a single-character option with no data, then loop for the
2259          next character in the string. */
2260    
2261        pcre_options = handle_option(*s++, pcre_options);        pcre_options = handle_option(*s++, pcre_options);
2262        }        }
2263      }      }
# Line 1981  for (i = 1; i < argc; i++) Line 2274  for (i = 1; i < argc; i++)
2274    
2275    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2276    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
2277    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r",
2278    Jeffrey Friedl's special -S debugging option. */    "only-matching", and Jeffrey Friedl's special -S debugging option. */
2279    
2280    if (*option_data == 0 &&    if (*option_data == 0 &&
2281        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
# Line 1992  for (i = 1; i < argc; i++) Line 2285  for (i = 1; i < argc; i++)
2285        case N_COLOUR:        case N_COLOUR:
2286        colour_option = (char *)"auto";        colour_option = (char *)"auto";
2287        break;        break;
2288    
2289          case 'o':
2290          only_matching = 0;
2291          break;
2292    
2293  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
2294        case 'S':        case 'S':
2295        S_arg = 0;        S_arg = 0;
# Line 2008  for (i = 1; i < argc; i++) Line 2306  for (i = 1; i < argc; i++)
2306      if (i >= argc - 1 || longopwasequals)      if (i >= argc - 1 || longopwasequals)
2307        {        {
2308        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2309        exit(usage(2));        pcregrep_exit(usage(2));
2310        }        }
2311      option_data = argv[++i];      option_data = argv[++i];
2312      }      }
# Line 2029  for (i = 1; i < argc; i++) Line 2327  for (i = 1; i < argc; i++)
2327    
2328    /* Otherwise, deal with single string or numeric data values. */    /* Otherwise, deal with single string or numeric data values. */
2329    
2330    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)    else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2331               op->type != OP_OP_NUMBER)
2332      {      {
2333      *((char **)op->dataptr) = option_data;      *((char **)op->dataptr) = option_data;
2334      }      }
2335    
2336      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2337      only for unpicking arguments, so just keep it simple. */
2338    
2339    else    else
2340      {      {
2341      char *endptr;      unsigned long int n = 0;
2342      int n = strtoul(option_data, &endptr, 10);      char *endptr = option_data;
2343        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2344        while (isdigit((unsigned char)(*endptr)))
2345          n = n * 10 + (int)(*endptr++ - '0');
2346        if (toupper(*endptr) == 'K')
2347          {
2348          n *= 1024;
2349          endptr++;
2350          }
2351        else if (toupper(*endptr) == 'M')
2352          {
2353          n *= 1024*1024;
2354          endptr++;
2355          }
2356      if (*endptr != 0)      if (*endptr != 0)
2357        {        {
2358        if (longop)        if (longop)
2359          {          {
2360          char *equals = strchr(op->long_name, '=');          char *equals = strchr(op->long_name, '=');
2361          int nlen = (equals == NULL)? (int)strlen(op->long_name) :          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2362            equals - op->long_name;            (int)(equals - op->long_name);
2363          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2364            option_data, nlen, op->long_name);            option_data, nlen, op->long_name);
2365          }          }
2366        else        else
2367          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2368            option_data, op->one_char);            option_data, op->one_char);
2369        exit(usage(2));        pcregrep_exit(usage(2));
2370        }        }
2371      *((int *)op->dataptr) = n;      if (op->type == OP_LONGNUMBER)
2372            *((unsigned long int *)op->dataptr) = n;
2373        else
2374            *((int *)op->dataptr) = n;
2375      }      }
2376    }    }
2377    
# Line 2066  if (both_context > 0) Line 2385  if (both_context > 0)
2385    }    }
2386    
2387  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2388  However, the latter two set the only_matching flag. */  However, the latter two set only_matching. */
2389    
2390  if ((only_matching && (file_offsets || line_offsets)) ||  if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2391      (file_offsets && line_offsets))      (file_offsets && line_offsets))
2392    {    {
2393    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2394      "and/or --line-offsets\n");      "and/or --line-offsets\n");
2395    exit(usage(2));    pcregrep_exit(usage(2));
2396    }    }
2397    
2398  if (file_offsets || line_offsets) only_matching = TRUE;  if (file_offsets || line_offsets) only_matching = 0;
2399    
2400  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2401  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
# Line 2200  if (jfriedl_XT != 0 || jfriedl_XR != 0) Line 2519  if (jfriedl_XT != 0 || jfriedl_XR != 0)
2519    }    }
2520  #endif  #endif
2521    
2522  /* Get memory to store the pattern and hints lists. */  /* Get memory for the main buffer, and to store the pattern and hints lists. */
2523    
2524    bufsize = 3*bufthird;
2525    main_buffer = (char *)malloc(bufsize);
2526  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2527  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2528    
2529  if (pattern_list == NULL || hints_list == NULL)  if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
2530    {    {
2531    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
2532    goto EXIT2;    goto EXIT2;
# Line 2237  if (pattern_filename != NULL) Line 2558  if (pattern_filename != NULL)
2558    int linenumber = 0;    int linenumber = 0;
2559    FILE *f;    FILE *f;
2560    char *filename;    char *filename;
2561    char buffer[MBUFTHIRD];    char buffer[PATBUFSIZE];
2562    
2563    if (strcmp(pattern_filename, "-") == 0)    if (strcmp(pattern_filename, "-") == 0)
2564      {      {
# Line 2256  if (pattern_filename != NULL) Line 2577  if (pattern_filename != NULL)
2577      filename = pattern_filename;      filename = pattern_filename;
2578      }      }
2579    
2580    while (fgets(buffer, MBUFTHIRD, f) != NULL)    while (fgets(buffer, PATBUFSIZE, f) != NULL)
2581      {      {
2582      char *s = buffer + (int)strlen(buffer);      char *s = buffer + (int)strlen(buffer);
2583      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
# Line 2270  if (pattern_filename != NULL) Line 2591  if (pattern_filename != NULL)
2591    if (f != stdin) fclose(f);    if (f != stdin) fclose(f);
2592    }    }
2593    
2594  /* Study the regular expressions, as we will be running them many times */  /* Study the regular expressions, as we will be running them many times. Unless
2595    JIT has been explicitly disabled, arrange a stack for it to use. */
2596    
2597    #ifdef SUPPORT_PCREGREP_JIT
2598    if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2599      jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
2600    #endif
2601    
2602  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
2603    {    {
2604    hints_list[j] = pcre_study(pattern_list[j], 0, &error);    hints_list[j] = pcre_study(pattern_list[j], study_options, &error);
2605    if (error != NULL)    if (error != NULL)
2606      {      {
2607      char s[16];      char s[16];
# Line 2283  for (j = 0; j < pattern_count; j++) Line 2610  for (j = 0; j < pattern_count; j++)
2610      goto EXIT2;      goto EXIT2;
2611      }      }
2612    hint_count++;    hint_count++;
2613    #ifdef SUPPORT_PCREGREP_JIT
2614      if (jit_stack != NULL && hints_list[j] != NULL)
2615        pcre_assign_jit_stack(hints_list[j], NULL, jit_stack);
2616    #endif
2617      }
2618    
2619    /* If --match-limit or --recursion-limit was set, put the value(s) into the
2620    pcre_extra block for each pattern. */
2621    
2622    if (match_limit > 0 || match_limit_recursion > 0)
2623      {
2624      for (j = 0; j < pattern_count; j++)
2625        {
2626        if (hints_list[j] == NULL)
2627          {
2628          hints_list[j] = malloc(sizeof(pcre_extra));
2629          if (hints_list[j] == NULL)
2630            {
2631            fprintf(stderr, "pcregrep: malloc failed\n");
2632            pcregrep_exit(2);
2633            }
2634          }
2635        if (match_limit > 0)
2636          {
2637          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2638          hints_list[j]->match_limit = match_limit;
2639          }
2640        if (match_limit_recursion > 0)
2641          {
2642          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2643          hints_list[j]->match_limit_recursion = match_limit_recursion;
2644          }
2645        }
2646    }    }
2647    
2648  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
# Line 2339  if (include_dir_pattern != NULL) Line 2699  if (include_dir_pattern != NULL)
2699    
2700  if (i >= argc)  if (i >= argc)
2701    {    {
2702    rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);    rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2703        (filenames > FN_DEFAULT)? stdin_name : NULL);
2704    goto EXIT;    goto EXIT;
2705    }    }
2706    
# Line 2359  for (; i < argc; i++) Line 2720  for (; i < argc; i++)
2720    }    }
2721    
2722  EXIT:  EXIT:
2723    #ifdef SUPPORT_PCREGREP_JIT
2724    if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2725    #endif
2726    if (main_buffer != NULL) free(main_buffer);
2727  if (pattern_list != NULL)  if (pattern_list != NULL)
2728    {    {
2729    for (i = 0; i < pattern_count; i++) free(pattern_list[i]);    for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
# Line 2366  if (pattern_list != NULL) Line 2731  if (pattern_list != NULL)
2731    }    }
2732  if (hints_list != NULL)  if (hints_list != NULL)
2733    {    {
2734    for (i = 0; i < hint_count; i++) free(hints_list[i]);    for (i = 0; i < hint_count; i++)
2735        {
2736        if (hints_list[i] != NULL) pcre_free_study(hints_list[i]);
2737        }
2738    free(hints_list);    free(hints_list);
2739    }    }
2740  return rc;  pcregrep_exit(rc);
2741    
2742  EXIT2:  EXIT2:
2743  rc = 2;  rc = 2;

Legend:
Removed from v.345  
changed lines
  Added in v.879

  ViewVC Help
Powered by ViewVC 1.1.5