/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 152 by ph10, Tue Apr 17 15:55:53 2007 UTC revision 535 by ph10, Thu Jun 3 19:18:24 2010 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  */  */
39    
40  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
41  #  include <config.h>  #include "config.h"
42  #endif  #endif
43    
44  #include <ctype.h>  #include <ctype.h>
# Line 50  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54  #ifdef HAVE_UNISTD_H  #ifdef HAVE_UNISTD_H
55  #  include <unistd.h>  #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64  #endif  #endif
65    
66  #include <pcre.h>  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
69  #define TRUE 1  #define TRUE 1
# Line 62  POSSIBILITY OF SUCH DAMAGE. Line 71  POSSIBILITY OF SUCH DAMAGE.
71  typedef int BOOL;  typedef int BOOL;
72    
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define MBUFTHIRD BUFSIZ
# Line 73  typedef int BOOL; Line 83  typedef int BOOL;
83  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
84  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
85    
86  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92  /* Actions for the -d and -D options */  /* Actions for the -d and -D options */
93    
# Line 90  enum { DEE_READ, DEE_SKIP }; Line 104  enum { DEE_READ, DEE_SKIP };
104    
105  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
113    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
114    
115    
116    
117  /*************************************************  /*************************************************
# Line 126  static pcre_extra **hints_list = NULL; Line 148  static pcre_extra **hints_list = NULL;
148    
149  static char *include_pattern = NULL;  static char *include_pattern = NULL;
150  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
151    static char *include_dir_pattern = NULL;
152    static char *exclude_dir_pattern = NULL;
153    
154  static pcre *include_compiled = NULL;  static pcre *include_compiled = NULL;
155  static pcre *exclude_compiled = NULL;  static pcre *exclude_compiled = NULL;
156    static pcre *include_dir_compiled = NULL;
157    static pcre *exclude_dir_compiled = NULL;
158    
159  static int after_context = 0;  static int after_context = 0;
160  static int before_context = 0;  static int before_context = 0;
# Line 141  static int process_options = 0; Line 167  static int process_options = 0;
167    
168  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
169  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
170    static BOOL file_offsets = FALSE;
171  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
172  static BOOL invert = FALSE;  static BOOL invert = FALSE;
173    static BOOL line_buffered = FALSE;
174    static BOOL line_offsets = FALSE;
175  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
176  static BOOL number = FALSE;  static BOOL number = FALSE;
177    static BOOL omit_zero_count = FALSE;
178  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
179  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
180  static BOOL silent = FALSE;  static BOOL silent = FALSE;
# Line 166  typedef struct option_item { Line 196  typedef struct option_item {
196  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
197  used to identify them. */  used to identify them. */
198    
199  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
200  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
201  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
202  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
203  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
204  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
205  #define N_NULL      (-7)  #define N_LABEL        (-7)
206    #define N_LOCALE       (-8)
207    #define N_NULL         (-9)
208    #define N_LOFFSETS     (-10)
209    #define N_FOFFSETS     (-11)
210    #define N_LBUFFER      (-12)
211    
212  static option_item optionlist[] = {  static option_item optionlist[] = {
213    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
# Line 185  static option_item optionlist[] = { Line 220  static option_item optionlist[] = {
220    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
221    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
222    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
223    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_PATLIST,   'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
224    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_NODATA,    'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
225    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
226      { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
227    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
228    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
229    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
230    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
231    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
232    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
233      { OP_NODATA,    N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
234      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
235    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
236    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
237    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },    { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
238    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
239    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
240    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
241    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
242    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
243    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
244      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
245      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
246  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
247    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
248  #endif  #endif
# Line 302  return (statbuf.st_mode & S_IFMT) == S_I Line 342  return (statbuf.st_mode & S_IFMT) == S_I
342  }  }
343    
344    
345  /************* Test stdout for being a terminal in Unix **********/  /************* Test for a terminal in Unix **********/
346    
347  static BOOL  static BOOL
348  is_stdout_tty(void)  is_stdout_tty(void)
# Line 310  is_stdout_tty(void) Line 350  is_stdout_tty(void)
350  return isatty(fileno(stdout));  return isatty(fileno(stdout));
351  }  }
352    
353    static BOOL
354    is_file_tty(FILE *f)
355    {
356    return isatty(fileno(f));
357    }
358    
359    
360  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
361    
362  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
363  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
364  when it did not exist. */  when it did not exist. David Byron added a patch that moved the #include of
365    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
366    */
367    
368  #elif HAVE_WINDOWS_H  #elif HAVE_WINDOWS_H
369    
# Line 326  when it did not exist. */ Line 373  when it did not exist. */
373  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
374  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
375  #endif  #endif
376    
377    #include <windows.h>
378    
379  #ifndef INVALID_FILE_ATTRIBUTES  #ifndef INVALID_FILE_ATTRIBUTES
380  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
381  #endif  #endif
382    
 #include <windows.h>  
   
383  typedef struct directory_type  typedef struct directory_type
384  {  {
385  HANDLE handle;  HANDLE handle;
# Line 416  regular if they are not directories. */ Line 464  regular if they are not directories. */
464    
465  int isregfile(char *filename)  int isregfile(char *filename)
466  {  {
467  return !isdirectory(filename)  return !isdirectory(filename);
468  }  }
469    
470    
471  /************* Test stdout for being a terminal in Win32 **********/  /************* Test for a terminal in Win32 **********/
472    
473  /* I don't know how to do this; assume never */  /* I don't know how to do this; assume never */
474    
475  static BOOL  static BOOL
476  is_stdout_tty(void)  is_stdout_tty(void)
477  {  {
478  FALSE;  return FALSE;
479    }
480    
481    static BOOL
482    is_file_tty(FILE *f)
483    {
484    return FALSE;
485  }  }
486    
487    
# Line 452  void closedirectory(directory_type *dir) Line 506  void closedirectory(directory_type *dir)
506  int isregfile(char *filename) { return 1; }  int isregfile(char *filename) { return 1; }
507    
508    
509  /************* Test stdout for being a terminal when we can't do it **********/  /************* Test for a terminal when we can't do it **********/
510    
511  static BOOL  static BOOL
512  is_stdout_tty(void)  is_stdout_tty(void)
# Line 460  is_stdout_tty(void) Line 514  is_stdout_tty(void)
514  return FALSE;  return FALSE;
515  }  }
516    
517    static BOOL
518    is_file_tty(FILE *f)
519    {
520    return FALSE;
521    }
522    
523  #endif  #endif
524    
# Line 488  return sys_errlist[n]; Line 547  return sys_errlist[n];
547    
548    
549  /*************************************************  /*************************************************
550    *            Read one line of input              *
551    *************************************************/
552    
553    /* Normally, input is read using fread() into a large buffer, so many lines may
554    be read at once. However, doing this for tty input means that no output appears
555    until a lot of input has been typed. Instead, tty input is handled line by
556    line. We cannot use fgets() for this, because it does not stop at a binary
557    zero, and therefore there is no way of telling how many characters it has read,
558    because there may be binary zeros embedded in the data.
559    
560    Arguments:
561      buffer     the buffer to read into
562      length     the maximum number of characters to read
563      f          the file
564    
565    Returns:     the number of characters read, zero at end of file
566    */
567    
568    static int
569    read_one_line(char *buffer, int length, FILE *f)
570    {
571    int c;
572    int yield = 0;
573    while ((c = fgetc(f)) != EOF)
574      {
575      buffer[yield++] = c;
576      if (c == '\n' || yield >= length) break;
577      }
578    return yield;
579    }
580    
581    
582    
583    /*************************************************
584  *             Find end of line                   *  *             Find end of line                   *
585  *************************************************/  *************************************************/
586    
# Line 782  if (after_context > 0 && lastmatchnumber Line 875  if (after_context > 0 && lastmatchnumber
875      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
876      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
877      pp = end_of_line(pp, endptr, &ellength);      pp = end_of_line(pp, endptr, &ellength);
878      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
879      lastmatchrestart = pp;      lastmatchrestart = pp;
880      }      }
881    hyphenpending = TRUE;    hyphenpending = TRUE;
# Line 792  if (after_context > 0 && lastmatchnumber Line 885  if (after_context > 0 && lastmatchnumber
885    
886    
887  /*************************************************  /*************************************************
888    *   Apply patterns to subject till one matches   *
889    *************************************************/
890    
891    /* This function is called to run through all patterns, looking for a match. It
892    is used multiple times for the same subject when colouring is enabled, in order
893    to find all possible matches.
894    
895    Arguments:
896      matchptr    the start of the subject
897      length      the length of the subject to match
898      offsets     the offsets vector to fill in
899      mrc         address of where to put the result of pcre_exec()
900    
901    Returns:      TRUE if there was a match
902                  FALSE if there was no match
903                  invert if there was a non-fatal error
904    */
905    
906    static BOOL
907    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
908    {
909    int i;
910    for (i = 0; i < pattern_count; i++)
911      {
912      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
913        PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
914      if (*mrc >= 0) return TRUE;
915      if (*mrc == PCRE_ERROR_NOMATCH) continue;
916      fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
917      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
918      fprintf(stderr, "this text:\n");
919      FWRITE(matchptr, 1, length, stderr);   /* In case binary zero included */
920      fprintf(stderr, "\n");
921      if (error_count == 0 &&
922          (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
923        {
924        fprintf(stderr, "pcregrep: error %d means that a resource limit "
925          "was exceeded\n", *mrc);
926        fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
927        }
928      if (error_count++ > 20)
929        {
930        fprintf(stderr, "pcregrep: too many errors - abandoned\n");
931        exit(2);
932        }
933      return invert;    /* No more matching; don't show the line again */
934      }
935    
936    return FALSE;  /* No match, no errors */
937    }
938    
939    
940    
941    /*************************************************
942  *            Grep an individual file             *  *            Grep an individual file             *
943  *************************************************/  *************************************************/
944    
# Line 803  be in the middle third most of the time, Line 950  be in the middle third most of the time,
950  "before" context printing.  "before" context printing.
951    
952  Arguments:  Arguments:
953    in           the fopened FILE stream    handle       the fopened FILE stream for a normal file
954                   the gzFile pointer when reading is via libz
955                   the BZFILE pointer when reading is via libbz2
956      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
957    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
958                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
959                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
960    
961  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
962                 1 otherwise (no matches)                 1 otherwise (no matches)
963                   2 if there is a read error on a .bz2 file
964  */  */
965    
966  static int  static int
967  pcregrep(FILE *in, char *printname)  pcregrep(void *handle, int frtype, char *printname)
968  {  {
969  int rc = 1;  int rc = 1;
970  int linenumber = 1;  int linenumber = 1;
971  int lastmatchnumber = 0;  int lastmatchnumber = 0;
972  int count = 0;  int count = 0;
973  int offsets[99];  int filepos = 0;
974    int offsets[OFFSET_SIZE];
975  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
976  char buffer[3*MBUFTHIRD];  char buffer[3*MBUFTHIRD];
977  char *ptr = buffer;  char *ptr = buffer;
978  char *endptr;  char *endptr;
979  size_t bufflength;  size_t bufflength;
980  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
981    BOOL input_line_buffered = line_buffered;
982    FILE *in = NULL;                    /* Ensure initialized */
983    
984    #ifdef SUPPORT_LIBZ
985    gzFile ingz = NULL;
986    #endif
987    
988    #ifdef SUPPORT_LIBBZ2
989    BZFILE *inbz2 = NULL;
990    #endif
991    
 /* Do the first read into the start of the buffer and set up the pointer to  
 end of what we have. */  
992    
993  bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  /* Do the first read into the start of the buffer and set up the pointer to end
994    of what we have. In the case of libz, a non-zipped .gz file will be read as a
995    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
996    fail. */
997    
998    #ifdef SUPPORT_LIBZ
999    if (frtype == FR_LIBZ)
1000      {
1001      ingz = (gzFile)handle;
1002      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1003      }
1004    else
1005    #endif
1006    
1007    #ifdef SUPPORT_LIBBZ2
1008    if (frtype == FR_LIBBZ2)
1009      {
1010      inbz2 = (BZFILE *)handle;
1011      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1012      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1013      }                                    /* without the cast it is unsigned. */
1014    else
1015    #endif
1016    
1017      {
1018      in = (FILE *)handle;
1019      if (is_file_tty(in)) input_line_buffered = TRUE;
1020      bufflength = input_line_buffered?
1021        read_one_line(buffer, 3*MBUFTHIRD, in) :
1022        fread(buffer, 1, 3*MBUFTHIRD, in);
1023      }
1024    
1025  endptr = buffer + bufflength;  endptr = buffer + bufflength;
1026    
1027  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
# Line 840  way, the buffer is shifted left and re-f Line 1031  way, the buffer is shifted left and re-f
1031    
1032  while (ptr < endptr)  while (ptr < endptr)
1033    {    {
1034    int i, endlinelength;    int endlinelength;
1035    int mrc = 0;    int mrc = 0;
1036    BOOL match = FALSE;    BOOL match;
1037      char *matchptr = ptr;
1038    char *t = ptr;    char *t = ptr;
1039    size_t length, linelength;    size_t length, linelength;
1040    
1041    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
1042    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
1043    length remainder of the data in the buffer. Otherwise, it is the length of    length remainder of the data in the buffer. Otherwise, it is the length of
1044    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
1045    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1046    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
1047      first line. */
1048    
1049    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
1050    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
1051    length = multiline? endptr - ptr : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
1052    
1053    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
1054    
# Line 866  while (ptr < endptr) Line 1059  while (ptr < endptr)
1059        #include <time.h>        #include <time.h>
1060        struct timeval start_time, end_time;        struct timeval start_time, end_time;
1061        struct timezone dummy;        struct timezone dummy;
1062          int i;
1063    
1064        if (jfriedl_XT)        if (jfriedl_XT)
1065        {        {
# Line 891  while (ptr < endptr) Line 1085  while (ptr < endptr)
1085    
1086    
1087        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1088            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1089                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1090    
1091        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
1092                perror("bad gettimeofday");                perror("bad gettimeofday");
# Line 905  while (ptr < endptr) Line 1100  while (ptr < endptr)
1100    }    }
1101  #endif  #endif
1102    
1103      /* We come back here after a match when the -o option (only_matching) is set,
1104      in order to find any further matches in the same line. */
1105    
1106    /* Run through all the patterns until one matches. Note that we don't include    ONLY_MATCHING_RESTART:
   the final newline in the subject string. */  
1107    
1108    for (i = 0; i < pattern_count; i++)    /* Run through all the patterns until one matches or there is an error other
1109      {    than NOMATCH. This code is in a subroutine so that it can be re-used for
1110      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,    finding subsequent matches when colouring matched lines. */
1111        offsets, 99);  
1112      if (mrc >= 0) { match = TRUE; break; }    match = match_patterns(matchptr, length, offsets, &mrc);
     if (mrc != PCRE_ERROR_NOMATCH)  
       {  
       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);  
       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
       fprintf(stderr, "this line:\n");  
       fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */  
       fprintf(stderr, "\n");  
       if (error_count == 0 &&  
           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))  
         {  
         fprintf(stderr, "pcregrep: error %d means that a resource limit "  
           "was exceeded\n", mrc);  
         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
         }  
       if (error_count++ > 20)  
         {  
         fprintf(stderr, "pcregrep: too many errors - abandoned\n");  
         exit(2);  
         }  
       match = invert;    /* No more matching; don't show the line again */  
       break;  
       }  
     }  
1113    
1114    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1115    
# Line 955  while (ptr < endptr) Line 1128  while (ptr < endptr)
1128      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1129      in the file. */      in the file. */
1130    
1131      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1132        {        {
1133        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1134        return 0;        return 0;
# Line 966  while (ptr < endptr) Line 1139  while (ptr < endptr)
1139      else if (quiet) return 0;      else if (quiet) return 0;
1140    
1141      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, and
1142      does not pring any context. */      the --file-offsets and --line-offsets options output offsets for the
1143        matching substring (they both force --only-matching). None of these options
1144        prints any context. Afterwards, adjust the start and length, and then jump
1145        back to look for further matches in the same line. If we are in invert
1146        mode, however, nothing is printed - this could still be useful because the
1147        return code is set. */
1148    
1149      else if (only_matching)      else if (only_matching)
1150        {        {
1151        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (!invert)
1152        if (number) fprintf(stdout, "%d:", linenumber);          {
1153        fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1154        fprintf(stdout, "\n");          if (number) fprintf(stdout, "%d:", linenumber);
1155            if (line_offsets)
1156              fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1157                offsets[1] - offsets[0]);
1158            else if (file_offsets)
1159              fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1160                offsets[1] - offsets[0]);
1161            else
1162              {
1163              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1164              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1165              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1166              }
1167            fprintf(stdout, "\n");
1168            matchptr += offsets[1];
1169            length -= offsets[1];
1170            match = FALSE;
1171            goto ONLY_MATCHING_RESTART;
1172            }
1173        }        }
1174    
1175      /* This is the default case when none of the above options is set. We print      /* This is the default case when none of the above options is set. We print
# Line 1007  while (ptr < endptr) Line 1203  while (ptr < endptr)
1203            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1204            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1205            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1206            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1207            lastmatchrestart = pp;            lastmatchrestart = pp;
1208            }            }
1209          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
# Line 1047  while (ptr < endptr) Line 1243  while (ptr < endptr)
1243            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1244            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1245            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1246            fwrite(p, 1, pp - p, stdout);            FWRITE(p, 1, pp - p, stdout);
1247            p = pp;            p = pp;
1248            }            }
1249          }          }
# Line 1063  while (ptr < endptr) Line 1259  while (ptr < endptr)
1259    
1260        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1261        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1262        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1263        start of the match will always be before the first newline sequence. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1264          the match will always be before the first newline sequence. */
1265    
1266        if (multiline)        if (multiline)
1267          {          {
1268          int ellength;          int ellength;
1269          char *endmatch = ptr + offsets[1];          char *endmatch = ptr;
1270          t = ptr;          if (!invert)
         while (t < endmatch)  
1271            {            {
1272            t = end_of_line(t, endptr, &ellength);            endmatch += offsets[1];
1273            if (t <= endmatch) linenumber++; else break;            t = ptr;
1274              while (t < endmatch)
1275                {
1276                t = end_of_line(t, endptr, &ellength);
1277                if (t <= endmatch) linenumber++; else break;
1278                }
1279            }            }
1280          endmatch = end_of_line(endmatch, endptr, &ellength);          endmatch = end_of_line(endmatch, endptr, &ellength);
1281          linelength = endmatch - ptr - ellength;          linelength = endmatch - ptr - ellength;
# Line 1092  while (ptr < endptr) Line 1293  while (ptr < endptr)
1293          {          {
1294          int first = S_arg * 2;          int first = S_arg * 2;
1295          int last  = first + 1;          int last  = first + 1;
1296          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1297          fprintf(stdout, "X");          fprintf(stdout, "X");
1298          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1299          }          }
1300        else        else
1301  #endif  #endif
1302    
1303        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1304          matches. */
1305    
1306        if (do_colour)        if (do_colour)
1307          {          {
1308          fwrite(ptr, 1, offsets[0], stdout);          int last_offset = 0;
1309            FWRITE(ptr, 1, offsets[0], stdout);
1310          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1311          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1312          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1313          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          for (;;)
1314              {
1315              last_offset += offsets[1];
1316              matchptr += offsets[1];
1317              length -= offsets[1];
1318              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1319              FWRITE(matchptr, 1, offsets[0], stdout);
1320              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1321              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1322              fprintf(stdout, "%c[00m", 0x1b);
1323              }
1324            FWRITE(ptr + last_offset, 1,
1325              (linelength + endlinelength) - last_offset, stdout);
1326          }          }
1327        else fwrite(ptr, 1, linelength + endlinelength, stdout);  
1328          /* Not colouring; no need to search for further matches */
1329    
1330          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1331        }        }
1332    
1333      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match. If --line-buffered was
1334        given, flush the output. */
1335    
1336        if (line_buffered) fflush(stdout);
1337      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1338    
1339      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
# Line 1123  while (ptr < endptr) Line 1343  while (ptr < endptr)
1343      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1344      }      }
1345    
1346    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1347      anything to be printed), we have to move on to the end of the match before
1348      proceeding. */
1349    
1350      if (multiline && invert && match)
1351        {
1352        int ellength;
1353        char *endmatch = ptr + offsets[1];
1354        t = ptr;
1355        while (t < endmatch)
1356          {
1357          t = end_of_line(t, endptr, &ellength);
1358          if (t <= endmatch) linenumber++; else break;
1359          }
1360        endmatch = end_of_line(endmatch, endptr, &ellength);
1361        linelength = endmatch - ptr - ellength;
1362        }
1363    
1364      /* Advance to after the newline and increment the line number. The file
1365      offset to the current line is maintained in filepos. */
1366    
1367    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
1368      filepos += (int)(linelength + endlinelength);
1369    linenumber++;    linenumber++;
1370    
1371      /* If input is line buffered, and the buffer is not yet full, read another
1372      line and add it into the buffer. */
1373    
1374      if (input_line_buffered && bufflength < sizeof(buffer))
1375        {
1376        int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1377        bufflength += add;
1378        endptr += add;
1379        }
1380    
1381    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1382    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1383    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
# Line 1147  while (ptr < endptr) Line 1397  while (ptr < endptr)
1397    
1398      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1399      ptr -= MBUFTHIRD;      ptr -= MBUFTHIRD;
1400      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);  
1401    #ifdef SUPPORT_LIBZ
1402        if (frtype == FR_LIBZ)
1403          bufflength = 2*MBUFTHIRD +
1404            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1405        else
1406    #endif
1407    
1408    #ifdef SUPPORT_LIBBZ2
1409        if (frtype == FR_LIBBZ2)
1410          bufflength = 2*MBUFTHIRD +
1411            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1412        else
1413    #endif
1414    
1415        bufflength = 2*MBUFTHIRD +
1416          (input_line_buffered?
1417           read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1418           fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1419      endptr = buffer + bufflength;      endptr = buffer + bufflength;
1420    
1421      /* Adjust any last match point */      /* Adjust any last match point */
# Line 1178  if (filenames == FN_NOMATCH_ONLY) Line 1446  if (filenames == FN_NOMATCH_ONLY)
1446    
1447  if (count_only)  if (count_only)
1448    {    {
1449    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1450    fprintf(stdout, "%d\n", count);      {
1451        if (printname != NULL && filenames != FN_NONE)
1452          fprintf(stdout, "%s:", printname);
1453        fprintf(stdout, "%d\n", count);
1454        }
1455    }    }
1456    
1457  return rc;  return rc;
# Line 1211  grep_or_recurse(char *pathname, BOOL dir Line 1483  grep_or_recurse(char *pathname, BOOL dir
1483  {  {
1484  int rc = 1;  int rc = 1;
1485  int sep;  int sep;
1486  FILE *in;  int frtype;
1487    int pathlen;
1488    void *handle;
1489    FILE *in = NULL;           /* Ensure initialized */
1490    
1491    #ifdef SUPPORT_LIBZ
1492    gzFile ingz = NULL;
1493    #endif
1494    
1495    #ifdef SUPPORT_LIBBZ2
1496    BZFILE *inbz2 = NULL;
1497    #endif
1498    
1499  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1500    
1501  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1502    {    {
1503    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN,
1504      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1505        stdin_name : NULL);        stdin_name : NULL);
1506    }    }
1507    
   
1508  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* If the file is a directory, skip if skipping or if we are recursing, scan
1509  each file within it, subject to any include or exclude patterns that were set.  each file and directory within it, subject to any include or exclude patterns
1510  The scanning code is localized so it can be made system-specific. */  that were set. The scanning code is localized so it can be made
1511    system-specific. */
1512    
1513  if ((sep = isdirectory(pathname)) != 0)  if ((sep = isdirectory(pathname)) != 0)
1514    {    {
# Line 1246  if ((sep = isdirectory(pathname)) != 0) Line 1529  if ((sep = isdirectory(pathname)) != 0)
1529    
1530      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
1531        {        {
1532        int frc, blen;        int frc, nflen;
1533        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1534        blen = strlen(buffer);        nflen = (int)(strlen(nextfile));
1535    
1536        if (exclude_compiled != NULL &&        if (isdirectory(buffer))
1537            pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)          {
1538          continue;          if (exclude_dir_compiled != NULL &&
1539                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1540        if (include_compiled != NULL &&            continue;
1541            pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  
1542          continue;          if (include_dir_compiled != NULL &&
1543                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1544              continue;
1545            }
1546          else
1547            {
1548            if (exclude_compiled != NULL &&
1549                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1550              continue;
1551    
1552            if (include_compiled != NULL &&
1553                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1554              continue;
1555            }
1556    
1557        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1558        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
# Line 1279  skipping was not requested. The scan pro Line 1575  skipping was not requested. The scan pro
1575  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1576  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1577    
1578  in = fopen(pathname, "r");  pathlen = (int)(strlen(pathname));
1579  if (in == NULL)  
1580    /* Open using zlib if it is supported and the file name ends with .gz. */
1581    
1582    #ifdef SUPPORT_LIBZ
1583    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1584      {
1585      ingz = gzopen(pathname, "rb");
1586      if (ingz == NULL)
1587        {
1588        if (!silent)
1589          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1590            strerror(errno));
1591        return 2;
1592        }
1593      handle = (void *)ingz;
1594      frtype = FR_LIBZ;
1595      }
1596    else
1597    #endif
1598    
1599    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1600    
1601    #ifdef SUPPORT_LIBBZ2
1602    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1603      {
1604      inbz2 = BZ2_bzopen(pathname, "rb");
1605      handle = (void *)inbz2;
1606      frtype = FR_LIBBZ2;
1607      }
1608    else
1609    #endif
1610    
1611    /* Otherwise use plain fopen(). The label is so that we can come back here if
1612    an attempt to read a .bz2 file indicates that it really is a plain file. */
1613    
1614    #ifdef SUPPORT_LIBBZ2
1615    PLAIN_FILE:
1616    #endif
1617      {
1618      in = fopen(pathname, "rb");
1619      handle = (void *)in;
1620      frtype = FR_PLAIN;
1621      }
1622    
1623    /* All the opening methods set errno when they fail. */
1624    
1625    if (handle == NULL)
1626    {    {
1627    if (!silent)    if (!silent)
1628      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 1288  if (in == NULL) Line 1630  if (in == NULL)
1630    return 2;    return 2;
1631    }    }
1632    
1633  rc = pcregrep(in, (filenames > FN_DEFAULT ||  /* Now grep the file */
1634    
1635    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1636    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1637    
1638    /* Close in an appropriate manner. */
1639    
1640    #ifdef SUPPORT_LIBZ
1641    if (frtype == FR_LIBZ)
1642      gzclose(ingz);
1643    else
1644    #endif
1645    
1646    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1647    read failed. If the error indicates that the file isn't in fact bzipped, try
1648    again as a normal file. */
1649    
1650    #ifdef SUPPORT_LIBBZ2
1651    if (frtype == FR_LIBBZ2)
1652      {
1653      if (rc == 2)
1654        {
1655        int errnum;
1656        const char *err = BZ2_bzerror(inbz2, &errnum);
1657        if (errnum == BZ_DATA_ERROR_MAGIC)
1658          {
1659          BZ2_bzclose(inbz2);
1660          goto PLAIN_FILE;
1661          }
1662        else if (!silent)
1663          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1664            pathname, err);
1665        }
1666      BZ2_bzclose(inbz2);
1667      }
1668    else
1669    #endif
1670    
1671    /* Normal file close */
1672    
1673  fclose(in);  fclose(in);
1674    
1675    /* Pass back the yield from pcregrep(). */
1676    
1677  return rc;  return rc;
1678  }  }
1679    
# Line 1312  for (op = optionlist; op->one_char != 0; Line 1694  for (op = optionlist; op->one_char != 0;
1694    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1695    }    }
1696  fprintf(stderr, "] [long options] [pattern] [files]\n");  fprintf(stderr, "] [long options] [pattern] [files]\n");
1697  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1698      "options.\n");
1699  return rc;  return rc;
1700  }  }
1701    
# Line 1331  option_item *op; Line 1714  option_item *op;
1714  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1715  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1716  printf("PATTERN must be present if neither -e nor -f is used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1717  printf("\"-\" can be used as a file name to mean STDIN.\n\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
1718  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
1719    #ifdef SUPPORT_LIBZ
1720    printf("Files whose names end in .gz are read using zlib.\n");
1721    #endif
1722    
1723    #ifdef SUPPORT_LIBBZ2
1724    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1725    #endif
1726    
1727    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1728    printf("Other files and the standard input are read as plain files.\n\n");
1729    #else
1730    printf("All files are read as plain files, without any interpretation.\n\n");
1731    #endif
1732    
1733    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1734  printf("Options:\n");  printf("Options:\n");
1735    
1736  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
# Line 1341  for (op = optionlist; op->one_char != 0; Line 1738  for (op = optionlist; op->one_char != 0;
1738    int n;    int n;
1739    char s[4];    char s[4];
1740    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1741    printf("  %s --%s%n", s, op->long_name, &n);    n = 30 - printf("  %s --%s", s, op->long_name);
   n = 30 - n;  
1742    if (n < 1) n = 1;    if (n < 1) n = 1;
1743    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                    ", op->help_text);
1744    }    }
# Line 1367  handle_option(int letter, int options) Line 1763  handle_option(int letter, int options)
1763  {  {
1764  switch(letter)  switch(letter)
1765    {    {
1766      case N_FOFFSETS: file_offsets = TRUE; break;
1767    case N_HELP: help(); exit(0);    case N_HELP: help(); exit(0);
1768      case N_LOFFSETS: line_offsets = number = TRUE; break;
1769      case N_LBUFFER: line_buffered = TRUE; break;
1770    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1771    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1772    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
1773    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
1774    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1775    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1776    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1777    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1778    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
# Line 1558  const char *error; Line 1957  const char *error;
1957    
1958  /* Set the default line ending value from the default in the PCRE library;  /* Set the default line ending value from the default in the PCRE library;
1959  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1960  */  Note that the return values from pcre_config(), though derived from the ASCII
1961    codes, are the same in EBCDIC environments, so we must use the actual values
1962    rather than escapes such as '\r'. */
1963    
1964  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1965  switch(i)  switch(i)
1966    {    {
1967    default:                 newline = (char *)"lf"; break;    default:               newline = (char *)"lf"; break;
1968    case '\r':               newline = (char *)"cr"; break;    case 13:               newline = (char *)"cr"; break;
1969    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case (13 << 8) | 10:   newline = (char *)"crlf"; break;
1970    case -1:                 newline = (char *)"any"; break;    case -1:               newline = (char *)"any"; break;
1971    case -2:                 newline = (char *)"anycrlf"; break;    case -2:               newline = (char *)"anycrlf"; break;
1972    }    }
1973    
1974  /* Process the options */  /* Process the options */
# Line 1609  for (i = 1; i < argc; i++) Line 2010  for (i = 1; i < argc; i++)
2010      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2011      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2012      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2013      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
2014      these categories, fortunately. */      both these categories. */
2015    
2016      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2017        {        {
2018        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2019        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2020        if (opbra == NULL)     /* Not a (p) case */  
2021          /* Handle options with only one spelling of the name */
2022    
2023          if (opbra == NULL)     /* Does not contain '(' */
2024          {          {
2025          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2026            {            {
# Line 1624  for (i = 1; i < argc; i++) Line 2028  for (i = 1; i < argc; i++)
2028            }            }
2029          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2030            {            {
2031            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2032            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2033                (int)strlen(arg) : (int)(argequals - arg);
2034            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2035              {              {
2036              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1638  for (i = 1; i < argc; i++) Line 2043  for (i = 1; i < argc; i++)
2043              }              }
2044            }            }
2045          }          }
2046        else                   /* Special case xxxx(p) */  
2047          /* Handle options with an alternate spelling of the name */
2048    
2049          else
2050          {          {
2051          char buff1[24];          char buff1[24];
2052          char buff2[24];          char buff2[24];
2053          int baselen = opbra - op->long_name;  
2054            int baselen = (int)(opbra - op->long_name);
2055            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2056            int arglen = (argequals == NULL || equals == NULL)?
2057              (int)strlen(arg) : (int)(argequals - arg);
2058    
2059          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2060          sprintf(buff2, "%s%.*s", buff1,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2061            (int)strlen(op->long_name) - baselen - 2, opbra + 1);  
2062          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2063               strncmp(arg, buff2, arglen) == 0)
2064              {
2065              if (equals != NULL && argequals != NULL)
2066                {
2067                option_data = argequals;
2068                if (*option_data == '=')
2069                  {
2070                  option_data++;
2071                  longopwasequals = TRUE;
2072                  }
2073                }
2074            break;            break;
2075              }
2076          }          }
2077        }        }
2078    
# Line 1658  for (i = 1; i < argc; i++) Line 2083  for (i = 1; i < argc; i++)
2083        }        }
2084      }      }
2085    
   
2086    /* Jeffrey Friedl's debugging harness uses these additional options which    /* Jeffrey Friedl's debugging harness uses these additional options which
2087    are not in the right form for putting in the option table because they use    are not in the right form for putting in the option table because they use
2088    only one hyphen, yet are more than one character long. By putting them    only one hyphen, yet are more than one character long. By putting them
# Line 1782  for (i = 1; i < argc; i++) Line 2206  for (i = 1; i < argc; i++)
2206          {          {
2207          char *equals = strchr(op->long_name, '=');          char *equals = strchr(op->long_name, '=');
2208          int nlen = (equals == NULL)? (int)strlen(op->long_name) :          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2209            equals - op->long_name;            (int)(equals - op->long_name);
2210          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2211            option_data, nlen, op->long_name);            option_data, nlen, op->long_name);
2212          }          }
# Line 1804  if (both_context > 0) Line 2228  if (both_context > 0)
2228    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2229    }    }
2230    
2231    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2232    However, the latter two set the only_matching flag. */
2233    
2234    if ((only_matching && (file_offsets || line_offsets)) ||
2235        (file_offsets && line_offsets))
2236      {
2237      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2238        "and/or --line-offsets\n");
2239      exit(usage(2));
2240      }
2241    
2242    if (file_offsets || line_offsets) only_matching = TRUE;
2243    
2244  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2245  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
2246    
# Line 2037  if (include_pattern != NULL) Line 2474  if (include_pattern != NULL)
2474      }      }
2475    }    }
2476    
2477    if (exclude_dir_pattern != NULL)
2478      {
2479      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2480        pcretables);
2481      if (exclude_dir_compiled == NULL)
2482        {
2483        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2484          errptr, error);
2485        goto EXIT2;
2486        }
2487      }
2488    
2489    if (include_dir_pattern != NULL)
2490      {
2491      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2492        pcretables);
2493      if (include_dir_compiled == NULL)
2494        {
2495        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2496          errptr, error);
2497        goto EXIT2;
2498        }
2499      }
2500    
2501  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
2502    
2503  if (i >= argc)  if (i >= argc)
2504    {    {
2505    rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2506    goto EXIT;    goto EXIT;
2507    }    }
2508    

Legend:
Removed from v.152  
changed lines
  Added in v.535

  ViewVC Help
Powered by ViewVC 1.1.5