/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 152 by ph10, Tue Apr 17 15:55:53 2007 UTC revision 378 by ph10, Sun Mar 1 14:13:34 2009 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  */  */
39    
40  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
41  #  include <config.h>  #include "config.h"
42  #endif  #endif
43    
44  #include <ctype.h>  #include <ctype.h>
# Line 50  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54  #ifdef HAVE_UNISTD_H  #ifdef HAVE_UNISTD_H
55  #  include <unistd.h>  #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60  #endif  #endif
61    
62  #include <pcre.h>  #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66    #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
69  #define TRUE 1  #define TRUE 1
# Line 62  POSSIBILITY OF SUCH DAMAGE. Line 71  POSSIBILITY OF SUCH DAMAGE.
71  typedef int BOOL;  typedef int BOOL;
72    
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define MBUFTHIRD BUFSIZ
# Line 75  all values greater than FN_DEFAULT. */ Line 85  all values greater than FN_DEFAULT. */
85    
86  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92  /* Actions for the -d and -D options */  /* Actions for the -d and -D options */
93    
94  enum { dee_READ, dee_SKIP, dee_RECURSE };  enum { dee_READ, dee_SKIP, dee_RECURSE };
# Line 126  static pcre_extra **hints_list = NULL; Line 140  static pcre_extra **hints_list = NULL;
140    
141  static char *include_pattern = NULL;  static char *include_pattern = NULL;
142  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
143    static char *include_dir_pattern = NULL;
144    static char *exclude_dir_pattern = NULL;
145    
146  static pcre *include_compiled = NULL;  static pcre *include_compiled = NULL;
147  static pcre *exclude_compiled = NULL;  static pcre *exclude_compiled = NULL;
148    static pcre *include_dir_compiled = NULL;
149    static pcre *exclude_dir_compiled = NULL;
150    
151  static int after_context = 0;  static int after_context = 0;
152  static int before_context = 0;  static int before_context = 0;
# Line 141  static int process_options = 0; Line 159  static int process_options = 0;
159    
160  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
161  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
162    static BOOL file_offsets = FALSE;
163  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
164  static BOOL invert = FALSE;  static BOOL invert = FALSE;
165    static BOOL line_offsets = FALSE;
166  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
167  static BOOL number = FALSE;  static BOOL number = FALSE;
168  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
# Line 166  typedef struct option_item { Line 186  typedef struct option_item {
186  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
187  used to identify them. */  used to identify them. */
188    
189  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
190  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
191  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
192  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
193  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
194  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
195  #define N_NULL      (-7)  #define N_LABEL        (-7)
196    #define N_LOCALE       (-8)
197    #define N_NULL         (-9)
198    #define N_LOFFSETS     (-10)
199    #define N_FOFFSETS     (-11)
200    
201  static option_item optionlist[] = {  static option_item optionlist[] = {
202    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
# Line 188  static option_item optionlist[] = { Line 212  static option_item optionlist[] = {
212    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
213    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
214    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
215      { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
216    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
217    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
218    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
219    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
220    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
221    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
222      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
223    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
224    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
225    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },    { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
226    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
227    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
228    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
229    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
230    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
231    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
232      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
233      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
234  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
235    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
236  #endif  #endif
# Line 315  return isatty(fileno(stdout)); Line 343  return isatty(fileno(stdout));
343    
344  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
345  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
346  when it did not exist. */  when it did not exist. David Byron added a patch that moved the #include of
347    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
348    */
349    
350  #elif HAVE_WINDOWS_H  #elif HAVE_WINDOWS_H
351    
# Line 326  when it did not exist. */ Line 355  when it did not exist. */
355  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
356  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
357  #endif  #endif
358    
359    #include <windows.h>
360    
361  #ifndef INVALID_FILE_ATTRIBUTES  #ifndef INVALID_FILE_ATTRIBUTES
362  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
363  #endif  #endif
364    
 #include <windows.h>  
   
365  typedef struct directory_type  typedef struct directory_type
366  {  {
367  HANDLE handle;  HANDLE handle;
# Line 416  regular if they are not directories. */ Line 446  regular if they are not directories. */
446    
447  int isregfile(char *filename)  int isregfile(char *filename)
448  {  {
449  return !isdirectory(filename)  return !isdirectory(filename);
450  }  }
451    
452    
# Line 427  return !isdirectory(filename) Line 457  return !isdirectory(filename)
457  static BOOL  static BOOL
458  is_stdout_tty(void)  is_stdout_tty(void)
459  {  {
460  FALSE;  return FALSE;
461  }  }
462    
463    
# Line 792  if (after_context > 0 && lastmatchnumber Line 822  if (after_context > 0 && lastmatchnumber
822    
823    
824  /*************************************************  /*************************************************
825    *   Apply patterns to subject till one matches   *
826    *************************************************/
827    
828    /* This function is called to run through all patterns, looking for a match. It
829    is used multiple times for the same subject when colouring is enabled, in order
830    to find all possible matches.
831    
832    Arguments:
833      matchptr    the start of the subject
834      length      the length of the subject to match
835      offsets     the offsets vector to fill in
836      mrc         address of where to put the result of pcre_exec()
837    
838    Returns:      TRUE if there was a match
839                  FALSE if there was no match
840                  invert if there was a non-fatal error
841    */
842    
843    static BOOL
844    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
845    {
846    int i;
847    for (i = 0; i < pattern_count; i++)
848      {
849      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
850        offsets, OFFSET_SIZE);
851      if (*mrc >= 0) return TRUE;
852      if (*mrc == PCRE_ERROR_NOMATCH) continue;
853      fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
854      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
855      fprintf(stderr, "this text:\n");
856      fwrite(matchptr, 1, length, stderr);  /* In case binary zero included */
857      fprintf(stderr, "\n");
858      if (error_count == 0 &&
859          (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
860        {
861        fprintf(stderr, "pcregrep: error %d means that a resource limit "
862          "was exceeded\n", *mrc);
863        fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
864        }
865      if (error_count++ > 20)
866        {
867        fprintf(stderr, "pcregrep: too many errors - abandoned\n");
868        exit(2);
869        }
870      return invert;    /* No more matching; don't show the line again */
871      }
872    
873    return FALSE;  /* No match, no errors */
874    }
875    
876    
877    
878    /*************************************************
879  *            Grep an individual file             *  *            Grep an individual file             *
880  *************************************************/  *************************************************/
881    
# Line 803  be in the middle third most of the time, Line 887  be in the middle third most of the time,
887  "before" context printing.  "before" context printing.
888    
889  Arguments:  Arguments:
890    in           the fopened FILE stream    handle       the fopened FILE stream for a normal file
891                   the gzFile pointer when reading is via libz
892                   the BZFILE pointer when reading is via libbz2
893      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
894    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
895                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
896                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
897    
898  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
899                 1 otherwise (no matches)                 1 otherwise (no matches)
900                   2 if there is a read error on a .bz2 file
901  */  */
902    
903  static int  static int
904  pcregrep(FILE *in, char *printname)  pcregrep(void *handle, int frtype, char *printname)
905  {  {
906  int rc = 1;  int rc = 1;
907  int linenumber = 1;  int linenumber = 1;
908  int lastmatchnumber = 0;  int lastmatchnumber = 0;
909  int count = 0;  int count = 0;
910  int offsets[99];  int filepos = 0;
911    int offsets[OFFSET_SIZE];
912  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
913  char buffer[3*MBUFTHIRD];  char buffer[3*MBUFTHIRD];
914  char *ptr = buffer;  char *ptr = buffer;
915  char *endptr;  char *endptr;
916  size_t bufflength;  size_t bufflength;
917  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
918    FILE *in = NULL;                    /* Ensure initialized */
919    
920    #ifdef SUPPORT_LIBZ
921    gzFile ingz = NULL;
922    #endif
923    
924    #ifdef SUPPORT_LIBBZ2
925    BZFILE *inbz2 = NULL;
926    #endif
927    
928    
929    /* Do the first read into the start of the buffer and set up the pointer to end
930    of what we have. In the case of libz, a non-zipped .gz file will be read as a
931    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
932    fail. */
933    
934    #ifdef SUPPORT_LIBZ
935    if (frtype == FR_LIBZ)
936      {
937      ingz = (gzFile)handle;
938      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
939      }
940    else
941    #endif
942    
943  /* Do the first read into the start of the buffer and set up the pointer to  #ifdef SUPPORT_LIBBZ2
944  end of what we have. */  if (frtype == FR_LIBBZ2)
945      {
946      inbz2 = (BZFILE *)handle;
947      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
948      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
949      }                                    /* without the cast it is unsigned. */
950    else
951    #endif
952    
953      {
954      in = (FILE *)handle;
955      bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
956      }
957    
 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  
958  endptr = buffer + bufflength;  endptr = buffer + bufflength;
959    
960  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
# Line 840  way, the buffer is shifted left and re-f Line 964  way, the buffer is shifted left and re-f
964    
965  while (ptr < endptr)  while (ptr < endptr)
966    {    {
967    int i, endlinelength;    int endlinelength;
968    int mrc = 0;    int mrc = 0;
969    BOOL match = FALSE;    BOOL match;
970      char *matchptr = ptr;
971    char *t = ptr;    char *t = ptr;
972    size_t length, linelength;    size_t length, linelength;
973    
974    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
975    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
976    length remainder of the data in the buffer. Otherwise, it is the length of    length remainder of the data in the buffer. Otherwise, it is the length of
977    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
978    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
979    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
980      first line. */
981    
982    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
983    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
984    length = multiline? endptr - ptr : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
985    
986    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
987    
# Line 866  while (ptr < endptr) Line 992  while (ptr < endptr)
992        #include <time.h>        #include <time.h>
993        struct timeval start_time, end_time;        struct timeval start_time, end_time;
994        struct timezone dummy;        struct timezone dummy;
995          int i;
996    
997        if (jfriedl_XT)        if (jfriedl_XT)
998        {        {
# Line 891  while (ptr < endptr) Line 1018  while (ptr < endptr)
1018    
1019    
1020        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1021            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, OFFSET_SIZE) >= 0);
1022    
1023        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
1024                perror("bad gettimeofday");                perror("bad gettimeofday");
# Line 905  while (ptr < endptr) Line 1032  while (ptr < endptr)
1032    }    }
1033  #endif  #endif
1034    
1035      /* We come back here after a match when the -o option (only_matching) is set,
1036      in order to find any further matches in the same line. */
1037    
1038    /* Run through all the patterns until one matches. Note that we don't include    ONLY_MATCHING_RESTART:
   the final newline in the subject string. */  
1039    
1040    for (i = 0; i < pattern_count; i++)    /* Run through all the patterns until one matches or there is an error other
1041      {    than NOMATCH. This code is in a subroutine so that it can be re-used for
1042      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,    finding subsequent matches when colouring matched lines. */
1043        offsets, 99);  
1044      if (mrc >= 0) { match = TRUE; break; }    match = match_patterns(matchptr, length, offsets, &mrc);
     if (mrc != PCRE_ERROR_NOMATCH)  
       {  
       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);  
       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
       fprintf(stderr, "this line:\n");  
       fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */  
       fprintf(stderr, "\n");  
       if (error_count == 0 &&  
           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))  
         {  
         fprintf(stderr, "pcregrep: error %d means that a resource limit "  
           "was exceeded\n", mrc);  
         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
         }  
       if (error_count++ > 20)  
         {  
         fprintf(stderr, "pcregrep: too many errors - abandoned\n");  
         exit(2);  
         }  
       match = invert;    /* No more matching; don't show the line again */  
       break;  
       }  
     }  
1045    
1046    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1047    
# Line 966  while (ptr < endptr) Line 1071  while (ptr < endptr)
1071      else if (quiet) return 0;      else if (quiet) return 0;
1072    
1073      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, and
1074      does not print any context. */      the --file-offsets and --line-offsets options output offsets for the
1075        matching substring (they both force --only-matching). None of these options
1076        prints any context. Afterwards, adjust the start and length, and then jump
1077        back to look for further matches in the same line. If we are in invert
1078        mode, however, nothing is printed - this could still be useful because the
1079        return code is set. */
1080    
1081      else if (only_matching)      else if (only_matching)
1082        {        {
1083        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (!invert)
1084        if (number) fprintf(stdout, "%d:", linenumber);          {
1085        fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1086        fprintf(stdout, "\n");          if (number) fprintf(stdout, "%d:", linenumber);
1087            if (line_offsets)
1088              fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1089                offsets[1] - offsets[0]);
1090            else if (file_offsets)
1091              fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1092                offsets[1] - offsets[0]);
1093            else
1094              {
1095              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1096              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1097              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1098              }
1099            fprintf(stdout, "\n");
1100            matchptr += offsets[1];
1101            length -= offsets[1];
1102            match = FALSE;
1103            goto ONLY_MATCHING_RESTART;
1104            }
1105        }        }
1106    
1107      /* This is the default case when none of the above options is set. We print      /* This is the default case when none of the above options is set. We print
# Line 1063  while (ptr < endptr) Line 1191  while (ptr < endptr)
1191    
1192        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1193        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1194        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1195        start of the match will always be before the first newline sequence. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1196          the match will always be before the first newline sequence. */
1197    
1198        if (multiline)        if (multiline)
1199          {          {
1200          int ellength;          int ellength;
1201          char *endmatch = ptr + offsets[1];          char *endmatch = ptr;
1202          t = ptr;          if (!invert)
         while (t < endmatch)  
1203            {            {
1204            t = end_of_line(t, endptr, &ellength);            endmatch += offsets[1];
1205            if (t <= endmatch) linenumber++; else break;            t = ptr;
1206              while (t < endmatch)
1207                {
1208                t = end_of_line(t, endptr, &ellength);
1209                if (t <= endmatch) linenumber++; else break;
1210                }
1211            }            }
1212          endmatch = end_of_line(endmatch, endptr, &ellength);          endmatch = end_of_line(endmatch, endptr, &ellength);
1213          linelength = endmatch - ptr - ellength;          linelength = endmatch - ptr - ellength;
# Line 1099  while (ptr < endptr) Line 1232  while (ptr < endptr)
1232        else        else
1233  #endif  #endif
1234    
1235        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1236          matches. */
1237    
1238        if (do_colour)        if (do_colour)
1239          {          {
1240            int last_offset = 0;
1241          fwrite(ptr, 1, offsets[0], stdout);          fwrite(ptr, 1, offsets[0], stdout);
1242          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1243          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1244          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1245          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          for (;;)
1246              {
1247              last_offset += offsets[1];
1248              matchptr += offsets[1];
1249              length -= offsets[1];
1250              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1251              fwrite(matchptr, 1, offsets[0], stdout);
1252              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1253              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1254              fprintf(stdout, "%c[00m", 0x1b);
1255              }
1256            fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,
1257              stdout);
1258          }          }
1259    
1260          /* Not colouring; no need to search for further matches */
1261    
1262        else fwrite(ptr, 1, linelength + endlinelength, stdout);        else fwrite(ptr, 1, linelength + endlinelength, stdout);
1263        }        }
1264    
# Line 1123  while (ptr < endptr) Line 1273  while (ptr < endptr)
1273      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1274      }      }
1275    
1276    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1277      anything to be printed), we have to move on to the end of the match before
1278      proceeding. */
1279    
1280      if (multiline && invert && match)
1281        {
1282        int ellength;
1283        char *endmatch = ptr + offsets[1];
1284        t = ptr;
1285        while (t < endmatch)
1286          {
1287          t = end_of_line(t, endptr, &ellength);
1288          if (t <= endmatch) linenumber++; else break;
1289          }
1290        endmatch = end_of_line(endmatch, endptr, &ellength);
1291        linelength = endmatch - ptr - ellength;
1292        }
1293    
1294      /* Advance to after the newline and increment the line number. The file
1295      offset to the current line is maintained in filepos. */
1296    
1297    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
1298      filepos += linelength + endlinelength;
1299    linenumber++;    linenumber++;
1300    
1301    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
# Line 1147  while (ptr < endptr) Line 1317  while (ptr < endptr)
1317    
1318      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1319      ptr -= MBUFTHIRD;      ptr -= MBUFTHIRD;
1320    
1321    #ifdef SUPPORT_LIBZ
1322        if (frtype == FR_LIBZ)
1323          bufflength = 2*MBUFTHIRD +
1324            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1325        else
1326    #endif
1327    
1328    #ifdef SUPPORT_LIBBZ2
1329        if (frtype == FR_LIBBZ2)
1330          bufflength = 2*MBUFTHIRD +
1331            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1332        else
1333    #endif
1334    
1335      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1336    
1337      endptr = buffer + bufflength;      endptr = buffer + bufflength;
1338    
1339      /* Adjust any last match point */      /* Adjust any last match point */
# Line 1211  grep_or_recurse(char *pathname, BOOL dir Line 1397  grep_or_recurse(char *pathname, BOOL dir
1397  {  {
1398  int rc = 1;  int rc = 1;
1399  int sep;  int sep;
1400  FILE *in;  int frtype;
1401    int pathlen;
1402    void *handle;
1403    FILE *in = NULL;           /* Ensure initialized */
1404    
1405    #ifdef SUPPORT_LIBZ
1406    gzFile ingz = NULL;
1407    #endif
1408    
1409    #ifdef SUPPORT_LIBBZ2
1410    BZFILE *inbz2 = NULL;
1411    #endif
1412    
1413  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1414    
1415  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1416    {    {
1417    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN,
1418      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1419        stdin_name : NULL);        stdin_name : NULL);
1420    }    }
1421    
   
1422  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* If the file is a directory, skip if skipping or if we are recursing, scan
1423  each file within it, subject to any include or exclude patterns that were set.  each file and directory within it, subject to any include or exclude patterns
1424  The scanning code is localized so it can be made system-specific. */  that were set. The scanning code is localized so it can be made
1425    system-specific. */
1426    
1427  if ((sep = isdirectory(pathname)) != 0)  if ((sep = isdirectory(pathname)) != 0)
1428    {    {
# Line 1246  if ((sep = isdirectory(pathname)) != 0) Line 1443  if ((sep = isdirectory(pathname)) != 0)
1443    
1444      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
1445        {        {
1446        int frc, blen;        int frc, nflen;
1447        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1448        blen = strlen(buffer);        nflen = strlen(nextfile);
1449    
1450          if (isdirectory(buffer))
1451            {
1452            if (exclude_dir_compiled != NULL &&
1453                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1454              continue;
1455    
1456            if (include_dir_compiled != NULL &&
1457                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1458              continue;
1459            }
1460          else
1461            {
1462            if (exclude_compiled != NULL &&
1463                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1464              continue;
1465    
1466        if (exclude_compiled != NULL &&          if (include_compiled != NULL &&
1467            pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)              pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1468          continue;            continue;
1469            }
       if (include_compiled != NULL &&  
           pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  
         continue;  
1470    
1471        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1472        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
# Line 1279  skipping was not requested. The scan pro Line 1489  skipping was not requested. The scan pro
1489  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1490  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1491    
1492  in = fopen(pathname, "r");  pathlen = strlen(pathname);
1493  if (in == NULL)  
1494    /* Open using zlib if it is supported and the file name ends with .gz. */
1495    
1496    #ifdef SUPPORT_LIBZ
1497    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1498      {
1499      ingz = gzopen(pathname, "rb");
1500      if (ingz == NULL)
1501        {
1502        if (!silent)
1503          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1504            strerror(errno));
1505        return 2;
1506        }
1507      handle = (void *)ingz;
1508      frtype = FR_LIBZ;
1509      }
1510    else
1511    #endif
1512    
1513    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1514    
1515    #ifdef SUPPORT_LIBBZ2
1516    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1517      {
1518      inbz2 = BZ2_bzopen(pathname, "rb");
1519      handle = (void *)inbz2;
1520      frtype = FR_LIBBZ2;
1521      }
1522    else
1523    #endif
1524    
1525    /* Otherwise use plain fopen(). The label is so that we can come back here if
1526    an attempt to read a .bz2 file indicates that it really is a plain file. */
1527    
1528    #ifdef SUPPORT_LIBBZ2
1529    PLAIN_FILE:
1530    #endif
1531      {
1532      in = fopen(pathname, "r");
1533      handle = (void *)in;
1534      frtype = FR_PLAIN;
1535      }
1536    
1537    /* All the opening methods return errno when they fail. */
1538    
1539    if (handle == NULL)
1540    {    {
1541    if (!silent)    if (!silent)
1542      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 1288  if (in == NULL) Line 1544  if (in == NULL)
1544    return 2;    return 2;
1545    }    }
1546    
1547  rc = pcregrep(in, (filenames > FN_DEFAULT ||  /* Now grep the file */
1548    
1549    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1550    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1551    
1552    /* Close in an appropriate manner. */
1553    
1554    #ifdef SUPPORT_LIBZ
1555    if (frtype == FR_LIBZ)
1556      gzclose(ingz);
1557    else
1558    #endif
1559    
1560    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1561    read failed. If the error indicates that the file isn't in fact bzipped, try
1562    again as a normal file. */
1563    
1564    #ifdef SUPPORT_LIBBZ2
1565    if (frtype == FR_LIBBZ2)
1566      {
1567      if (rc == 2)
1568        {
1569        int errnum;
1570        const char *err = BZ2_bzerror(inbz2, &errnum);
1571        if (errnum == BZ_DATA_ERROR_MAGIC)
1572          {
1573          BZ2_bzclose(inbz2);
1574          goto PLAIN_FILE;
1575          }
1576        else if (!silent)
1577          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1578            pathname, err);
1579        }
1580      BZ2_bzclose(inbz2);
1581      }
1582    else
1583    #endif
1584    
1585    /* Normal file close */
1586    
1587  fclose(in);  fclose(in);
1588    
1589    /* Pass back the yield from pcregrep(). */
1590    
1591  return rc;  return rc;
1592  }  }
1593    
# Line 1312  for (op = optionlist; op->one_char != 0; Line 1608  for (op = optionlist; op->one_char != 0;
1608    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1609    }    }
1610  fprintf(stderr, "] [long options] [pattern] [files]\n");  fprintf(stderr, "] [long options] [pattern] [files]\n");
1611  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1612      "options.\n");
1613  return rc;  return rc;
1614  }  }
1615    
# Line 1331  option_item *op; Line 1628  option_item *op;
1628  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1629  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1630  printf("PATTERN must be present if neither -e nor -f is used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1631  printf("\"-\" can be used as a file name to mean STDIN.\n\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
1632  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
1633    #ifdef SUPPORT_LIBZ
1634    printf("Files whose names end in .gz are read using zlib.\n");
1635    #endif
1636    
1637    #ifdef SUPPORT_LIBBZ2
1638    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1639    #endif
1640    
1641    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1642    printf("Other files and the standard input are read as plain files.\n\n");
1643    #else
1644    printf("All files are read as plain files, without any interpretation.\n\n");
1645    #endif
1646    
1647    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1648  printf("Options:\n");  printf("Options:\n");
1649    
1650  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
# Line 1341  for (op = optionlist; op->one_char != 0; Line 1652  for (op = optionlist; op->one_char != 0;
1652    int n;    int n;
1653    char s[4];    char s[4];
1654    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1655    printf("  %s --%s%n", s, op->long_name, &n);    n = 30 - printf("  %s --%s", s, op->long_name);
   n = 30 - n;  
1656    if (n < 1) n = 1;    if (n < 1) n = 1;
1657    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                    ", op->help_text);
1658    }    }
# Line 1367  handle_option(int letter, int options) Line 1677  handle_option(int letter, int options)
1677  {  {
1678  switch(letter)  switch(letter)
1679    {    {
1680      case N_FOFFSETS: file_offsets = TRUE; break;
1681    case N_HELP: help(); exit(0);    case N_HELP: help(); exit(0);
1682      case N_LOFFSETS: line_offsets = number = TRUE; break;
1683    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1684    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1685    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
# Line 1625  for (i = 1; i < argc; i++) Line 1937  for (i = 1; i < argc; i++)
1937          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
1938            {            {
1939            int oplen = equals - op->long_name;            int oplen = equals - op->long_name;
1940            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1941            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1942              {              {
1943              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1804  if (both_context > 0) Line 2116  if (both_context > 0)
2116    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2117    }    }
2118    
2119    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2120    However, the latter two set the only_matching flag. */
2121    
2122    if ((only_matching && (file_offsets || line_offsets)) ||
2123        (file_offsets && line_offsets))
2124      {
2125      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2126        "and/or --line-offsets\n");
2127      exit(usage(2));
2128      }
2129    
2130    if (file_offsets || line_offsets) only_matching = TRUE;
2131    
2132  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2133  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
2134    
# Line 2037  if (include_pattern != NULL) Line 2362  if (include_pattern != NULL)
2362      }      }
2363    }    }
2364    
2365    if (exclude_dir_pattern != NULL)
2366      {
2367      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2368        pcretables);
2369      if (exclude_dir_compiled == NULL)
2370        {
2371        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2372          errptr, error);
2373        goto EXIT2;
2374        }
2375      }
2376    
2377    if (include_dir_pattern != NULL)
2378      {
2379      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2380        pcretables);
2381      if (include_dir_compiled == NULL)
2382        {
2383        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2384          errptr, error);
2385        goto EXIT2;
2386        }
2387      }
2388    
2389  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
2390    
2391  if (i >= argc)  if (i >= argc)
2392    {    {
2393    rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2394    goto EXIT;    goto EXIT;
2395    }    }
2396    

Legend:
Removed from v.152  
changed lines
  Added in v.378

  ViewVC Help
Powered by ViewVC 1.1.5