/[pcre]/code/tags/pcre-8.01/pcregrep.c
ViewVC logotype

Diff of /code/tags/pcre-8.01/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 283 by ph10, Fri Dec 7 19:59:19 2007 UTC revision 422 by ph10, Fri Aug 14 16:42:55 2009 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 55  POSSIBILITY OF SUCH DAMAGE. Line 55  POSSIBILITY OF SUCH DAMAGE.
55  #include <unistd.h>  #include <unistd.h>
56  #endif  #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 63  POSSIBILITY OF SUCH DAMAGE. Line 71  POSSIBILITY OF SUCH DAMAGE.
71  typedef int BOOL;  typedef int BOOL;
72    
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define MBUFTHIRD BUFSIZ
# Line 74  typedef int BOOL; Line 83  typedef int BOOL;
83  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
84  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
85    
86  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92  /* Actions for the -d and -D options */  /* Actions for the -d and -D options */
93    
# Line 127  static pcre_extra **hints_list = NULL; Line 140  static pcre_extra **hints_list = NULL;
140    
141  static char *include_pattern = NULL;  static char *include_pattern = NULL;
142  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
143    static char *include_dir_pattern = NULL;
144    static char *exclude_dir_pattern = NULL;
145    
146  static pcre *include_compiled = NULL;  static pcre *include_compiled = NULL;
147  static pcre *exclude_compiled = NULL;  static pcre *exclude_compiled = NULL;
148    static pcre *include_dir_compiled = NULL;
149    static pcre *exclude_dir_compiled = NULL;
150    
151  static int after_context = 0;  static int after_context = 0;
152  static int before_context = 0;  static int before_context = 0;
# Line 148  static BOOL invert = FALSE; Line 165  static BOOL invert = FALSE;
165  static BOOL line_offsets = FALSE;  static BOOL line_offsets = FALSE;
166  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
167  static BOOL number = FALSE;  static BOOL number = FALSE;
168    static BOOL omit_zero_count = FALSE;
169  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
170  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
171  static BOOL silent = FALSE;  static BOOL silent = FALSE;
# Line 169  typedef struct option_item { Line 187  typedef struct option_item {
187  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
188  used to identify them. */  used to identify them. */
189    
190  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
191  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
192  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
193  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
194  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
195  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
196  #define N_NULL      (-7)  #define N_LABEL        (-7)
197  #define N_LOFFSETS  (-8)  #define N_LOCALE       (-8)
198  #define N_FOFFSETS  (-9)  #define N_NULL         (-9)
199    #define N_LOFFSETS     (-10)
200    #define N_FOFFSETS     (-11)
201    
202  static option_item optionlist[] = {  static option_item optionlist[] = {
203    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
# Line 190  static option_item optionlist[] = { Line 210  static option_item optionlist[] = {
210    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
211    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
212    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
213    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_PATLIST,   'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
214    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_NODATA,    'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
215    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
216    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
217    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
# Line 210  static option_item optionlist[] = { Line 230  static option_item optionlist[] = {
230    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
231    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
232    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
233      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
234      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
235  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
236    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
237  #endif  #endif
# Line 322  return isatty(fileno(stdout)); Line 344  return isatty(fileno(stdout));
344    
345  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
346  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
347  when it did not exist. David Byron added a patch that moved the #include of  when it did not exist. David Byron added a patch that moved the #include of
348  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
349  */  */
350    
351  #elif HAVE_WINDOWS_H  #elif HAVE_WINDOWS_H
# Line 801  if (after_context > 0 && lastmatchnumber Line 823  if (after_context > 0 && lastmatchnumber
823    
824    
825  /*************************************************  /*************************************************
826    *   Apply patterns to subject till one matches   *
827    *************************************************/
828    
829    /* This function is called to run through all patterns, looking for a match. It
830    is used multiple times for the same subject when colouring is enabled, in order
831    to find all possible matches.
832    
833    Arguments:
834      matchptr    the start of the subject
835      length      the length of the subject to match
836      offsets     the offsets vector to fill in
837      mrc         address of where to put the result of pcre_exec()
838    
839    Returns:      TRUE if there was a match
840                  FALSE if there was no match
841                  invert if there was a non-fatal error
842    */
843    
844    static BOOL
845    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
846    {
847    int i;
848    for (i = 0; i < pattern_count; i++)
849      {
850      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0,
851        PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
852      if (*mrc >= 0) return TRUE;
853      if (*mrc == PCRE_ERROR_NOMATCH) continue;
854      fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
855      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
856      fprintf(stderr, "this text:\n");
857      fwrite(matchptr, 1, length, stderr);  /* In case binary zero included */
858      fprintf(stderr, "\n");
859      if (error_count == 0 &&
860          (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
861        {
862        fprintf(stderr, "pcregrep: error %d means that a resource limit "
863          "was exceeded\n", *mrc);
864        fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
865        }
866      if (error_count++ > 20)
867        {
868        fprintf(stderr, "pcregrep: too many errors - abandoned\n");
869        exit(2);
870        }
871      return invert;    /* No more matching; don't show the line again */
872      }
873    
874    return FALSE;  /* No match, no errors */
875    }
876    
877    
878    
879    /*************************************************
880  *            Grep an individual file             *  *            Grep an individual file             *
881  *************************************************/  *************************************************/
882    
# Line 812  be in the middle third most of the time, Line 888  be in the middle third most of the time,
888  "before" context printing.  "before" context printing.
889    
890  Arguments:  Arguments:
891    in           the fopened FILE stream    handle       the fopened FILE stream for a normal file
892                   the gzFile pointer when reading is via libz
893                   the BZFILE pointer when reading is via libbz2
894      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
895    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
896                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
897                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
898    
899  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
900                 1 otherwise (no matches)                 1 otherwise (no matches)
901                   2 if there is a read error on a .bz2 file
902  */  */
903    
904  static int  static int
905  pcregrep(FILE *in, char *printname)  pcregrep(void *handle, int frtype, char *printname)
906  {  {
907  int rc = 1;  int rc = 1;
908  int linenumber = 1;  int linenumber = 1;
909  int lastmatchnumber = 0;  int lastmatchnumber = 0;
910  int count = 0;  int count = 0;
911  int filepos = 0;  int filepos = 0;
912  int offsets[99];  int offsets[OFFSET_SIZE];
913  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
914  char buffer[3*MBUFTHIRD];  char buffer[3*MBUFTHIRD];
915  char *ptr = buffer;  char *ptr = buffer;
916  char *endptr;  char *endptr;
917  size_t bufflength;  size_t bufflength;
918  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
919    FILE *in = NULL;                    /* Ensure initialized */
920    
921    #ifdef SUPPORT_LIBZ
922    gzFile ingz = NULL;
923    #endif
924    
925    #ifdef SUPPORT_LIBBZ2
926    BZFILE *inbz2 = NULL;
927    #endif
928    
929    
930    /* Do the first read into the start of the buffer and set up the pointer to end
931    of what we have. In the case of libz, a non-zipped .gz file will be read as a
932    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
933    fail. */
934    
935    #ifdef SUPPORT_LIBZ
936    if (frtype == FR_LIBZ)
937      {
938      ingz = (gzFile)handle;
939      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
940      }
941    else
942    #endif
943    
944    #ifdef SUPPORT_LIBBZ2
945    if (frtype == FR_LIBBZ2)
946      {
947      inbz2 = (BZFILE *)handle;
948      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
949      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
950      }                                    /* without the cast it is unsigned. */
951    else
952    #endif
953    
954  /* Do the first read into the start of the buffer and set up the pointer to    {
955  end of what we have. */    in = (FILE *)handle;
956      bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
957      }
958    
 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  
959  endptr = buffer + bufflength;  endptr = buffer + bufflength;
960    
961  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
# Line 850  way, the buffer is shifted left and re-f Line 965  way, the buffer is shifted left and re-f
965    
966  while (ptr < endptr)  while (ptr < endptr)
967    {    {
968    int i, endlinelength;    int endlinelength;
969    int mrc = 0;    int mrc = 0;
970    BOOL match = FALSE;    BOOL match;
971    char *matchptr = ptr;    char *matchptr = ptr;
972    char *t = ptr;    char *t = ptr;
973    size_t length, linelength;    size_t length, linelength;
974    
975    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
976    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
977    length of the remainder of the data in the buffer. Otherwise, it is the length of    length of the remainder of the data in the buffer. Otherwise, it is the length of
978    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
979    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
980    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
981      first line. */
982    
983    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
984    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
# Line 877  while (ptr < endptr) Line 993  while (ptr < endptr)
993        #include <time.h>        #include <time.h>
994        struct timeval start_time, end_time;        struct timeval start_time, end_time;
995        struct timezone dummy;        struct timezone dummy;
996          int i;
997    
998        if (jfriedl_XT)        if (jfriedl_XT)
999        {        {
# Line 902  while (ptr < endptr) Line 1019  while (ptr < endptr)
1019    
1020    
1021        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1022            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1023                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1024    
1025        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
1026                perror("bad gettimeofday");                perror("bad gettimeofday");
# Line 916  while (ptr < endptr) Line 1034  while (ptr < endptr)
1034    }    }
1035  #endif  #endif
1036    
1037    /* We come back here after a match when the -o option (only_matching) is set,    /* We come back here after a match when the -o option (only_matching) is set,
1038    in order to find any further matches in the same line. */    in order to find any further matches in the same line. */
   
   ONLY_MATCHING_RESTART:  
1039    
1040    /* Run through all the patterns until one matches. Note that we don't include    ONLY_MATCHING_RESTART:
   the final newline in the subject string. */  
1041    
1042    for (i = 0; i < pattern_count; i++)    /* Run through all the patterns until one matches or there is an error other
1043      {    than NOMATCH. This code is in a subroutine so that it can be re-used for
1044      mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,    finding subsequent matches when colouring matched lines. */
1045        offsets, 99);  
1046      if (mrc >= 0) { match = TRUE; break; }    match = match_patterns(matchptr, length, offsets, &mrc);
     if (mrc != PCRE_ERROR_NOMATCH)  
       {  
       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);  
       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
       fprintf(stderr, "this line:\n");  
       fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */  
       fprintf(stderr, "\n");  
       if (error_count == 0 &&  
           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))  
         {  
         fprintf(stderr, "pcregrep: error %d means that a resource limit "  
           "was exceeded\n", mrc);  
         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
         }  
       if (error_count++ > 20)  
         {  
         fprintf(stderr, "pcregrep: too many errors - abandoned\n");  
         exit(2);  
         }  
       match = invert;    /* No more matching; don't show the line again */  
       break;  
       }  
     }  
1047    
1048    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1049    
# Line 970  while (ptr < endptr) Line 1062  while (ptr < endptr)
1062      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1063      in the file. */      in the file. */
1064    
1065      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1066        {        {
1067        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1068        return 0;        return 0;
# Line 981  while (ptr < endptr) Line 1073  while (ptr < endptr)
1073      else if (quiet) return 0;      else if (quiet) return 0;
1074    
1075      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, and
1076      the --file-offsets and --line-offsets options output offsets for the      the --file-offsets and --line-offsets options output offsets for the
1077      matching substring (they both force --only-matching). None of these options      matching substring (they both force --only-matching). None of these options
1078      prints any context. Afterwards, adjust the start and length, and then jump      prints any context. Afterwards, adjust the start and length, and then jump
1079      back to look for further matches in the same line. If we are in invert      back to look for further matches in the same line. If we are in invert
# Line 991  while (ptr < endptr) Line 1083  while (ptr < endptr)
1083      else if (only_matching)      else if (only_matching)
1084        {        {
1085        if (!invert)        if (!invert)
1086          {          {
1087          if (printname != NULL) fprintf(stdout, "%s:", printname);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1088          if (number) fprintf(stdout, "%d:", linenumber);          if (number) fprintf(stdout, "%d:", linenumber);
1089          if (line_offsets)          if (line_offsets)
1090            fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1091              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1092          else if (file_offsets)          else if (file_offsets)
1093            fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1094              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1095          else          else
1096              {
1097              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1098            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1099              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1100              }
1101          fprintf(stdout, "\n");          fprintf(stdout, "\n");
1102          matchptr += offsets[1];          matchptr += offsets[1];
1103          length -= offsets[1];          length -= offsets[1];
1104          match = FALSE;          match = FALSE;
1105          goto ONLY_MATCHING_RESTART;          goto ONLY_MATCHING_RESTART;
1106          }          }
1107        }        }
1108    
1109      /* This is the default case when none of the above options is set. We print      /* This is the default case when none of the above options is set. We print
# Line 1138  while (ptr < endptr) Line 1234  while (ptr < endptr)
1234        else        else
1235  #endif  #endif
1236    
1237        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1238          matches. */
1239    
1240        if (do_colour)        if (do_colour)
1241          {          {
1242            int last_offset = 0;
1243          fwrite(ptr, 1, offsets[0], stdout);          fwrite(ptr, 1, offsets[0], stdout);
1244          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1245          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1246          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1247          fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],          for (;;)
1248              {
1249              last_offset += offsets[1];
1250              matchptr += offsets[1];
1251              length -= offsets[1];
1252              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1253              fwrite(matchptr, 1, offsets[0], stdout);
1254              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1255              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1256              fprintf(stdout, "%c[00m", 0x1b);
1257              }
1258            fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,
1259            stdout);            stdout);
1260          }          }
1261    
1262          /* Not colouring; no need to search for further matches */
1263    
1264        else fwrite(ptr, 1, linelength + endlinelength, stdout);        else fwrite(ptr, 1, linelength + endlinelength, stdout);
1265        }        }
1266    
# Line 1181  while (ptr < endptr) Line 1293  while (ptr < endptr)
1293      linelength = endmatch - ptr - ellength;      linelength = endmatch - ptr - ellength;
1294      }      }
1295    
1296    /* Advance to after the newline and increment the line number. The file    /* Advance to after the newline and increment the line number. The file
1297    offset to the current line is maintained in filepos. */    offset to the current line is maintained in filepos. */
1298    
1299    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
# Line 1207  while (ptr < endptr) Line 1319  while (ptr < endptr)
1319    
1320      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1321      ptr -= MBUFTHIRD;      ptr -= MBUFTHIRD;
1322    
1323    #ifdef SUPPORT_LIBZ
1324        if (frtype == FR_LIBZ)
1325          bufflength = 2*MBUFTHIRD +
1326            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1327        else
1328    #endif
1329    
1330    #ifdef SUPPORT_LIBBZ2
1331        if (frtype == FR_LIBBZ2)
1332          bufflength = 2*MBUFTHIRD +
1333            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1334        else
1335    #endif
1336    
1337      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1338    
1339      endptr = buffer + bufflength;      endptr = buffer + bufflength;
1340    
1341      /* Adjust any last match point */      /* Adjust any last match point */
# Line 1238  if (filenames == FN_NOMATCH_ONLY) Line 1366  if (filenames == FN_NOMATCH_ONLY)
1366    
1367  if (count_only)  if (count_only)
1368    {    {
1369    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1370    fprintf(stdout, "%d\n", count);      {
1371        if (printname != NULL && filenames != FN_NONE)
1372          fprintf(stdout, "%s:", printname);
1373        fprintf(stdout, "%d\n", count);
1374        }
1375    }    }
1376    
1377  return rc;  return rc;
# Line 1271  grep_or_recurse(char *pathname, BOOL dir Line 1403  grep_or_recurse(char *pathname, BOOL dir
1403  {  {
1404  int rc = 1;  int rc = 1;
1405  int sep;  int sep;
1406  FILE *in;  int frtype;
1407    int pathlen;
1408    void *handle;
1409    FILE *in = NULL;           /* Ensure initialized */
1410    
1411    #ifdef SUPPORT_LIBZ
1412    gzFile ingz = NULL;
1413    #endif
1414    
1415    #ifdef SUPPORT_LIBBZ2
1416    BZFILE *inbz2 = NULL;
1417    #endif
1418    
1419  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1420    
1421  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1422    {    {
1423    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN,
1424      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1425        stdin_name : NULL);        stdin_name : NULL);
1426    }    }
1427    
   
1428  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* If the file is a directory, skip if skipping or if we are recursing, scan
1429  each file within it, subject to any include or exclude patterns that were set.  each file and directory within it, subject to any include or exclude patterns
1430  The scanning code is localized so it can be made system-specific. */  that were set. The scanning code is localized so it can be made
1431    system-specific. */
1432    
1433  if ((sep = isdirectory(pathname)) != 0)  if ((sep = isdirectory(pathname)) != 0)
1434    {    {
# Line 1306  if ((sep = isdirectory(pathname)) != 0) Line 1449  if ((sep = isdirectory(pathname)) != 0)
1449    
1450      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
1451        {        {
1452        int frc, blen;        int frc, nflen;
1453        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1454        blen = strlen(buffer);        nflen = strlen(nextfile);
1455    
1456          if (isdirectory(buffer))
1457            {
1458            if (exclude_dir_compiled != NULL &&
1459                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1460              continue;
1461    
1462            if (include_dir_compiled != NULL &&
1463                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1464              continue;
1465            }
1466          else
1467            {
1468            if (exclude_compiled != NULL &&
1469                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1470              continue;
1471    
1472        if (exclude_compiled != NULL &&          if (include_compiled != NULL &&
1473            pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)              pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1474          continue;            continue;
1475            }
       if (include_compiled != NULL &&  
           pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  
         continue;  
1476    
1477        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1478        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
# Line 1339  skipping was not requested. The scan pro Line 1495  skipping was not requested. The scan pro
1495  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1496  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1497    
1498  in = fopen(pathname, "r");  pathlen = strlen(pathname);
1499  if (in == NULL)  
1500    /* Open using zlib if it is supported and the file name ends with .gz. */
1501    
1502    #ifdef SUPPORT_LIBZ
1503    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1504      {
1505      ingz = gzopen(pathname, "rb");
1506      if (ingz == NULL)
1507        {
1508        if (!silent)
1509          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1510            strerror(errno));
1511        return 2;
1512        }
1513      handle = (void *)ingz;
1514      frtype = FR_LIBZ;
1515      }
1516    else
1517    #endif
1518    
1519    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1520    
1521    #ifdef SUPPORT_LIBBZ2
1522    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1523      {
1524      inbz2 = BZ2_bzopen(pathname, "rb");
1525      handle = (void *)inbz2;
1526      frtype = FR_LIBBZ2;
1527      }
1528    else
1529    #endif
1530    
1531    /* Otherwise use plain fopen(). The label is so that we can come back here if
1532    an attempt to read a .bz2 file indicates that it really is a plain file. */
1533    
1534    #ifdef SUPPORT_LIBBZ2
1535    PLAIN_FILE:
1536    #endif
1537      {
1538      in = fopen(pathname, "rb");
1539      handle = (void *)in;
1540      frtype = FR_PLAIN;
1541      }
1542    
1543    /* All the opening methods set errno when they fail. */
1544    
1545    if (handle == NULL)
1546    {    {
1547    if (!silent)    if (!silent)
1548      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 1348  if (in == NULL) Line 1550  if (in == NULL)
1550    return 2;    return 2;
1551    }    }
1552    
1553  rc = pcregrep(in, (filenames > FN_DEFAULT ||  /* Now grep the file */
1554    
1555    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1556    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1557    
1558    /* Close in an appropriate manner. */
1559    
1560    #ifdef SUPPORT_LIBZ
1561    if (frtype == FR_LIBZ)
1562      gzclose(ingz);
1563    else
1564    #endif
1565    
1566    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1567    read failed. If the error indicates that the file isn't in fact bzipped, try
1568    again as a normal file. */
1569    
1570    #ifdef SUPPORT_LIBBZ2
1571    if (frtype == FR_LIBBZ2)
1572      {
1573      if (rc == 2)
1574        {
1575        int errnum;
1576        const char *err = BZ2_bzerror(inbz2, &errnum);
1577        if (errnum == BZ_DATA_ERROR_MAGIC)
1578          {
1579          BZ2_bzclose(inbz2);
1580          goto PLAIN_FILE;
1581          }
1582        else if (!silent)
1583          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1584            pathname, err);
1585        }
1586      BZ2_bzclose(inbz2);
1587      }
1588    else
1589    #endif
1590    
1591    /* Normal file close */
1592    
1593  fclose(in);  fclose(in);
1594    
1595    /* Pass back the yield from pcregrep(). */
1596    
1597  return rc;  return rc;
1598  }  }
1599    
# Line 1392  option_item *op; Line 1634  option_item *op;
1634  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1635  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1636  printf("PATTERN must be present if neither -e nor -f is used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1637  printf("\"-\" can be used as a file name to mean STDIN.\n\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
1638  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
1639    #ifdef SUPPORT_LIBZ
1640    printf("Files whose names end in .gz are read using zlib.\n");
1641    #endif
1642    
1643    #ifdef SUPPORT_LIBBZ2
1644    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1645    #endif
1646    
1647    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1648    printf("Other files and the standard input are read as plain files.\n\n");
1649    #else
1650    printf("All files are read as plain files, without any interpretation.\n\n");
1651    #endif
1652    
1653    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1654  printf("Options:\n");  printf("Options:\n");
1655    
1656  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
# Line 1402  for (op = optionlist; op->one_char != 0; Line 1658  for (op = optionlist; op->one_char != 0;
1658    int n;    int n;
1659    char s[4];    char s[4];
1660    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1661    printf("  %s --%s%n", s, op->long_name, &n);    n = 30 - printf("  %s --%s", s, op->long_name);
   n = 30 - n;  
1662    if (n < 1) n = 1;    if (n < 1) n = 1;
1663    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                    ", op->help_text);
1664    }    }
# Line 1428  handle_option(int letter, int options) Line 1683  handle_option(int letter, int options)
1683  {  {
1684  switch(letter)  switch(letter)
1685    {    {
1686    case N_FOFFSETS: file_offsets = TRUE; break;    case N_FOFFSETS: file_offsets = TRUE; break;
1687    case N_HELP: help(); exit(0);    case N_HELP: help(); exit(0);
1688    case N_LOFFSETS: line_offsets = number = TRUE; break;    case N_LOFFSETS: line_offsets = number = TRUE; break;
1689    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1690    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1691    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
1692    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
1693    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1694    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1695    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1696    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1697    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
# Line 1621  const char *error; Line 1876  const char *error;
1876    
1877  /* Set the default line ending value from the default in the PCRE library;  /* Set the default line ending value from the default in the PCRE library;
1878  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1879  */  Note that the return values from pcre_config(), though derived from the ASCII
1880    codes, are the same in EBCDIC environments, so we must use the actual values
1881    rather than escapes such as '\r'. */
1882    
1883  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1884  switch(i)  switch(i)
1885    {    {
1886    default:                 newline = (char *)"lf"; break;    default:               newline = (char *)"lf"; break;
1887    case '\r':               newline = (char *)"cr"; break;    case 13:               newline = (char *)"cr"; break;
1888    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case (13 << 8) | 10:   newline = (char *)"crlf"; break;
1889    case -1:                 newline = (char *)"any"; break;    case -1:               newline = (char *)"any"; break;
1890    case -2:                 newline = (char *)"anycrlf"; break;    case -2:               newline = (char *)"anycrlf"; break;
1891    }    }
1892    
1893  /* Process the options */  /* Process the options */
# Line 1672  for (i = 1; i < argc; i++) Line 1929  for (i = 1; i < argc; i++)
1929      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
1930      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
1931      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1932      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
1933      these categories, fortunately. */      both these categories. */
1934    
1935      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
1936        {        {
1937        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
1938        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
1939        if (opbra == NULL)     /* Not a (p) case */  
1940          /* Handle options with only one spelling of the name */
1941    
1942          if (opbra == NULL)     /* Does not contain '(' */
1943          {          {
1944          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
1945            {            {
# Line 1701  for (i = 1; i < argc; i++) Line 1961  for (i = 1; i < argc; i++)
1961              }              }
1962            }            }
1963          }          }
1964        else                   /* Special case xxxx(p) */  
1965          /* Handle options with an alternate spelling of the name */
1966    
1967          else
1968          {          {
1969          char buff1[24];          char buff1[24];
1970          char buff2[24];          char buff2[24];
1971    
1972          int baselen = opbra - op->long_name;          int baselen = opbra - op->long_name;
1973            int fulllen = strchr(op->long_name, ')') - op->long_name + 1;
1974            int arglen = (argequals == NULL || equals == NULL)?
1975              (int)strlen(arg) : argequals - arg;
1976    
1977          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
1978          sprintf(buff2, "%s%.*s", buff1,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
1979            (int)strlen(op->long_name) - baselen - 2, opbra + 1);  
1980          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
1981               strncmp(arg, buff2, arglen) == 0)
1982              {
1983              if (equals != NULL && argequals != NULL)
1984                {
1985                option_data = argequals;
1986                if (*option_data == '=')
1987                  {
1988                  option_data++;
1989                  longopwasequals = TRUE;
1990                  }
1991                }
1992            break;            break;
1993              }
1994          }          }
1995        }        }
1996    
# Line 1721  for (i = 1; i < argc; i++) Line 2001  for (i = 1; i < argc; i++)
2001        }        }
2002      }      }
2003    
   
2004    /* Jeffrey Friedl's debugging harness uses these additional options which    /* Jeffrey Friedl's debugging harness uses these additional options which
2005    are not in the right form for putting in the option table because they use    are not in the right form for putting in the option table because they use
2006    only one hyphen, yet are more than one character long. By putting them    only one hyphen, yet are more than one character long. By putting them
# Line 1866  if (both_context > 0) Line 2145  if (both_context > 0)
2145    if (after_context == 0) after_context = both_context;    if (after_context == 0) after_context = both_context;
2146    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2147    }    }
2148    
2149  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2150  However, the latter two set the only_matching flag. */  However, the latter two set the only_matching flag. */
2151    
2152  if ((only_matching && (file_offsets || line_offsets)) ||  if ((only_matching && (file_offsets || line_offsets)) ||
2153      (file_offsets && line_offsets))      (file_offsets && line_offsets))
2154    {    {
2155    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2156      "and/or --line-offsets\n");      "and/or --line-offsets\n");
2157    exit(usage(2));    exit(usage(2));
2158    }    }
2159    
2160  if (file_offsets || line_offsets) only_matching = TRUE;  if (file_offsets || line_offsets) only_matching = TRUE;
2161    
2162  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2163  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
# Line 2113  if (include_pattern != NULL) Line 2392  if (include_pattern != NULL)
2392      }      }
2393    }    }
2394    
2395    if (exclude_dir_pattern != NULL)
2396      {
2397      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2398        pcretables);
2399      if (exclude_dir_compiled == NULL)
2400        {
2401        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2402          errptr, error);
2403        goto EXIT2;
2404        }
2405      }
2406    
2407    if (include_dir_pattern != NULL)
2408      {
2409      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2410        pcretables);
2411      if (include_dir_compiled == NULL)
2412        {
2413        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2414          errptr, error);
2415        goto EXIT2;
2416        }
2417      }
2418    
2419  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
2420    
2421  if (i >= argc)  if (i >= argc)
2422    {    {
2423    rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2424    goto EXIT;    goto EXIT;
2425    }    }
2426    

Legend:
Removed from v.283  
changed lines
  Added in v.422

  ViewVC Help
Powered by ViewVC 1.1.5