/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 296 by ph10, Tue Jan 1 20:09:30 2008 UTC revision 530 by ph10, Tue Jun 1 13:42:06 2010 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 71  POSSIBILITY OF SUCH DAMAGE. Line 71  POSSIBILITY OF SUCH DAMAGE.
71  typedef int BOOL;  typedef int BOOL;
72    
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define MBUFTHIRD BUFSIZ
# Line 82  typedef int BOOL; Line 83  typedef int BOOL;
83  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
84  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
85    
86  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88  /* File reading styles */  /* File reading styles */
89    
# Line 103  enum { DEE_READ, DEE_SKIP }; Line 104  enum { DEE_READ, DEE_SKIP };
104    
105  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
113    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
114    
115    
116    
117  /*************************************************  /*************************************************
# Line 139  static pcre_extra **hints_list = NULL; Line 148  static pcre_extra **hints_list = NULL;
148    
149  static char *include_pattern = NULL;  static char *include_pattern = NULL;
150  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
151    static char *include_dir_pattern = NULL;
152    static char *exclude_dir_pattern = NULL;
153    
154  static pcre *include_compiled = NULL;  static pcre *include_compiled = NULL;
155  static pcre *exclude_compiled = NULL;  static pcre *exclude_compiled = NULL;
156    static pcre *include_dir_compiled = NULL;
157    static pcre *exclude_dir_compiled = NULL;
158    
159  static int after_context = 0;  static int after_context = 0;
160  static int before_context = 0;  static int before_context = 0;
# Line 157  static BOOL do_colour = FALSE; Line 170  static BOOL do_colour = FALSE;
170  static BOOL file_offsets = FALSE;  static BOOL file_offsets = FALSE;
171  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
172  static BOOL invert = FALSE;  static BOOL invert = FALSE;
173    static BOOL line_buffered = FALSE;
174  static BOOL line_offsets = FALSE;  static BOOL line_offsets = FALSE;
175  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
176  static BOOL number = FALSE;  static BOOL number = FALSE;
177    static BOOL omit_zero_count = FALSE;
178  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
179  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
180  static BOOL silent = FALSE;  static BOOL silent = FALSE;
# Line 181  typedef struct option_item { Line 196  typedef struct option_item {
196  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
197  used to identify them. */  used to identify them. */
198    
199  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
200  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
201  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
202  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
203  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
204  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
205  #define N_NULL      (-7)  #define N_LABEL        (-7)
206  #define N_LOFFSETS  (-8)  #define N_LOCALE       (-8)
207  #define N_FOFFSETS  (-9)  #define N_NULL         (-9)
208    #define N_LOFFSETS     (-10)
209    #define N_FOFFSETS     (-11)
210    #define N_LBUFFER      (-12)
211    
212  static option_item optionlist[] = {  static option_item optionlist[] = {
213    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
# Line 202  static option_item optionlist[] = { Line 220  static option_item optionlist[] = {
220    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
221    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
222    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
223    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_PATLIST,   'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
224    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_NODATA,    'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
225    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
226    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
227    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
# Line 212  static option_item optionlist[] = { Line 230  static option_item optionlist[] = {
230    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
231    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
232    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
233      { OP_NODATA,    N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
234    { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },    { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
235    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
236    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
# Line 222  static option_item optionlist[] = { Line 241  static option_item optionlist[] = {
241    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
242    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
243    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
244      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
245      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
246  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
247    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
248  #endif  #endif
# Line 321  return (statbuf.st_mode & S_IFMT) == S_I Line 342  return (statbuf.st_mode & S_IFMT) == S_I
342  }  }
343    
344    
345  /************* Test stdout for being a terminal in Unix **********/  /************* Test for a terminal in Unix **********/
346    
347  static BOOL  static BOOL
348  is_stdout_tty(void)  is_stdout_tty(void)
# Line 329  is_stdout_tty(void) Line 350  is_stdout_tty(void)
350  return isatty(fileno(stdout));  return isatty(fileno(stdout));
351  }  }
352    
353    static BOOL
354    is_file_tty(FILE *f)
355    {
356    return isatty(fileno(f));
357    }
358    
359    
360  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
361    
# Line 441  return !isdirectory(filename); Line 468  return !isdirectory(filename);
468  }  }
469    
470    
471  /************* Test stdout for being a terminal in Win32 **********/  /************* Test for a terminal in Win32 **********/
472    
473  /* I don't know how to do this; assume never */  /* I don't know how to do this; assume never */
474    
# Line 451  is_stdout_tty(void) Line 478  is_stdout_tty(void)
478  return FALSE;  return FALSE;
479  }  }
480    
481    static BOOL
482    is_file_tty(FILE *f)
483    {
484    return FALSE;
485    }
486    
487    
488  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
489    
# Line 473  void closedirectory(directory_type *dir) Line 506  void closedirectory(directory_type *dir)
506  int isregfile(char *filename) { return 1; }  int isregfile(char *filename) { return 1; }
507    
508    
509  /************* Test stdout for being a terminal when we can't do it **********/  /************* Test for a terminal when we can't do it **********/
510    
511  static BOOL  static BOOL
512  is_stdout_tty(void)  is_stdout_tty(void)
# Line 481  is_stdout_tty(void) Line 514  is_stdout_tty(void)
514  return FALSE;  return FALSE;
515  }  }
516    
517    static BOOL
518    is_file_tty(FILE *f)
519    {
520    return FALSE;
521    }
522    
523  #endif  #endif
524    
# Line 509  return sys_errlist[n]; Line 547  return sys_errlist[n];
547    
548    
549  /*************************************************  /*************************************************
550    *            Read one line of input              *
551    *************************************************/
552    
553    /* Normally, input is read using fread() into a large buffer, so many lines may
554    be read at once. However, doing this for tty input means that no output appears
555    until a lot of input has been typed. Instead, tty input is handled line by
556    line. We cannot use fgets() for this: it does not stop at a binary zero, and
557    since the data may contain embedded binary zeros, there is no way of telling
558    how many characters it has read.
559    
560    Arguments:
561      buffer     the buffer to read into
562      length     the maximum number of characters to read
563      f          the file
564    
565    Returns:     the number of characters read, zero at end of file
566    */
567    
568    static int
569    read_one_line(char *buffer, int length, FILE *f)
570    {
571    int c;
572    int yield = 0;
573    while ((c = fgetc(f)) != EOF)
574      {
575      buffer[yield++] = c;
576      if (c == '\n' || yield >= length) break;
577      }
578    return yield;
579    }
580    
581    
582    
583    /*************************************************
584  *             Find end of line                   *  *             Find end of line                   *
585  *************************************************/  *************************************************/
586    
# Line 803  if (after_context > 0 && lastmatchnumber Line 875  if (after_context > 0 && lastmatchnumber
875      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
876      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
877      pp = end_of_line(pp, endptr, &ellength);      pp = end_of_line(pp, endptr, &ellength);
878      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
879      lastmatchrestart = pp;      lastmatchrestart = pp;
880      }      }
881    hyphenpending = TRUE;    hyphenpending = TRUE;
# Line 813  if (after_context > 0 && lastmatchnumber Line 885  if (after_context > 0 && lastmatchnumber
885    
886    
887  /*************************************************  /*************************************************
888    *   Apply patterns to subject till one matches   *
889    *************************************************/
890    
891    /* This function is called to run through all patterns, looking for a match. It
892    is used multiple times for the same subject when colouring is enabled, in order
893    to find all possible matches.
894    
895    Arguments:
896      matchptr    the start of the subject
897      length      the length of the subject to match
898      offsets     the offsets vector to fill in
899      mrc         address of where to put the result of pcre_exec()
900    
901    Returns:      TRUE if there was a match
902                  FALSE if there was no match
903                  invert if there was a non-fatal error
904    */
905    
906    static BOOL
907    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
908    {
909    int i;
910    for (i = 0; i < pattern_count; i++)
911      {
912      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
913        PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
914      if (*mrc >= 0) return TRUE;
915      if (*mrc == PCRE_ERROR_NOMATCH) continue;
916      fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
917      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
918      fprintf(stderr, "this text:\n");
919      FWRITE(matchptr, 1, length, stderr);   /* In case binary zero included */
920      fprintf(stderr, "\n");
921      if (error_count == 0 &&
922          (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
923        {
924        fprintf(stderr, "pcregrep: error %d means that a resource limit "
925          "was exceeded\n", *mrc);
926        fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
927        }
928      if (error_count++ > 20)
929        {
930        fprintf(stderr, "pcregrep: too many errors - abandoned\n");
931        exit(2);
932        }
933      return invert;    /* No more matching; don't show the line again */
934      }
935    
936    return FALSE;  /* No match, no errors */
937    }
938    
939    
940    
941    /*************************************************
942  *            Grep an individual file             *  *            Grep an individual file             *
943  *************************************************/  *************************************************/
944    
# Line 845  int linenumber = 1; Line 971  int linenumber = 1;
971  int lastmatchnumber = 0;  int lastmatchnumber = 0;
972  int count = 0;  int count = 0;
973  int filepos = 0;  int filepos = 0;
974  int offsets[99];  int offsets[OFFSET_SIZE];
975  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
976  char buffer[3*MBUFTHIRD];  char buffer[3*MBUFTHIRD];
977  char *ptr = buffer;  char *ptr = buffer;
978  char *endptr;  char *endptr;
979  size_t bufflength;  size_t bufflength;
980  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
981    BOOL input_line_buffered = line_buffered;
982  FILE *in = NULL;                    /* Ensure initialized */  FILE *in = NULL;                    /* Ensure initialized */
983    
984  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
# Line 889  else Line 1016  else
1016    
1017    {    {
1018    in = (FILE *)handle;    in = (FILE *)handle;
1019    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);    if (is_file_tty(in)) input_line_buffered = TRUE;
1020      bufflength = input_line_buffered?
1021        read_one_line(buffer, 3*MBUFTHIRD, in) :
1022        fread(buffer, 1, 3*MBUFTHIRD, in);
1023    }    }
1024    
1025  endptr = buffer + bufflength;  endptr = buffer + bufflength;
1026    
1027  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
# Line 901  way, the buffer is shifted left and re-f Line 1031  way, the buffer is shifted left and re-f
1031    
1032  while (ptr < endptr)  while (ptr < endptr)
1033    {    {
1034    int i, endlinelength;    int endlinelength;
1035    int mrc = 0;    int mrc = 0;
1036    BOOL match = FALSE;    BOOL match;
1037    char *matchptr = ptr;    char *matchptr = ptr;
1038    char *t = ptr;    char *t = ptr;
1039    size_t length, linelength;    size_t length, linelength;
# Line 911  while (ptr < endptr) Line 1041  while (ptr < endptr)
1041    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
1042    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
1043    length remainder of the data in the buffer. Otherwise, it is the length of    length remainder of the data in the buffer. Otherwise, it is the length of
1044    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
1045    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1046    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
1047      first line. */
1048    
1049    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
1050    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
# Line 928  while (ptr < endptr) Line 1059  while (ptr < endptr)
1059        #include <time.h>        #include <time.h>
1060        struct timeval start_time, end_time;        struct timeval start_time, end_time;
1061        struct timezone dummy;        struct timezone dummy;
1062          int i;
1063    
1064        if (jfriedl_XT)        if (jfriedl_XT)
1065        {        {
# Line 953  while (ptr < endptr) Line 1085  while (ptr < endptr)
1085    
1086    
1087        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1088            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1089                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1090    
1091        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
1092                perror("bad gettimeofday");                perror("bad gettimeofday");
# Line 972  while (ptr < endptr) Line 1105  while (ptr < endptr)
1105    
1106    ONLY_MATCHING_RESTART:    ONLY_MATCHING_RESTART:
1107    
1108    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches or there is an error other
1109    the final newline in the subject string. */    than NOMATCH. This code is in a subroutine so that it can be re-used for
1110      finding subsequent matches when colouring matched lines. */
1111    
1112    for (i = 0; i < pattern_count; i++)    match = match_patterns(matchptr, length, offsets, &mrc);
     {  
     mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,  
       offsets, 99);  
     if (mrc >= 0) { match = TRUE; break; }  
     if (mrc != PCRE_ERROR_NOMATCH)  
       {  
       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);  
       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
       fprintf(stderr, "this line:\n");  
       fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */  
       fprintf(stderr, "\n");  
       if (error_count == 0 &&  
           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))  
         {  
         fprintf(stderr, "pcregrep: error %d means that a resource limit "  
           "was exceeded\n", mrc);  
         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
         }  
       if (error_count++ > 20)  
         {  
         fprintf(stderr, "pcregrep: too many errors - abandoned\n");  
         exit(2);  
         }  
       match = invert;    /* No more matching; don't show the line again */  
       break;  
       }  
     }  
1113    
1114    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1115    
# Line 1021  while (ptr < endptr) Line 1128  while (ptr < endptr)
1128      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1129      in the file. */      in the file. */
1130    
1131      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1132        {        {
1133        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1134        return 0;        return 0;
# Line 1046  while (ptr < endptr) Line 1153  while (ptr < endptr)
1153          if (printname != NULL) fprintf(stdout, "%s:", printname);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1154          if (number) fprintf(stdout, "%d:", linenumber);          if (number) fprintf(stdout, "%d:", linenumber);
1155          if (line_offsets)          if (line_offsets)
1156            fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1157              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1158          else if (file_offsets)          else if (file_offsets)
1159            fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1160              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1161          else          else
1162            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);            {
1163              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1164              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1165              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1166              }
1167          fprintf(stdout, "\n");          fprintf(stdout, "\n");
1168          matchptr += offsets[1];          matchptr += offsets[1];
1169          length -= offsets[1];          length -= offsets[1];
# Line 1092  while (ptr < endptr) Line 1203  while (ptr < endptr)
1203            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1204            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1205            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1206            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1207            lastmatchrestart = pp;            lastmatchrestart = pp;
1208            }            }
1209          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
# Line 1132  while (ptr < endptr) Line 1243  while (ptr < endptr)
1243            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1244            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1245            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1246            fwrite(p, 1, pp - p, stdout);            FWRITE(p, 1, pp - p, stdout);
1247            p = pp;            p = pp;
1248            }            }
1249          }          }
# Line 1182  while (ptr < endptr) Line 1293  while (ptr < endptr)
1293          {          {
1294          int first = S_arg * 2;          int first = S_arg * 2;
1295          int last  = first + 1;          int last  = first + 1;
1296          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1297          fprintf(stdout, "X");          fprintf(stdout, "X");
1298          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1299          }          }
1300        else        else
1301  #endif  #endif
1302    
1303        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1304          matches. */
1305    
1306        if (do_colour)        if (do_colour)
1307          {          {
1308          fwrite(ptr, 1, offsets[0], stdout);          int last_offset = 0;
1309            FWRITE(ptr, 1, offsets[0], stdout);
1310          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1311          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1312          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1313          fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],          for (;;)
1314            stdout);            {
1315              last_offset += offsets[1];
1316              matchptr += offsets[1];
1317              length -= offsets[1];
1318              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1319              FWRITE(matchptr, 1, offsets[0], stdout);
1320              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1321              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1322              fprintf(stdout, "%c[00m", 0x1b);
1323              }
1324            FWRITE(ptr + last_offset, 1,
1325              (linelength + endlinelength) - last_offset, stdout);
1326          }          }
1327        else fwrite(ptr, 1, linelength + endlinelength, stdout);  
1328          /* Not colouring; no need to search for further matches */
1329    
1330          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1331        }        }
1332    
1333      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match. If --line-buffered was
1334        given, flush the output. */
1335    
1336        if (line_buffered) fflush(stdout);
1337      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1338    
1339      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
# Line 1236  while (ptr < endptr) Line 1365  while (ptr < endptr)
1365    offset to the current line is maintained in filepos. */    offset to the current line is maintained in filepos. */
1366    
1367    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
1368    filepos += linelength + endlinelength;    filepos += (int)(linelength + endlinelength);
1369    linenumber++;    linenumber++;
1370    
1371      /* If input is line buffered, and the buffer is not yet full, read another
1372      line and add it into the buffer. */
1373    
1374      if (input_line_buffered && bufflength < sizeof(buffer))
1375        {
1376        int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1377        bufflength += add;
1378        endptr += add;
1379        }
1380    
1381    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1382    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
# Line 1273  while (ptr < endptr) Line 1412  while (ptr < endptr)
1412      else      else
1413  #endif  #endif
1414    
1415      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);      bufflength = 2*MBUFTHIRD +
1416          (input_line_buffered?
1417           read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1418           fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1419      endptr = buffer + bufflength;      endptr = buffer + bufflength;
1420    
1421      /* Adjust any last match point */      /* Adjust any last match point */
# Line 1305  if (filenames == FN_NOMATCH_ONLY) Line 1446  if (filenames == FN_NOMATCH_ONLY)
1446    
1447  if (count_only)  if (count_only)
1448    {    {
1449    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1450    fprintf(stdout, "%d\n", count);      {
1451        if (printname != NULL && filenames != FN_NONE)
1452          fprintf(stdout, "%s:", printname);
1453        fprintf(stdout, "%d\n", count);
1454        }
1455    }    }
1456    
1457  return rc;  return rc;
# Line 1361  if (strcmp(pathname, "-") == 0) Line 1506  if (strcmp(pathname, "-") == 0)
1506    }    }
1507    
1508  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* If the file is a directory, skip if skipping or if we are recursing, scan
1509  each file within it, subject to any include or exclude patterns that were set.  each file and directory within it, subject to any include or exclude patterns
1510  The scanning code is localized so it can be made system-specific. */  that were set. The scanning code is localized so it can be made
1511    system-specific. */
1512    
1513  if ((sep = isdirectory(pathname)) != 0)  if ((sep = isdirectory(pathname)) != 0)
1514    {    {
# Line 1383  if ((sep = isdirectory(pathname)) != 0) Line 1529  if ((sep = isdirectory(pathname)) != 0)
1529    
1530      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
1531        {        {
1532        int frc, blen;        int frc, nflen;
1533        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1534        blen = strlen(buffer);        nflen = (int)(strlen(nextfile));
1535    
1536          if (isdirectory(buffer))
1537            {
1538            if (exclude_dir_compiled != NULL &&
1539                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1540              continue;
1541    
1542            if (include_dir_compiled != NULL &&
1543                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1544              continue;
1545            }
1546          else
1547            {
1548            if (exclude_compiled != NULL &&
1549                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1550              continue;
1551    
1552        if (exclude_compiled != NULL &&          if (include_compiled != NULL &&
1553            pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)              pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1554          continue;            continue;
1555            }
       if (include_compiled != NULL &&  
           pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  
         continue;  
1556    
1557        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1558        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
# Line 1416  skipping was not requested. The scan pro Line 1575  skipping was not requested. The scan pro
1575  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1576  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1577    
1578  pathlen = strlen(pathname);  pathlen = (int)(strlen(pathname));
1579    
1580  /* Open using zlib if it is supported and the file name ends with .gz. */  /* Open using zlib if it is supported and the file name ends with .gz. */
1581    
# Line 1456  an attempt to read a .bz2 file indicates Line 1615  an attempt to read a .bz2 file indicates
1615  PLAIN_FILE:  PLAIN_FILE:
1616  #endif  #endif
1617    {    {
1618    in = fopen(pathname, "r");    in = fopen(pathname, "rb");
1619    handle = (void *)in;    handle = (void *)in;
1620    frtype = FR_PLAIN;    frtype = FR_PLAIN;
1621    }    }
# Line 1607  switch(letter) Line 1766  switch(letter)
1766    case N_FOFFSETS: file_offsets = TRUE; break;    case N_FOFFSETS: file_offsets = TRUE; break;
1767    case N_HELP: help(); exit(0);    case N_HELP: help(); exit(0);
1768    case N_LOFFSETS: line_offsets = number = TRUE; break;    case N_LOFFSETS: line_offsets = number = TRUE; break;
1769      case N_LBUFFER: line_buffered = TRUE; break;
1770    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1771    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1772    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
1773    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
1774    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1775    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1776    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1777    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1778    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
# Line 1797  const char *error; Line 1957  const char *error;
1957    
1958  /* Set the default line ending value from the default in the PCRE library;  /* Set the default line ending value from the default in the PCRE library;
1959  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1960  */  Note that the return values from pcre_config(), though derived from the ASCII
1961    codes, are the same in EBCDIC environments, so we must use the actual values
1962    rather than escapes such as '\r'. */
1963    
1964  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1965  switch(i)  switch(i)
1966    {    {
1967    default:                 newline = (char *)"lf"; break;    default:               newline = (char *)"lf"; break;
1968    case '\r':               newline = (char *)"cr"; break;    case 13:               newline = (char *)"cr"; break;
1969    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case (13 << 8) | 10:   newline = (char *)"crlf"; break;
1970    case -1:                 newline = (char *)"any"; break;    case -1:               newline = (char *)"any"; break;
1971    case -2:                 newline = (char *)"anycrlf"; break;    case -2:               newline = (char *)"anycrlf"; break;
1972    }    }
1973    
1974  /* Process the options */  /* Process the options */
# Line 1848  for (i = 1; i < argc; i++) Line 2010  for (i = 1; i < argc; i++)
2010      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2011      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2012      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2013      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
2014      these categories, fortunately. */      both these categories. */
2015    
2016      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2017        {        {
2018        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2019        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2020        if (opbra == NULL)     /* Not a (p) case */  
2021          /* Handle options with only one spelling of the name */
2022    
2023          if (opbra == NULL)     /* Does not contain '(' */
2024          {          {
2025          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2026            {            {
# Line 1863  for (i = 1; i < argc; i++) Line 2028  for (i = 1; i < argc; i++)
2028            }            }
2029          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2030            {            {
2031            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2032            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2033                (int)strlen(arg) : (int)(argequals - arg);
2034            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2035              {              {
2036              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1877  for (i = 1; i < argc; i++) Line 2043  for (i = 1; i < argc; i++)
2043              }              }
2044            }            }
2045          }          }
2046        else                   /* Special case xxxx(p) */  
2047          /* Handle options with an alternate spelling of the name */
2048    
2049          else
2050          {          {
2051          char buff1[24];          char buff1[24];
2052          char buff2[24];          char buff2[24];
2053          int baselen = opbra - op->long_name;  
2054            int baselen = (int)(opbra - op->long_name);
2055            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2056            int arglen = (argequals == NULL || equals == NULL)?
2057              (int)strlen(arg) : (int)(argequals - arg);
2058    
2059          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2060          sprintf(buff2, "%s%.*s", buff1,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2061            (int)strlen(op->long_name) - baselen - 2, opbra + 1);  
2062          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2063               strncmp(arg, buff2, arglen) == 0)
2064              {
2065              if (equals != NULL && argequals != NULL)
2066                {
2067                option_data = argequals;
2068                if (*option_data == '=')
2069                  {
2070                  option_data++;
2071                  longopwasequals = TRUE;
2072                  }
2073                }
2074            break;            break;
2075              }
2076          }          }
2077        }        }
2078    
# Line 1897  for (i = 1; i < argc; i++) Line 2083  for (i = 1; i < argc; i++)
2083        }        }
2084      }      }
2085    
   
2086    /* Jeffrey Friedl's debugging harness uses these additional options which    /* Jeffrey Friedl's debugging harness uses these additional options which
2087    are not in the right form for putting in the option table because they use    are not in the right form for putting in the option table because they use
2088    only one hyphen, yet are more than one character long. By putting them    only one hyphen, yet are more than one character long. By putting them
# Line 2021  for (i = 1; i < argc; i++) Line 2206  for (i = 1; i < argc; i++)
2206          {          {
2207          char *equals = strchr(op->long_name, '=');          char *equals = strchr(op->long_name, '=');
2208          int nlen = (equals == NULL)? (int)strlen(op->long_name) :          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2209            equals - op->long_name;            (int)(equals - op->long_name);
2210          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2211            option_data, nlen, op->long_name);            option_data, nlen, op->long_name);
2212          }          }
# Line 2104  if (colour_option != NULL && strcmp(colo Line 2289  if (colour_option != NULL && strcmp(colo
2289      if (cs != NULL) colour_string = cs;      if (cs != NULL) colour_string = cs;
2290      }      }
2291    }    }
2292    
2293  /* Interpret the newline type; the default settings are Unix-like. */  /* Interpret the newline type; the default settings are Unix-like. */
2294    
2295  if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)  if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
# Line 2287  if (include_pattern != NULL) Line 2472  if (include_pattern != NULL)
2472        errptr, error);        errptr, error);
2473      goto EXIT2;      goto EXIT2;
2474      }      }
2475      }
2476    
2477    if (exclude_dir_pattern != NULL)
2478      {
2479      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2480        pcretables);
2481      if (exclude_dir_compiled == NULL)
2482        {
2483        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2484          errptr, error);
2485        goto EXIT2;
2486        }
2487      }
2488    
2489    if (include_dir_pattern != NULL)
2490      {
2491      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2492        pcretables);
2493      if (include_dir_compiled == NULL)
2494        {
2495        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2496          errptr, error);
2497        goto EXIT2;
2498        }
2499    }    }
2500    
2501  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */

Legend:
Removed from v.296  
changed lines
  Added in v.530

  ViewVC Help
Powered by ViewVC 1.1.5