/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 345 by ph10, Mon Apr 28 15:10:02 2008 UTC revision 558 by ph10, Tue Oct 26 15:26:45 2010 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 71  POSSIBILITY OF SUCH DAMAGE. Line 71  POSSIBILITY OF SUCH DAMAGE.
71  typedef int BOOL;  typedef int BOOL;
72    
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define MBUFTHIRD BUFSIZ
# Line 82  typedef int BOOL; Line 83  typedef int BOOL;
83  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
84  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
85    
86  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88  /* File reading styles */  /* File reading styles */
89    
# Line 103  enum { DEE_READ, DEE_SKIP }; Line 104  enum { DEE_READ, DEE_SKIP };
104    
105  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
/* The fwrite() result is consumed by an empty "if" so that it counts as used.
The do/while(0) wrapper makes the expansion a single statement, so that
"if (x) FWRITE(...); else ..." parses as intended. */

#define FWRITE(a,b,c,d) do { if (fwrite((a),(b),(c),(d))) {} } while (0)
114    
115    
116    
117  /*************************************************  /*************************************************
# Line 161  static BOOL do_colour = FALSE; Line 170  static BOOL do_colour = FALSE;
170  static BOOL file_offsets = FALSE;  static BOOL file_offsets = FALSE;
171  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
172  static BOOL invert = FALSE;  static BOOL invert = FALSE;
173    static BOOL line_buffered = FALSE;
174  static BOOL line_offsets = FALSE;  static BOOL line_offsets = FALSE;
175  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
176  static BOOL number = FALSE;  static BOOL number = FALSE;
177    static BOOL omit_zero_count = FALSE;
178  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
179  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
180  static BOOL silent = FALSE;  static BOOL silent = FALSE;
# Line 196  used to identify them. */ Line 207  used to identify them. */
207  #define N_NULL         (-9)  #define N_NULL         (-9)
208  #define N_LOFFSETS     (-10)  #define N_LOFFSETS     (-10)
209  #define N_FOFFSETS     (-11)  #define N_FOFFSETS     (-11)
210    #define N_LBUFFER      (-12)
211    
212  static option_item optionlist[] = {  static option_item optionlist[] = {
213    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
# Line 208  static option_item optionlist[] = { Line 220  static option_item optionlist[] = {
220    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
221    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
222    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
223    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_PATLIST,   'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
224    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_NODATA,    'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
225    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
226    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
227    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
# Line 218  static option_item optionlist[] = { Line 230  static option_item optionlist[] = {
230    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
231    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
232    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
233      { OP_NODATA,    N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
234    { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },    { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
235    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
236    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
# Line 329  return (statbuf.st_mode & S_IFMT) == S_I Line 342  return (statbuf.st_mode & S_IFMT) == S_I
342  }  }
343    
344    
345  /************* Test stdout for being a terminal in Unix **********/  /************* Test for a terminal in Unix **********/
346    
347  static BOOL  static BOOL
348  is_stdout_tty(void)  is_stdout_tty(void)
# Line 337  is_stdout_tty(void) Line 350  is_stdout_tty(void)
350  return isatty(fileno(stdout));  return isatty(fileno(stdout));
351  }  }
352    
353    static BOOL
354    is_file_tty(FILE *f)
355    {
356    return isatty(fileno(f));
357    }
358    
359    
360  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
361    
# Line 344  return isatty(fileno(stdout)); Line 363  return isatty(fileno(stdout));
363  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
364  when it did not exist. David Byron added a patch that moved the #include of  when it did not exist. David Byron added a patch that moved the #include of
365  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
366  */  The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
367    undefined when it is indeed undefined. */
368    
369  #elif HAVE_WINDOWS_H  #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
370    
371  #ifndef STRICT  #ifndef STRICT
372  # define STRICT  # define STRICT
# Line 449  return !isdirectory(filename); Line 469  return !isdirectory(filename);
469  }  }
470    
471    
472  /************* Test stdout for being a terminal in Win32 **********/  /************* Test for a terminal in Win32 **********/
473    
474  /* I don't know how to do this; assume never */  /* I don't know how to do this; assume never */
475    
# Line 459  is_stdout_tty(void) Line 479  is_stdout_tty(void)
479  return FALSE;  return FALSE;
480  }  }
481    
482    static BOOL
483    is_file_tty(FILE *f)
484    {
485    return FALSE;
486    }
487    
488    
489  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
490    
# Line 481  void closedirectory(directory_type *dir) Line 507  void closedirectory(directory_type *dir)
507  int isregfile(char *filename) { return 1; }  int isregfile(char *filename) { return 1; }
508    
509    
510  /************* Test stdout for being a terminal when we can't do it **********/  /************* Test for a terminal when we can't do it **********/
511    
512  static BOOL  static BOOL
513  is_stdout_tty(void)  is_stdout_tty(void)
# Line 489  is_stdout_tty(void) Line 515  is_stdout_tty(void)
515  return FALSE;  return FALSE;
516  }  }
517    
518    static BOOL
519    is_file_tty(FILE *f)
520    {
521    return FALSE;
522    }
523    
524  #endif  #endif
525    
# Line 517  return sys_errlist[n]; Line 548  return sys_errlist[n];
548    
549    
550  /*************************************************  /*************************************************
551    *            Read one line of input              *
552    *************************************************/
553    
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because the data may contain embedded
binary zeros, and fgets() gives no way of telling how many characters it has
read. Characters are therefore fetched one at a time with fgetc().

Reading stops after a newline has been stored, when "length" characters have
been stored, or at end of file, whichever comes first. The data is not
zero-terminated. If "length" is zero or negative, nothing is read from the file
and nothing is written to the buffer.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file (or when
             length is not positive)
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;

/* The space check comes before the read so that a character is never fetched
when there is nowhere to put it. */

while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return yield;
}
581    
582    
583    
584    /*************************************************
585  *             Find end of line                   *  *             Find end of line                   *
586  *************************************************/  *************************************************/
587    
# Line 811  if (after_context > 0 && lastmatchnumber Line 876  if (after_context > 0 && lastmatchnumber
876      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
877      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
878      pp = end_of_line(pp, endptr, &ellength);      pp = end_of_line(pp, endptr, &ellength);
879      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
880      lastmatchrestart = pp;      lastmatchrestart = pp;
881      }      }
882    hyphenpending = TRUE;    hyphenpending = TRUE;
# Line 821  if (after_context > 0 && lastmatchnumber Line 886  if (after_context > 0 && lastmatchnumber
886    
887    
888  /*************************************************  /*************************************************
889    *   Apply patterns to subject till one matches   *
890    *************************************************/
891    
892    /* This function is called to run through all patterns, looking for a match. It
893    is used multiple times for the same subject when colouring is enabled, in order
894    to find all possible matches.
895    
896    Arguments:
897      matchptr    the start of the subject
898      length      the length of the subject to match
899      offsets     the offets vector to fill in
900      mrc         address of where to put the result of pcre_exec()
901    
902    Returns:      TRUE if there was a match
903                  FALSE if there was no match
904                  invert if there was a non-fatal error
905    */
906    
907    static BOOL
908    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
909    {
910    int i;
911    for (i = 0; i < pattern_count; i++)
912      {
913      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
914        PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
915      if (*mrc >= 0) return TRUE;
916      if (*mrc == PCRE_ERROR_NOMATCH) continue;
917      fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
918      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
919      fprintf(stderr, "this text:\n");
920      FWRITE(matchptr, 1, length, stderr);   /* In case binary zero included */
921      fprintf(stderr, "\n");
922      if (error_count == 0 &&
923          (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
924        {
925        fprintf(stderr, "pcregrep: error %d means that a resource limit "
926          "was exceeded\n", *mrc);
927        fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
928        }
929      if (error_count++ > 20)
930        {
931        fprintf(stderr, "pcregrep: too many errors - abandoned\n");
932        exit(2);
933        }
934      return invert;    /* No more matching; don't show the line again */
935      }
936    
937    return FALSE;  /* No match, no errors */
938    }
939    
940    
941    
942    /*************************************************
943  *            Grep an individual file             *  *            Grep an individual file             *
944  *************************************************/  *************************************************/
945    
# Line 853  int linenumber = 1; Line 972  int linenumber = 1;
972  int lastmatchnumber = 0;  int lastmatchnumber = 0;
973  int count = 0;  int count = 0;
974  int filepos = 0;  int filepos = 0;
975  int offsets[99];  int offsets[OFFSET_SIZE];
976  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
977  char buffer[3*MBUFTHIRD];  char buffer[3*MBUFTHIRD];
978  char *ptr = buffer;  char *ptr = buffer;
979  char *endptr;  char *endptr;
980  size_t bufflength;  size_t bufflength;
981  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
982    BOOL input_line_buffered = line_buffered;
983  FILE *in = NULL;                    /* Ensure initialized */  FILE *in = NULL;                    /* Ensure initialized */
984    
985  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
# Line 897  else Line 1017  else
1017    
1018    {    {
1019    in = (FILE *)handle;    in = (FILE *)handle;
1020    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);    if (is_file_tty(in)) input_line_buffered = TRUE;
1021      bufflength = input_line_buffered?
1022        read_one_line(buffer, 3*MBUFTHIRD, in) :
1023        fread(buffer, 1, 3*MBUFTHIRD, in);
1024    }    }
1025    
1026  endptr = buffer + bufflength;  endptr = buffer + bufflength;
# Line 909  way, the buffer is shifted left and re-f Line 1032  way, the buffer is shifted left and re-f
1032    
1033  while (ptr < endptr)  while (ptr < endptr)
1034    {    {
1035    int i, endlinelength;    int endlinelength;
1036    int mrc = 0;    int mrc = 0;
1037    BOOL match = FALSE;    BOOL match;
1038    char *matchptr = ptr;    char *matchptr = ptr;
1039    char *t = ptr;    char *t = ptr;
1040    size_t length, linelength;    size_t length, linelength;
# Line 919  while (ptr < endptr) Line 1042  while (ptr < endptr)
1042    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
1043    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
1044    length remainder of the data in the buffer. Otherwise, it is the length of    length remainder of the data in the buffer. Otherwise, it is the length of
1045    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
1046    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1047    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
1048      first line. */
1049    
1050    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
1051    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
# Line 936  while (ptr < endptr) Line 1060  while (ptr < endptr)
1060        #include <time.h>        #include <time.h>
1061        struct timeval start_time, end_time;        struct timeval start_time, end_time;
1062        struct timezone dummy;        struct timezone dummy;
1063          int i;
1064    
1065        if (jfriedl_XT)        if (jfriedl_XT)
1066        {        {
# Line 961  while (ptr < endptr) Line 1086  while (ptr < endptr)
1086    
1087    
1088        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1089            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1090                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1091    
1092        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
1093                perror("bad gettimeofday");                perror("bad gettimeofday");
# Line 980  while (ptr < endptr) Line 1106  while (ptr < endptr)
1106    
1107    ONLY_MATCHING_RESTART:    ONLY_MATCHING_RESTART:
1108    
1109    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches or there is an error other
1110    the final newline in the subject string. */    than NOMATCH. This code is in a subroutine so that it can be re-used for
1111      finding subsequent matches when colouring matched lines. */
1112    
1113    for (i = 0; i < pattern_count; i++)    match = match_patterns(matchptr, length, offsets, &mrc);
     {  
     mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,  
       offsets, 99);  
     if (mrc >= 0) { match = TRUE; break; }  
     if (mrc != PCRE_ERROR_NOMATCH)  
       {  
       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);  
       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
       fprintf(stderr, "this line:\n");  
       fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */  
       fprintf(stderr, "\n");  
       if (error_count == 0 &&  
           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))  
         {  
         fprintf(stderr, "pcregrep: error %d means that a resource limit "  
           "was exceeded\n", mrc);  
         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
         }  
       if (error_count++ > 20)  
         {  
         fprintf(stderr, "pcregrep: too many errors - abandoned\n");  
         exit(2);  
         }  
       match = invert;    /* No more matching; don't show the line again */  
       break;  
       }  
     }  
1114    
1115    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1116    
# Line 1029  while (ptr < endptr) Line 1129  while (ptr < endptr)
1129      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1130      in the file. */      in the file. */
1131    
1132      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1133        {        {
1134        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1135        return 0;        return 0;
# Line 1054  while (ptr < endptr) Line 1154  while (ptr < endptr)
1154          if (printname != NULL) fprintf(stdout, "%s:", printname);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1155          if (number) fprintf(stdout, "%d:", linenumber);          if (number) fprintf(stdout, "%d:", linenumber);
1156          if (line_offsets)          if (line_offsets)
1157            fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1158              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1159          else if (file_offsets)          else if (file_offsets)
1160            fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1161              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1162          else          else
1163            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);            {
1164              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1165              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1166              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1167              }
1168          fprintf(stdout, "\n");          fprintf(stdout, "\n");
1169          matchptr += offsets[1];          matchptr += offsets[1];
1170          length -= offsets[1];          length -= offsets[1];
# Line 1100  while (ptr < endptr) Line 1204  while (ptr < endptr)
1204            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1205            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1206            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1207            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1208            lastmatchrestart = pp;            lastmatchrestart = pp;
1209            }            }
1210          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
# Line 1140  while (ptr < endptr) Line 1244  while (ptr < endptr)
1244            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1245            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1246            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1247            fwrite(p, 1, pp - p, stdout);            FWRITE(p, 1, pp - p, stdout);
1248            p = pp;            p = pp;
1249            }            }
1250          }          }
# Line 1190  while (ptr < endptr) Line 1294  while (ptr < endptr)
1294          {          {
1295          int first = S_arg * 2;          int first = S_arg * 2;
1296          int last  = first + 1;          int last  = first + 1;
1297          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1298          fprintf(stdout, "X");          fprintf(stdout, "X");
1299          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1300          }          }
1301        else        else
1302  #endif  #endif
1303    
1304        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1305          matches. */
1306    
1307        if (do_colour)        if (do_colour)
1308          {          {
1309          fwrite(ptr, 1, offsets[0], stdout);          int last_offset = 0;
1310            FWRITE(ptr, 1, offsets[0], stdout);
1311          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1312          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1313          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1314          fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],          for (;;)
1315            stdout);            {
1316              last_offset += offsets[1];
1317              matchptr += offsets[1];
1318              length -= offsets[1];
1319              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1320              FWRITE(matchptr, 1, offsets[0], stdout);
1321              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1322              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1323              fprintf(stdout, "%c[00m", 0x1b);
1324              }
1325            FWRITE(ptr + last_offset, 1,
1326              (linelength + endlinelength) - last_offset, stdout);
1327          }          }
1328        else fwrite(ptr, 1, linelength + endlinelength, stdout);  
1329          /* Not colouring; no need to search for further matches */
1330    
1331          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1332        }        }
1333    
1334      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match. If --line-buffered was
1335        given, flush the output. */
1336    
1337        if (line_buffered) fflush(stdout);
1338      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1339    
1340      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
# Line 1244  while (ptr < endptr) Line 1366  while (ptr < endptr)
1366    offset to the current line is maintained in filepos. */    offset to the current line is maintained in filepos. */
1367    
1368    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
1369    filepos += linelength + endlinelength;    filepos += (int)(linelength + endlinelength);
1370    linenumber++;    linenumber++;
1371    
1372      /* If input is line buffered, and the buffer is not yet full, read another
1373      line and add it into the buffer. */
1374    
1375      if (input_line_buffered && bufflength < sizeof(buffer))
1376        {
1377        int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1378        bufflength += add;
1379        endptr += add;
1380        }
1381    
1382    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1383    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1384    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
# Line 1281  while (ptr < endptr) Line 1413  while (ptr < endptr)
1413      else      else
1414  #endif  #endif
1415    
1416      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);      bufflength = 2*MBUFTHIRD +
1417          (input_line_buffered?
1418           read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1419           fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1420      endptr = buffer + bufflength;      endptr = buffer + bufflength;
1421    
1422      /* Adjust any last match point */      /* Adjust any last match point */
# Line 1313  if (filenames == FN_NOMATCH_ONLY) Line 1447  if (filenames == FN_NOMATCH_ONLY)
1447    
1448  if (count_only)  if (count_only)
1449    {    {
1450    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1451    fprintf(stdout, "%d\n", count);      {
1452        if (printname != NULL && filenames != FN_NONE)
1453          fprintf(stdout, "%s:", printname);
1454        fprintf(stdout, "%d\n", count);
1455        }
1456    }    }
1457    
1458  return rc;  return rc;
# Line 1394  if ((sep = isdirectory(pathname)) != 0) Line 1532  if ((sep = isdirectory(pathname)) != 0)
1532        {        {
1533        int frc, nflen;        int frc, nflen;
1534        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1535        nflen = strlen(nextfile);        nflen = (int)(strlen(nextfile));
1536    
1537        if (isdirectory(buffer))        if (isdirectory(buffer))
1538          {          {
# Line 1438  skipping was not requested. The scan pro Line 1576  skipping was not requested. The scan pro
1576  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1577  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1578    
1579  pathlen = strlen(pathname);  pathlen = (int)(strlen(pathname));
1580    
1581  /* Open using zlib if it is supported and the file name ends with .gz. */  /* Open using zlib if it is supported and the file name ends with .gz. */
1582    
# Line 1478  an attempt to read a .bz2 file indicates Line 1616  an attempt to read a .bz2 file indicates
1616  PLAIN_FILE:  PLAIN_FILE:
1617  #endif  #endif
1618    {    {
1619    in = fopen(pathname, "r");    in = fopen(pathname, "rb");
1620    handle = (void *)in;    handle = (void *)in;
1621    frtype = FR_PLAIN;    frtype = FR_PLAIN;
1622    }    }
# Line 1629  switch(letter) Line 1767  switch(letter)
1767    case N_FOFFSETS: file_offsets = TRUE; break;    case N_FOFFSETS: file_offsets = TRUE; break;
1768    case N_HELP: help(); exit(0);    case N_HELP: help(); exit(0);
1769    case N_LOFFSETS: line_offsets = number = TRUE; break;    case N_LOFFSETS: line_offsets = number = TRUE; break;
1770      case N_LBUFFER: line_buffered = TRUE; break;
1771    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1772    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1773    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
1774    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
1775    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1776    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1777    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1778    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1779    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
# Line 1819  const char *error; Line 1958  const char *error;
1958    
1959  /* Set the default line ending value from the default in the PCRE library;  /* Set the default line ending value from the default in the PCRE library;
1960  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1961  */  Note that the return values from pcre_config(), though derived from the ASCII
1962    codes, are the same in EBCDIC environments, so we must use the actual values
1963    rather than escapes such as '\r'. */
1964    
1965  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1966  switch(i)  switch(i)
1967    {    {
1968    default:                 newline = (char *)"lf"; break;    default:               newline = (char *)"lf"; break;
1969    case '\r':               newline = (char *)"cr"; break;    case 13:               newline = (char *)"cr"; break;
1970    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case (13 << 8) | 10:   newline = (char *)"crlf"; break;
1971    case -1:                 newline = (char *)"any"; break;    case -1:               newline = (char *)"any"; break;
1972    case -2:                 newline = (char *)"anycrlf"; break;    case -2:               newline = (char *)"anycrlf"; break;
1973    }    }
1974    
1975  /* Process the options */  /* Process the options */
# Line 1870  for (i = 1; i < argc; i++) Line 2011  for (i = 1; i < argc; i++)
2011      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2012      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2013      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2014      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
2015      these categories, fortunately. */      both these categories. */
2016    
2017      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2018        {        {
2019        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2020        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2021        if (opbra == NULL)     /* Not a (p) case */  
2022          /* Handle options with only one spelling of the name */
2023    
2024          if (opbra == NULL)     /* Does not contain '(' */
2025          {          {
2026          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2027            {            {
# Line 1885  for (i = 1; i < argc; i++) Line 2029  for (i = 1; i < argc; i++)
2029            }            }
2030          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2031            {            {
2032            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2033            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2034                (int)strlen(arg) : (int)(argequals - arg);
2035            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2036              {              {
2037              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1899  for (i = 1; i < argc; i++) Line 2044  for (i = 1; i < argc; i++)
2044              }              }
2045            }            }
2046          }          }
2047        else                   /* Special case xxxx(p) */  
2048          /* Handle options with an alternate spelling of the name */
2049    
2050          else
2051          {          {
2052          char buff1[24];          char buff1[24];
2053          char buff2[24];          char buff2[24];
2054          int baselen = opbra - op->long_name;  
2055            int baselen = (int)(opbra - op->long_name);
2056            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2057            int arglen = (argequals == NULL || equals == NULL)?
2058              (int)strlen(arg) : (int)(argequals - arg);
2059    
2060          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2061          sprintf(buff2, "%s%.*s", buff1,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2062            (int)strlen(op->long_name) - baselen - 2, opbra + 1);  
2063          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2064               strncmp(arg, buff2, arglen) == 0)
2065              {
2066              if (equals != NULL && argequals != NULL)
2067                {
2068                option_data = argequals;
2069                if (*option_data == '=')
2070                  {
2071                  option_data++;
2072                  longopwasequals = TRUE;
2073                  }
2074                }
2075            break;            break;
2076              }
2077          }          }
2078        }        }
2079    
# Line 1919  for (i = 1; i < argc; i++) Line 2084  for (i = 1; i < argc; i++)
2084        }        }
2085      }      }
2086    
   
2087    /* Jeffrey Friedl's debugging harness uses these additional options which    /* Jeffrey Friedl's debugging harness uses these additional options which
2088    are not in the right form for putting in the option table because they use    are not in the right form for putting in the option table because they use
2089    only one hyphen, yet are more than one character long. By putting them    only one hyphen, yet are more than one character long. By putting them
# Line 2033  for (i = 1; i < argc; i++) Line 2197  for (i = 1; i < argc; i++)
2197      {      {
2198      *((char **)op->dataptr) = option_data;      *((char **)op->dataptr) = option_data;
2199      }      }
2200    
2201      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2202      only for unpicking arguments, so just keep it simple. */
2203    
2204    else    else
2205      {      {
2206      char *endptr;      int n = 0;
2207      int n = strtoul(option_data, &endptr, 10);      char *endptr = option_data;
2208        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2209        while (isdigit((unsigned char)(*endptr)))
2210          n = n * 10 + (int)(*endptr++ - '0');
2211      if (*endptr != 0)      if (*endptr != 0)
2212        {        {
2213        if (longop)        if (longop)
2214          {          {
2215          char *equals = strchr(op->long_name, '=');          char *equals = strchr(op->long_name, '=');
2216          int nlen = (equals == NULL)? (int)strlen(op->long_name) :          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2217            equals - op->long_name;            (int)(equals - op->long_name);
2218          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2219            option_data, nlen, op->long_name);            option_data, nlen, op->long_name);
2220          }          }

Legend:
Removed from v.345  
changed lines
  Added in v.558

  ViewVC Help
Powered by ViewVC 1.1.5