/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 391 by ph10, Tue Mar 17 21:16:01 2009 UTC revision 564 by ph10, Sun Oct 31 16:07:24 2010 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2009 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 83  typedef int BOOL; Line 83  typedef int BOOL;
83  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
84  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
85    
86  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88  /* File reading styles */  /* File reading styles */
89    
# Line 104  enum { DEE_READ, DEE_SKIP }; Line 104  enum { DEE_READ, DEE_SKIP };
104    
105  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
113    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
114    
115    
116    
117  /*************************************************  /*************************************************
# Line 157  static int error_count = 0; Line 165  static int error_count = 0;
165  static int filenames = FN_DEFAULT;  static int filenames = FN_DEFAULT;
166  static int process_options = 0;  static int process_options = 0;
167    
168    static unsigned long int match_limit = 0;
169    static unsigned long int match_limit_recursion = 0;
170    
171  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
172  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
173  static BOOL file_offsets = FALSE;  static BOOL file_offsets = FALSE;
174  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
175  static BOOL invert = FALSE;  static BOOL invert = FALSE;
176    static BOOL line_buffered = FALSE;
177  static BOOL line_offsets = FALSE;  static BOOL line_offsets = FALSE;
178  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
179  static BOOL number = FALSE;  static BOOL number = FALSE;
180    static BOOL omit_zero_count = FALSE;
181  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
182    static BOOL resource_error = FALSE;
183  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
184  static BOOL silent = FALSE;  static BOOL silent = FALSE;
185  static BOOL utf8 = FALSE;  static BOOL utf8 = FALSE;
# Line 197  used to identify them. */ Line 211  used to identify them. */
211  #define N_NULL         (-9)  #define N_NULL         (-9)
212  #define N_LOFFSETS     (-10)  #define N_LOFFSETS     (-10)
213  #define N_FOFFSETS     (-11)  #define N_FOFFSETS     (-11)
214    #define N_LBUFFER      (-12)
215    #define N_M_LIMIT      (-13)
216    #define N_M_LIMIT_REC  (-14)
217    
218  static option_item optionlist[] = {  static option_item optionlist[] = {
219    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
# Line 204  static option_item optionlist[] = { Line 221  static option_item optionlist[] = {
221    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
222    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
223    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
224      { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
225    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
226    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
   { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },  
227    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
228    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
229    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_PATLIST,   'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
230    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_NODATA,    'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
231    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
232    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
233    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
# Line 219  static option_item optionlist[] = { Line 236  static option_item optionlist[] = {
236    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
237    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
238    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
239      { OP_NODATA,    N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
240    { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },    { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
241    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
242      { OP_NUMBER,    N_M_LIMIT,&match_limit,      "match-limit=number", "set PCRE match limit option" },
243      { OP_NUMBER,    N_M_LIMIT_REC,&match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
245    { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },    { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
# Line 330  return (statbuf.st_mode & S_IFMT) == S_I Line 350  return (statbuf.st_mode & S_IFMT) == S_I
350  }  }
351    
352    
353  /************* Test stdout for being a terminal in Unix **********/  /************* Test for a terminal in Unix **********/
354    
355  static BOOL  static BOOL
356  is_stdout_tty(void)  is_stdout_tty(void)
# Line 338  is_stdout_tty(void) Line 358  is_stdout_tty(void)
358  return isatty(fileno(stdout));  return isatty(fileno(stdout));
359  }  }
360    
361    static BOOL
362    is_file_tty(FILE *f)
363    {
364    return isatty(fileno(f));
365    }
366    
367    
368  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
369    
# Line 345  return isatty(fileno(stdout)); Line 371  return isatty(fileno(stdout));
371  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
372  when it did not exist. David Byron added a patch that moved the #include of  when it did not exist. David Byron added a patch that moved the #include of
373  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
374  */  The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
375    undefined when it is indeed undefined. */
376    
377  #elif HAVE_WINDOWS_H  #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
378    
379  #ifndef STRICT  #ifndef STRICT
380  # define STRICT  # define STRICT
# Line 391  dir = (directory_type *) malloc(sizeof(* Line 418  dir = (directory_type *) malloc(sizeof(*
418  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
419    {    {
420    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
421    exit(2);    pcregrep_exit(2);
422    }    }
423  memcpy(pattern, filename, len);  memcpy(pattern, filename, len);
424  memcpy(&(pattern[len]), "\\*", 3);  memcpy(&(pattern[len]), "\\*", 3);
# Line 450  return !isdirectory(filename); Line 477  return !isdirectory(filename);
477  }  }
478    
479    
480  /************* Test stdout for being a terminal in Win32 **********/  /************* Test for a terminal in Win32 **********/
481    
482  /* I don't know how to do this; assume never */  /* I don't know how to do this; assume never */
483    
# Line 460  is_stdout_tty(void) Line 487  is_stdout_tty(void)
487  return FALSE;  return FALSE;
488  }  }
489    
490    static BOOL
491    is_file_tty(FILE *f)
492    {
493    return FALSE;
494    }
495    
496    
497  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
498    
# Line 482  void closedirectory(directory_type *dir) Line 515  void closedirectory(directory_type *dir)
515  int isregfile(char *filename) { return 1; }  int isregfile(char *filename) { return 1; }
516    
517    
518  /************* Test stdout for being a terminal when we can't do it **********/  /************* Test for a terminal when we can't do it **********/
519    
520  static BOOL  static BOOL
521  is_stdout_tty(void)  is_stdout_tty(void)
# Line 490  is_stdout_tty(void) Line 523  is_stdout_tty(void)
523  return FALSE;  return FALSE;
524  }  }
525    
526    static BOOL
527    is_file_tty(FILE *f)
528    {
529    return FALSE;
530    }
531    
532  #endif  #endif
533    
# Line 518  return sys_errlist[n]; Line 556  return sys_errlist[n];
556    
557    
558  /*************************************************  /*************************************************
559    *         Exit from the program                  *
560    *************************************************/
561    
562    /* If there has been a resource error, give a suitable message.
563    
564    Argument:  the return code
565    Returns:   does not return
566    */
567    
568    static void
569    pcregrep_exit(int rc)
570    {
571    if (resource_error)
572      {
573      fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
574        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
575      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
576      }
577    
578    exit(rc);
579    }
580    
581    
582    
583    /*************************************************
584    *            Read one line of input              *
585    *************************************************/
586    
587    /* Normally, input is read using fread() into a large buffer, so many lines may
588    be read at once. However, doing this for tty input means that no output appears
589    until a lot of input has been typed. Instead, tty input is handled line by
590    line. We cannot use fgets() for this, because it does not stop at a binary
591    zero, and therefore there is no way of telling how many characters it has read,
592    because there may be binary zeros embedded in the data.
593    
594    Arguments:
595      buffer     the buffer to read into
596      length     the maximum number of characters to read
597      f          the file
598    
599    Returns:     the number of characters read, zero at end of file
600    */
601    
602    static int
603    read_one_line(char *buffer, int length, FILE *f)
604    {
605    int c;
606    int yield = 0;
607    while ((c = fgetc(f)) != EOF)
608      {
609      buffer[yield++] = c;
610      if (c == '\n' || yield >= length) break;
611      }
612    return yield;
613    }
614    
615    
616    
617    /*************************************************
618  *             Find end of line                   *  *             Find end of line                   *
619  *************************************************/  *************************************************/
620    
# Line 812  if (after_context > 0 && lastmatchnumber Line 909  if (after_context > 0 && lastmatchnumber
909      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
910      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
911      pp = end_of_line(pp, endptr, &ellength);      pp = end_of_line(pp, endptr, &ellength);
912      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
913      lastmatchrestart = pp;      lastmatchrestart = pp;
914      }      }
915    hyphenpending = TRUE;    hyphenpending = TRUE;
# Line 825  if (after_context > 0 && lastmatchnumber Line 922  if (after_context > 0 && lastmatchnumber
922  *   Apply patterns to subject till one matches   *  *   Apply patterns to subject till one matches   *
923  *************************************************/  *************************************************/
924    
925  /* This function is called to run through all patterns, looking for a match. It  /* This function is called to run through all patterns, looking for a match. It
926  is used multiple times for the same subject when colouring is enabled, in order  is used multiple times for the same subject when colouring is enabled, in order
927  to find all possible matches.  to find all possible matches.
928    
929  Arguments:  Arguments:
# Line 834  Arguments: Line 931  Arguments:
931    length      the length of the subject to match    length      the length of the subject to match
932    offsets     the offsets vector to fill in    offsets     the offsets vector to fill in
933    mrc         address of where to put the result of pcre_exec()    mrc         address of where to put the result of pcre_exec()
934    
935  Returns:      TRUE if there was a match  Returns:      TRUE if there was a match
936                FALSE if there was no match                FALSE if there was no match
937                invert if there was a non-fatal error                invert if there was a non-fatal error
938  */  */
939    
940  static BOOL  static BOOL
941  match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)  match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
942  {  {
943  int i;  int i;
944    size_t slen = length;
945    const char *msg = "this text:\n\n";
946    if (slen > 200)
947      {
948      slen = 200;
949      msg = "text that starts:\n\n";
950      }
951  for (i = 0; i < pattern_count; i++)  for (i = 0; i < pattern_count; i++)
952    {    {
953    *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0,    *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
954      PCRE_NOTEMPTY, offsets, OFFSET_SIZE);      PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
955    if (*mrc >= 0) return TRUE;    if (*mrc >= 0) return TRUE;
956    if (*mrc == PCRE_ERROR_NOMATCH) continue;    if (*mrc == PCRE_ERROR_NOMATCH) continue;
957    fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);    fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
958    if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);    if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
959    fprintf(stderr, "this text:\n");    fprintf(stderr, "%s", msg);
960    fwrite(matchptr, 1, length, stderr);  /* In case binary zero included */    FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
961    fprintf(stderr, "\n");    fprintf(stderr, "\n\n");
962    if (error_count == 0 &&    if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
963        (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))      resource_error = TRUE;
     {  
     fprintf(stderr, "pcregrep: error %d means that a resource limit "  
       "was exceeded\n", *mrc);  
     fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
     }  
964    if (error_count++ > 20)    if (error_count++ > 20)
965      {      {
966      fprintf(stderr, "pcregrep: too many errors - abandoned\n");      fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
967      exit(2);      pcregrep_exit(2);
968      }      }
969    return invert;    /* No more matching; don't show the line again */    return invert;    /* No more matching; don't show the line again */
970    }    }
# Line 915  char *ptr = buffer; Line 1014  char *ptr = buffer;
1014  char *endptr;  char *endptr;
1015  size_t bufflength;  size_t bufflength;
1016  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
1017    BOOL input_line_buffered = line_buffered;
1018  FILE *in = NULL;                    /* Ensure initialized */  FILE *in = NULL;                    /* Ensure initialized */
1019    
1020  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
# Line 952  else Line 1052  else
1052    
1053    {    {
1054    in = (FILE *)handle;    in = (FILE *)handle;
1055    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);    if (is_file_tty(in)) input_line_buffered = TRUE;
1056      bufflength = input_line_buffered?
1057        read_one_line(buffer, 3*MBUFTHIRD, in) :
1058        fread(buffer, 1, 3*MBUFTHIRD, in);
1059    }    }
1060    
1061  endptr = buffer + bufflength;  endptr = buffer + bufflength;
# Line 992  while (ptr < endptr) Line 1095  while (ptr < endptr)
1095        #include <time.h>        #include <time.h>
1096        struct timeval start_time, end_time;        struct timeval start_time, end_time;
1097        struct timezone dummy;        struct timezone dummy;
1098        int i;        int i;
1099    
1100        if (jfriedl_XT)        if (jfriedl_XT)
1101        {        {
# Line 1001  while (ptr < endptr) Line 1104  while (ptr < endptr)
1104            ptr = malloc(newlen + 1);            ptr = malloc(newlen + 1);
1105            if (!ptr) {            if (!ptr) {
1106                    printf("out of memory");                    printf("out of memory");
1107                    exit(2);                    pcregrep_exit(2);
1108            }            }
1109            endptr = ptr;            endptr = ptr;
1110            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
# Line 1018  while (ptr < endptr) Line 1121  while (ptr < endptr)
1121    
1122    
1123        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1124            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1125                PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);                PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1126    
1127        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
# Line 1038  while (ptr < endptr) Line 1141  while (ptr < endptr)
1141    
1142    ONLY_MATCHING_RESTART:    ONLY_MATCHING_RESTART:
1143    
1144    /* Run through all the patterns until one matches or there is an error other    /* Run through all the patterns until one matches or there is an error other
1145    than NOMATCH. This code is in a subroutine so that it can be re-used for    than NOMATCH. This code is in a subroutine so that it can be re-used for
1146    finding subsequent matches when colouring matched lines. */    finding subsequent matches when colouring matched lines. */
1147    
1148    match = match_patterns(matchptr, length, offsets, &mrc);    match = match_patterns(matchptr, length, offsets, &mrc);
1149    
1150    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
# Line 1061  while (ptr < endptr) Line 1164  while (ptr < endptr)
1164      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1165      in the file. */      in the file. */
1166    
1167      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1168        {        {
1169        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1170        return 0;        return 0;
# Line 1094  while (ptr < endptr) Line 1197  while (ptr < endptr)
1197          else          else
1198            {            {
1199            if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);            if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1200            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);            FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1201            if (do_colour) fprintf(stdout, "%c[00m", 0x1b);            if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1202            }            }
1203          fprintf(stdout, "\n");          fprintf(stdout, "\n");
1204          matchptr += offsets[1];          matchptr += offsets[1];
1205          length -= offsets[1];          length -= offsets[1];
1206          match = FALSE;          match = FALSE;
1207            if (line_buffered) fflush(stdout);
1208            rc = 0;    /* Had some success */
1209          goto ONLY_MATCHING_RESTART;          goto ONLY_MATCHING_RESTART;
1210          }          }
1211        }        }
# Line 1136  while (ptr < endptr) Line 1241  while (ptr < endptr)
1241            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1242            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1243            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1244            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1245            lastmatchrestart = pp;            lastmatchrestart = pp;
1246            }            }
1247          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
# Line 1176  while (ptr < endptr) Line 1281  while (ptr < endptr)
1281            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1282            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1283            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1284            fwrite(p, 1, pp - p, stdout);            FWRITE(p, 1, pp - p, stdout);
1285            p = pp;            p = pp;
1286            }            }
1287          }          }
# Line 1226  while (ptr < endptr) Line 1331  while (ptr < endptr)
1331          {          {
1332          int first = S_arg * 2;          int first = S_arg * 2;
1333          int last  = first + 1;          int last  = first + 1;
1334          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1335          fprintf(stdout, "X");          fprintf(stdout, "X");
1336          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1337          }          }
1338        else        else
1339  #endif  #endif
1340    
1341        /* We have to split the line(s) up if colouring, and search for further        /* We have to split the line(s) up if colouring, and search for further
1342        matches. */        matches. */
1343    
1344        if (do_colour)        if (do_colour)
1345          {          {
1346          int last_offset = 0;          int last_offset = 0;
1347          fwrite(ptr, 1, offsets[0], stdout);          FWRITE(ptr, 1, offsets[0], stdout);
1348          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1349          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1350          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1351          for (;;)          for (;;)
1352            {            {
1353            last_offset += offsets[1];            last_offset += offsets[1];
1354            matchptr += offsets[1];            matchptr += offsets[1];
1355            length -= offsets[1];            length -= offsets[1];
1356            if (!match_patterns(matchptr, length, offsets, &mrc)) break;            if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1357            fwrite(matchptr, 1, offsets[0], stdout);            FWRITE(matchptr, 1, offsets[0], stdout);
1358            fprintf(stdout, "%c[%sm", 0x1b, colour_string);            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1359            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);            FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1360            fprintf(stdout, "%c[00m", 0x1b);            fprintf(stdout, "%c[00m", 0x1b);
1361            }            }
1362          fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,          FWRITE(ptr + last_offset, 1,
1363            stdout);            (linelength + endlinelength) - last_offset, stdout);
1364          }          }
1365    
1366        /* Not colouring; no need to search for further matches */        /* Not colouring; no need to search for further matches */
1367    
1368        else fwrite(ptr, 1, linelength + endlinelength, stdout);        else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1369        }        }
1370    
1371      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match. If --line-buffered was
1372        given, flush the output. */
1373    
1374        if (line_buffered) fflush(stdout);
1375      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1376    
1377      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
# Line 1296  while (ptr < endptr) Line 1403  while (ptr < endptr)
1403    offset to the current line is maintained in filepos. */    offset to the current line is maintained in filepos. */
1404    
1405    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
1406    filepos += linelength + endlinelength;    filepos += (int)(linelength + endlinelength);
1407    linenumber++;    linenumber++;
1408    
1409      /* If input is line buffered, and the buffer is not yet full, read another
1410      line and add it into the buffer. */
1411    
1412      if (input_line_buffered && bufflength < sizeof(buffer))
1413        {
1414        int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1415        bufflength += add;
1416        endptr += add;
1417        }
1418    
1419    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1420    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1421    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
# Line 1333  while (ptr < endptr) Line 1450  while (ptr < endptr)
1450      else      else
1451  #endif  #endif
1452    
1453      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);      bufflength = 2*MBUFTHIRD +
1454          (input_line_buffered?
1455           read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1456           fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1457      endptr = buffer + bufflength;      endptr = buffer + bufflength;
1458    
1459      /* Adjust any last match point */      /* Adjust any last match point */
# Line 1365  if (filenames == FN_NOMATCH_ONLY) Line 1484  if (filenames == FN_NOMATCH_ONLY)
1484    
1485  if (count_only)  if (count_only)
1486    {    {
1487    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1488    fprintf(stdout, "%d\n", count);      {
1489        if (printname != NULL && filenames != FN_NONE)
1490          fprintf(stdout, "%s:", printname);
1491        fprintf(stdout, "%d\n", count);
1492        }
1493    }    }
1494    
1495  return rc;  return rc;
# Line 1446  if ((sep = isdirectory(pathname)) != 0) Line 1569  if ((sep = isdirectory(pathname)) != 0)
1569        {        {
1570        int frc, nflen;        int frc, nflen;
1571        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1572        nflen = strlen(nextfile);        nflen = (int)(strlen(nextfile));
1573    
1574        if (isdirectory(buffer))        if (isdirectory(buffer))
1575          {          {
# Line 1490  skipping was not requested. The scan pro Line 1613  skipping was not requested. The scan pro
1613  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1614  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1615    
1616  pathlen = strlen(pathname);  pathlen = (int)(strlen(pathname));
1617    
1618  /* Open using zlib if it is supported and the file name ends with .gz. */  /* Open using zlib if it is supported and the file name ends with .gz. */
1619    
# Line 1530  an attempt to read a .bz2 file indicates Line 1653  an attempt to read a .bz2 file indicates
1653  PLAIN_FILE:  PLAIN_FILE:
1654  #endif  #endif
1655    {    {
1656    in = fopen(pathname, "r");    in = fopen(pathname, "rb");
1657    handle = (void *)in;    handle = (void *)in;
1658    frtype = FR_PLAIN;    frtype = FR_PLAIN;
1659    }    }
# Line 1679  handle_option(int letter, int options) Line 1802  handle_option(int letter, int options)
1802  switch(letter)  switch(letter)
1803    {    {
1804    case N_FOFFSETS: file_offsets = TRUE; break;    case N_FOFFSETS: file_offsets = TRUE; break;
1805    case N_HELP: help(); exit(0);    case N_HELP: help(); pcregrep_exit(0);
1806    case N_LOFFSETS: line_offsets = number = TRUE; break;    case N_LOFFSETS: line_offsets = number = TRUE; break;
1807      case N_LBUFFER: line_buffered = TRUE; break;
1808    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1809    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1810    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
1811    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
1812    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1813    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1814    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1815    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1816    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
# Line 1701  switch(letter) Line 1825  switch(letter)
1825    
1826    case 'V':    case 'V':
1827    fprintf(stderr, "pcregrep version %s\n", pcre_version());    fprintf(stderr, "pcregrep version %s\n", pcre_version());
1828    exit(0);    pcregrep_exit(0);
1829    break;    break;
1830    
1831    default:    default:
1832    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1833    exit(usage(2));    pcregrep_exit(usage(2));
1834    }    }
1835    
1836  return options;  return options;
# Line 1872  const char *error; Line 1996  const char *error;
1996  /* Set the default line ending value from the default in the PCRE library;  /* Set the default line ending value from the default in the PCRE library;
1997  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1998  Note that the return values from pcre_config(), though derived from the ASCII  Note that the return values from pcre_config(), though derived from the ASCII
1999  codes, are the same in EBCDIC environments, so we must use the actual values  codes, are the same in EBCDIC environments, so we must use the actual values
2000  rather than escapes such as '\r'. */  rather than escapes such as '\r'. */
2001    
2002  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
# Line 1902  for (i = 1; i < argc; i++) Line 2026  for (i = 1; i < argc; i++)
2026    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2027      {      {
2028      if (pattern_filename != NULL || pattern_count > 0) break;      if (pattern_filename != NULL || pattern_count > 0) break;
2029        else exit(usage(2));        else pcregrep_exit(usage(2));
2030      }      }
2031    
2032    /* Handle a long name option, or -- to terminate the options */    /* Handle a long name option, or -- to terminate the options */
# Line 1924  for (i = 1; i < argc; i++) Line 2048  for (i = 1; i < argc; i++)
2048      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2049      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2050      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2051      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
2052      these categories, fortunately. */      both these categories. */
2053    
2054      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2055        {        {
2056        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2057        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2058        if (opbra == NULL)     /* Not a (p) case */  
2059          /* Handle options with only one spelling of the name */
2060    
2061          if (opbra == NULL)     /* Does not contain '(' */
2062          {          {
2063          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2064            {            {
# Line 1939  for (i = 1; i < argc; i++) Line 2066  for (i = 1; i < argc; i++)
2066            }            }
2067          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2068            {            {
2069            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2070            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2071                (int)strlen(arg) : (int)(argequals - arg);
2072            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2073              {              {
2074              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1953  for (i = 1; i < argc; i++) Line 2081  for (i = 1; i < argc; i++)
2081              }              }
2082            }            }
2083          }          }
2084        else                   /* Special case xxxx(p) */  
2085          /* Handle options with an alternate spelling of the name */
2086    
2087          else
2088          {          {
2089          char buff1[24];          char buff1[24];
2090          char buff2[24];          char buff2[24];
2091          int baselen = opbra - op->long_name;  
2092            int baselen = (int)(opbra - op->long_name);
2093            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2094            int arglen = (argequals == NULL || equals == NULL)?
2095              (int)strlen(arg) : (int)(argequals - arg);
2096    
2097          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2098          sprintf(buff2, "%s%.*s", buff1,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2099            (int)strlen(op->long_name) - baselen - 2, opbra + 1);  
2100          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2101               strncmp(arg, buff2, arglen) == 0)
2102              {
2103              if (equals != NULL && argequals != NULL)
2104                {
2105                option_data = argequals;
2106                if (*option_data == '=')
2107                  {
2108                  option_data++;
2109                  longopwasequals = TRUE;
2110                  }
2111                }
2112            break;            break;
2113              }
2114          }          }
2115        }        }
2116    
2117      if (op->one_char == 0)      if (op->one_char == 0)
2118        {        {
2119        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2120        exit(usage(2));        pcregrep_exit(usage(2));
2121        }        }
2122      }      }
2123    
   
2124    /* Jeffrey Friedl's debugging harness uses these additional options which    /* Jeffrey Friedl's debugging harness uses these additional options which
2125    are not in the right form for putting in the option table because they use    are not in the right form for putting in the option table because they use
2126    only one hyphen, yet are more than one character long. By putting them    only one hyphen, yet are more than one character long. By putting them
# Line 2012  for (i = 1; i < argc; i++) Line 2159  for (i = 1; i < argc; i++)
2159          {          {
2160          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2161            *s, argv[i]);            *s, argv[i]);
2162          exit(usage(2));          pcregrep_exit(usage(2));
2163          }          }
2164        if (op->type != OP_NODATA || s[1] == 0)        if (op->type != OP_NODATA || s[1] == 0)
2165          {          {
# Line 2062  for (i = 1; i < argc; i++) Line 2209  for (i = 1; i < argc; i++)
2209      if (i >= argc - 1 || longopwasequals)      if (i >= argc - 1 || longopwasequals)
2210        {        {
2211        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2212        exit(usage(2));        pcregrep_exit(usage(2));
2213        }        }
2214      option_data = argv[++i];      option_data = argv[++i];
2215      }      }
# Line 2087  for (i = 1; i < argc; i++) Line 2234  for (i = 1; i < argc; i++)
2234      {      {
2235      *((char **)op->dataptr) = option_data;      *((char **)op->dataptr) = option_data;
2236      }      }
2237    
2238      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2239      only for unpicking arguments, so just keep it simple. */
2240    
2241    else    else
2242      {      {
2243      char *endptr;      unsigned long int n = 0;
2244      int n = strtoul(option_data, &endptr, 10);      char *endptr = option_data;
2245        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2246        while (isdigit((unsigned char)(*endptr)))
2247          n = n * 10 + (int)(*endptr++ - '0');
2248      if (*endptr != 0)      if (*endptr != 0)
2249        {        {
2250        if (longop)        if (longop)
2251          {          {
2252          char *equals = strchr(op->long_name, '=');          char *equals = strchr(op->long_name, '=');
2253          int nlen = (equals == NULL)? (int)strlen(op->long_name) :          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2254            equals - op->long_name;            (int)(equals - op->long_name);
2255          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2256            option_data, nlen, op->long_name);            option_data, nlen, op->long_name);
2257          }          }
2258        else        else
2259          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2260            option_data, op->one_char);            option_data, op->one_char);
2261        exit(usage(2));        pcregrep_exit(usage(2));
2262        }        }
2263      *((int *)op->dataptr) = n;      *((int *)op->dataptr) = n;
2264      }      }
# Line 2127  if ((only_matching && (file_offsets || l Line 2281  if ((only_matching && (file_offsets || l
2281    {    {
2282    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2283      "and/or --line-offsets\n");      "and/or --line-offsets\n");
2284    exit(usage(2));    pcregrep_exit(usage(2));
2285    }    }
2286    
2287  if (file_offsets || line_offsets) only_matching = TRUE;  if (file_offsets || line_offsets) only_matching = TRUE;
# Line 2338  for (j = 0; j < pattern_count; j++) Line 2492  for (j = 0; j < pattern_count; j++)
2492      }      }
2493    hint_count++;    hint_count++;
2494    }    }
2495    
2496    /* If --match-limit or --recursion-limit was set, put the value(s) into the
2497    pcre_extra block for each pattern. */
2498    
2499    if (match_limit > 0 || match_limit_recursion > 0)
2500      {
2501      for (j = 0; j < pattern_count; j++)
2502        {
2503        if (hints_list[j] == NULL)
2504          {
2505          hints_list[j] = malloc(sizeof(pcre_extra));
2506          if (hints_list[j] == NULL)
2507            {
2508            fprintf(stderr, "pcregrep: malloc failed\n");
2509            pcregrep_exit(2);
2510            }
2511          }
2512        if (match_limit > 0)
2513          {
2514          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2515          hints_list[j]->match_limit = match_limit;
2516          }
2517        if (match_limit_recursion > 0)
2518          {
2519          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2520          hints_list[j]->match_limit_recursion = match_limit_recursion;
2521          }
2522        }
2523      }
2524    
2525  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
2526    
# Line 2420  if (pattern_list != NULL) Line 2603  if (pattern_list != NULL)
2603    }    }
2604  if (hints_list != NULL)  if (hints_list != NULL)
2605    {    {
2606    for (i = 0; i < hint_count; i++) free(hints_list[i]);    for (i = 0; i < hint_count; i++)
2607        {
2608        if (hints_list[i] != NULL) free(hints_list[i]);
2609        }
2610    free(hints_list);    free(hints_list);
2611    }    }
2612  return rc;  pcregrep_exit(rc);
2613    
2614  EXIT2:  EXIT2:
2615  rc = 2;  rc = 2;

Legend:
Removed from v.391  
changed lines
  Added in v.564

  ViewVC Help
Powered by ViewVC 1.1.5