/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 296 by ph10, Tue Jan 1 20:09:30 2008 UTC revision 586 by ph10, Wed Jan 12 17:36:47 2011 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2011 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 71  POSSIBILITY OF SUCH DAMAGE. Line 71  POSSIBILITY OF SUCH DAMAGE.
71  typedef int BOOL;  typedef int BOOL;
72    
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define MBUFTHIRD BUFSIZ
# Line 82  typedef int BOOL; Line 83  typedef int BOOL;
83  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
84  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
85    
86  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88  /* File reading styles */  /* File reading styles */
89    
# Line 103  enum { DEE_READ, DEE_SKIP }; Line 104  enum { DEE_READ, DEE_SKIP };
104    
105  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge: the result of fwrite() is tested by
an "if" with an empty body. Oddly, this does not also seem to apply to
fprintf().

The "if" is wrapped in do { ... } while (0) so that "FWRITE(...);" is exactly
one statement. Without the wrapper, using the macro as the unbraced body of an
"if" that has an "else" does not compile, because the semicolon after the
macro call ends the outer "if" before the "else" is reached. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114    
115    
116    
117  /*************************************************  /*************************************************
# Line 139  static pcre_extra **hints_list = NULL; Line 148  static pcre_extra **hints_list = NULL;
148    
149  static char *include_pattern = NULL;  static char *include_pattern = NULL;
150  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
151    static char *include_dir_pattern = NULL;
152    static char *exclude_dir_pattern = NULL;
153    
154  static pcre *include_compiled = NULL;  static pcre *include_compiled = NULL;
155  static pcre *exclude_compiled = NULL;  static pcre *exclude_compiled = NULL;
156    static pcre *include_dir_compiled = NULL;
157    static pcre *exclude_dir_compiled = NULL;
158    
159  static int after_context = 0;  static int after_context = 0;
160  static int before_context = 0;  static int before_context = 0;
# Line 150  static int dee_action = dee_READ; Line 163  static int dee_action = dee_READ;
163  static int DEE_action = DEE_READ;  static int DEE_action = DEE_READ;
164  static int error_count = 0;  static int error_count = 0;
165  static int filenames = FN_DEFAULT;  static int filenames = FN_DEFAULT;
166    static int only_matching = -1;
167  static int process_options = 0;  static int process_options = 0;
168    
169    static unsigned long int match_limit = 0;
170    static unsigned long int match_limit_recursion = 0;
171    
172  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
173  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
174  static BOOL file_offsets = FALSE;  static BOOL file_offsets = FALSE;
175  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
176  static BOOL invert = FALSE;  static BOOL invert = FALSE;
177    static BOOL line_buffered = FALSE;
178  static BOOL line_offsets = FALSE;  static BOOL line_offsets = FALSE;
179  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
180  static BOOL number = FALSE;  static BOOL number = FALSE;
181  static BOOL only_matching = FALSE;  static BOOL omit_zero_count = FALSE;
182    static BOOL resource_error = FALSE;
183  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
184  static BOOL silent = FALSE;  static BOOL silent = FALSE;
185  static BOOL utf8 = FALSE;  static BOOL utf8 = FALSE;
186    
187  /* Structure for options and list of them */  /* Structure for options and list of them */
188    
189  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
190         OP_PATLIST };         OP_OP_NUMBER, OP_PATLIST };
191    
192  typedef struct option_item {  typedef struct option_item {
193    int type;    int type;
# Line 181  typedef struct option_item { Line 200  typedef struct option_item {
200  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
201  used to identify them. */  used to identify them. */
202    
203  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
204  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
205  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
206  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
207  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
208  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
209  #define N_NULL      (-7)  #define N_LABEL        (-7)
210  #define N_LOFFSETS  (-8)  #define N_LOCALE       (-8)
211  #define N_FOFFSETS  (-9)  #define N_NULL         (-9)
212    #define N_LOFFSETS     (-10)
213    #define N_FOFFSETS     (-11)
214    #define N_LBUFFER      (-12)
215    #define N_M_LIMIT      (-13)
216    #define N_M_LIMIT_REC  (-14)
217    
218  static option_item optionlist[] = {  static option_item optionlist[] = {
219    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,     N_NULL,   NULL,              "",              "  terminate options" },
220    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },    { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
221    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
222    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
223    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
224    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },    { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
225    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
226    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
227    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
228    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
229    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_PATLIST,    'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
230    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
231    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,     'f',      &pattern_filename, "file=path",     "read patterns from file" },
232    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },    { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
233    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
234    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
235    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
236    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
237    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
238    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
239    { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },    { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
240    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
241    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
242    { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },    { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
243    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
245    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
247    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_OP_NUMBER,  'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
248    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
249      { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
250      { OP_STRING,     N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
251      { OP_STRING,     N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
252      { OP_STRING,     N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
253      { OP_STRING,     N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
254    
255      /* These two were accidentally implemented with underscores instead of
256      hyphens in the option names. As this was not discovered for several releases,
257      the incorrect versions are left in the table for compatibility. However, the
258      --help function misses out any option that has an underscore in its name. */
259    
260      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
261      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
262    
263  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
264    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
265  #endif  #endif
# Line 258  const char utf8_table4[] = { Line 296  const char utf8_table4[] = {
296    
297    
298  /*************************************************  /*************************************************
299    *         Exit from the program                  *
300    *************************************************/
301    
302    /* If there has been a resource error, give a suitable message.
303    
304    Argument:  the return code
305    Returns:   does not return
306    */
307    
308    static void
309    pcregrep_exit(int rc)
310    {
311    if (resource_error)
312      {
313      fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
314        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
315      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
316      }
317    
318    exit(rc);
319    }
320    
321    
322    /*************************************************
323  *            OS-specific functions               *  *            OS-specific functions               *
324  *************************************************/  *************************************************/
325    
# Line 321  return (statbuf.st_mode & S_IFMT) == S_I Line 383  return (statbuf.st_mode & S_IFMT) == S_I
383  }  }
384    
385    
386  /************* Test stdout for being a terminal in Unix **********/  /************* Test for a terminal in Unix **********/
387    
388  static BOOL  static BOOL
389  is_stdout_tty(void)  is_stdout_tty(void)
# Line 329  is_stdout_tty(void) Line 391  is_stdout_tty(void)
391  return isatty(fileno(stdout));  return isatty(fileno(stdout));
392  }  }
393    
394    static BOOL
395    is_file_tty(FILE *f)
396    {
397    return isatty(fileno(f));
398    }
399    
400    
401  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
402    
# Line 336  return isatty(fileno(stdout)); Line 404  return isatty(fileno(stdout));
404  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
405  when it did not exist. David Byron added a patch that moved the #include of  when it did not exist. David Byron added a patch that moved the #include of
406  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.  <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
407  */  The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
408    undefined when it is indeed undefined. */
409    
410  #elif HAVE_WINDOWS_H  #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
411    
412  #ifndef STRICT  #ifndef STRICT
413  # define STRICT  # define STRICT
# Line 382  dir = (directory_type *) malloc(sizeof(* Line 451  dir = (directory_type *) malloc(sizeof(*
451  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
452    {    {
453    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
454    exit(2);    pcregrep_exit(2);
455    }    }
456  memcpy(pattern, filename, len);  memcpy(pattern, filename, len);
457  memcpy(&(pattern[len]), "\\*", 3);  memcpy(&(pattern[len]), "\\*", 3);
# Line 441  return !isdirectory(filename); Line 510  return !isdirectory(filename);
510  }  }
511    
512    
513  /************* Test stdout for being a terminal in Win32 **********/  /************* Test for a terminal in Win32 **********/
514    
515  /* I don't know how to do this; assume never */  /* I don't know how to do this; assume never */
516    
# Line 451  is_stdout_tty(void) Line 520  is_stdout_tty(void)
520  return FALSE;  return FALSE;
521  }  }
522    
523    static BOOL
524    is_file_tty(FILE *f)
525    {
526    return FALSE;
527    }
528    
529    
530  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
531    
# Line 473  void closedirectory(directory_type *dir) Line 548  void closedirectory(directory_type *dir)
548  int isregfile(char *filename) { return 1; }  int isregfile(char *filename) { return 1; }
549    
550    
551  /************* Test stdout for being a terminal when we can't do it **********/  /************* Test for a terminal when we can't do it **********/
552    
553  static BOOL  static BOOL
554  is_stdout_tty(void)  is_stdout_tty(void)
# Line 481  is_stdout_tty(void) Line 556  is_stdout_tty(void)
556  return FALSE;  return FALSE;
557  }  }
558    
559    static BOOL
560    is_file_tty(FILE *f)
561    {
562    return FALSE;
563    }
564    
565  #endif  #endif
566    
# Line 509  return sys_errlist[n]; Line 589  return sys_errlist[n];
589    
590    
591  /*************************************************  /*************************************************
592    *            Read one line of input              *
593    *************************************************/
594    
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Characters are copied up to and including the first '\n', or until length
characters have been stored, or until end of file, whichever comes first. The
buffer is not zero-terminated. If length is zero or negative, nothing is read
from the file and nothing is stored.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file (or when
             length is not positive)
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;

/* The space check comes before fgetc() so that a character is never consumed
or stored when there is no room for it. */

while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return yield;
}
622    
623    
624    
625    /*************************************************
626  *             Find end of line                   *  *             Find end of line                   *
627  *************************************************/  *************************************************/
628    
# Line 803  if (after_context > 0 && lastmatchnumber Line 917  if (after_context > 0 && lastmatchnumber
917      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
918      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
919      pp = end_of_line(pp, endptr, &ellength);      pp = end_of_line(pp, endptr, &ellength);
920      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
921      lastmatchrestart = pp;      lastmatchrestart = pp;
922      }      }
923    hyphenpending = TRUE;    hyphenpending = TRUE;
# Line 813  if (after_context > 0 && lastmatchnumber Line 927  if (after_context > 0 && lastmatchnumber
927    
928    
929  /*************************************************  /*************************************************
930    *   Apply patterns to subject till one matches   *
931    *************************************************/
932    
933    /* This function is called to run through all patterns, looking for a match. It
934    is used multiple times for the same subject when colouring is enabled, in order
935    to find all possible matches.
936    
937    Arguments:
938      matchptr    the start of the subject
939      length      the length of the subject to match
940      offsets     the offets vector to fill in
941      mrc         address of where to put the result of pcre_exec()
942    
943    Returns:      TRUE if there was a match
944                  FALSE if there was no match
945                  invert if there was a non-fatal error
946    */
947    
948    static BOOL
949    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
950    {
951    int i;
952    size_t slen = length;
953    const char *msg = "this text:\n\n";
954    if (slen > 200)
955      {
956      slen = 200;
957      msg = "text that starts:\n\n";
958      }
959    for (i = 0; i < pattern_count; i++)
960      {
961      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
962        PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
963      if (*mrc >= 0) return TRUE;
964      if (*mrc == PCRE_ERROR_NOMATCH) continue;
965      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
966      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
967      fprintf(stderr, "%s", msg);
968      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
969      fprintf(stderr, "\n\n");
970      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
971        resource_error = TRUE;
972      if (error_count++ > 20)
973        {
974        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
975        pcregrep_exit(2);
976        }
977      return invert;    /* No more matching; don't show the line again */
978      }
979    
980    return FALSE;  /* No match, no errors */
981    }
982    
983    
984    
985    /*************************************************
986  *            Grep an individual file             *  *            Grep an individual file             *
987  *************************************************/  *************************************************/
988    
# Line 845  int linenumber = 1; Line 1015  int linenumber = 1;
1015  int lastmatchnumber = 0;  int lastmatchnumber = 0;
1016  int count = 0;  int count = 0;
1017  int filepos = 0;  int filepos = 0;
1018  int offsets[99];  int offsets[OFFSET_SIZE];
1019  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
1020  char buffer[3*MBUFTHIRD];  char buffer[3*MBUFTHIRD];
1021  char *ptr = buffer;  char *ptr = buffer;
1022  char *endptr;  char *endptr;
1023  size_t bufflength;  size_t bufflength;
1024  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
1025    BOOL input_line_buffered = line_buffered;
1026  FILE *in = NULL;                    /* Ensure initialized */  FILE *in = NULL;                    /* Ensure initialized */
1027    
1028  #ifdef SUPPORT_LIBZ  #ifdef SUPPORT_LIBZ
# Line 889  else Line 1060  else
1060    
1061    {    {
1062    in = (FILE *)handle;    in = (FILE *)handle;
1063    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);    if (is_file_tty(in)) input_line_buffered = TRUE;
1064      bufflength = input_line_buffered?
1065        read_one_line(buffer, 3*MBUFTHIRD, in) :
1066        fread(buffer, 1, 3*MBUFTHIRD, in);
1067    }    }
1068    
1069  endptr = buffer + bufflength;  endptr = buffer + bufflength;
# Line 901  way, the buffer is shifted left and re-f Line 1075  way, the buffer is shifted left and re-f
1075    
1076  while (ptr < endptr)  while (ptr < endptr)
1077    {    {
1078    int i, endlinelength;    int endlinelength;
1079    int mrc = 0;    int mrc = 0;
1080    BOOL match = FALSE;    BOOL match;
1081    char *matchptr = ptr;    char *matchptr = ptr;
1082    char *t = ptr;    char *t = ptr;
1083    size_t length, linelength;    size_t length, linelength;
# Line 911  while (ptr < endptr) Line 1085  while (ptr < endptr)
1085    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
1086    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
1087    length remainder of the data in the buffer. Otherwise, it is the length of    length remainder of the data in the buffer. Otherwise, it is the length of
1088    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
1089    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1090    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
1091      first line. */
1092    
1093    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
1094    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
# Line 928  while (ptr < endptr) Line 1103  while (ptr < endptr)
1103        #include <time.h>        #include <time.h>
1104        struct timeval start_time, end_time;        struct timeval start_time, end_time;
1105        struct timezone dummy;        struct timezone dummy;
1106          int i;
1107    
1108        if (jfriedl_XT)        if (jfriedl_XT)
1109        {        {
# Line 936  while (ptr < endptr) Line 1112  while (ptr < endptr)
1112            ptr = malloc(newlen + 1);            ptr = malloc(newlen + 1);
1113            if (!ptr) {            if (!ptr) {
1114                    printf("out of memory");                    printf("out of memory");
1115                    exit(2);                    pcregrep_exit(2);
1116            }            }
1117            endptr = ptr;            endptr = ptr;
1118            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
# Line 953  while (ptr < endptr) Line 1129  while (ptr < endptr)
1129    
1130    
1131        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1132            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1133                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1134    
1135        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
1136                perror("bad gettimeofday");                perror("bad gettimeofday");
# Line 972  while (ptr < endptr) Line 1149  while (ptr < endptr)
1149    
1150    ONLY_MATCHING_RESTART:    ONLY_MATCHING_RESTART:
1151    
1152    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches or there is an error other
1153    the final newline in the subject string. */    than NOMATCH. This code is in a subroutine so that it can be re-used for
1154      finding subsequent matches when colouring matched lines. */
1155    
1156    for (i = 0; i < pattern_count; i++)    match = match_patterns(matchptr, length, offsets, &mrc);
     {  
     mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,  
       offsets, 99);  
     if (mrc >= 0) { match = TRUE; break; }  
     if (mrc != PCRE_ERROR_NOMATCH)  
       {  
       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);  
       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
       fprintf(stderr, "this line:\n");  
       fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */  
       fprintf(stderr, "\n");  
       if (error_count == 0 &&  
           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))  
         {  
         fprintf(stderr, "pcregrep: error %d means that a resource limit "  
           "was exceeded\n", mrc);  
         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
         }  
       if (error_count++ > 20)  
         {  
         fprintf(stderr, "pcregrep: too many errors - abandoned\n");  
         exit(2);  
         }  
       match = invert;    /* No more matching; don't show the line again */  
       break;  
       }  
     }  
1157    
1158    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1159    
# Line 1021  while (ptr < endptr) Line 1172  while (ptr < endptr)
1172      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1173      in the file. */      in the file. */
1174    
1175      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1176        {        {
1177        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1178        return 0;        return 0;
# Line 1031  while (ptr < endptr) Line 1182  while (ptr < endptr)
1182    
1183      else if (quiet) return 0;      else if (quiet) return 0;
1184    
1185      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, or a
1186      the --file-offsets and --line-offsets options output offsets for the      captured portion of it, as long as this string is not empty, and the
1187      matching substring (they both force --only-matching). None of these options      --file-offsets and --line-offsets options output offsets for the matching
1188        substring (they both force --only-matching = 0). None of these options
1189      prints any context. Afterwards, adjust the start and length, and then jump      prints any context. Afterwards, adjust the start and length, and then jump
1190      back to look for further matches in the same line. If we are in invert      back to look for further matches in the same line. If we are in invert
1191      mode, however, nothing is printed - this could be still useful because the      mode, however, nothing is printed and we do not restart - this could still
1192      return code is set. */      be useful because the return code is set. */
1193    
1194      else if (only_matching)      else if (only_matching >= 0)
1195        {        {
1196        if (!invert)        if (!invert)
1197          {          {
1198          if (printname != NULL) fprintf(stdout, "%s:", printname);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1199          if (number) fprintf(stdout, "%d:", linenumber);          if (number) fprintf(stdout, "%d:", linenumber);
1200          if (line_offsets)          if (line_offsets)
1201            fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1202              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1203          else if (file_offsets)          else if (file_offsets)
1204            fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d\n",
1205                (int)(filepos + matchptr + offsets[0] - ptr),
1206              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1207          else          else if (only_matching < mrc)
1208            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);            {
1209          fprintf(stdout, "\n");            int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1210              if (plen > 0)
1211                {
1212                if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1213                FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1214                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1215                fprintf(stdout, "\n");
1216                }
1217              }
1218            else if (printname != NULL || number) fprintf(stdout, "\n");
1219          matchptr += offsets[1];          matchptr += offsets[1];
1220          length -= offsets[1];          length -= offsets[1];
1221          match = FALSE;          match = FALSE;
1222            if (line_buffered) fflush(stdout);
1223            rc = 0;    /* Had some success */
1224          goto ONLY_MATCHING_RESTART;          goto ONLY_MATCHING_RESTART;
1225          }          }
1226        }        }
# Line 1092  while (ptr < endptr) Line 1256  while (ptr < endptr)
1256            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1257            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1258            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1259            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1260            lastmatchrestart = pp;            lastmatchrestart = pp;
1261            }            }
1262          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
# Line 1132  while (ptr < endptr) Line 1296  while (ptr < endptr)
1296            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1297            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1298            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1299            fwrite(p, 1, pp - p, stdout);            FWRITE(p, 1, pp - p, stdout);
1300            p = pp;            p = pp;
1301            }            }
1302          }          }
# Line 1182  while (ptr < endptr) Line 1346  while (ptr < endptr)
1346          {          {
1347          int first = S_arg * 2;          int first = S_arg * 2;
1348          int last  = first + 1;          int last  = first + 1;
1349          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1350          fprintf(stdout, "X");          fprintf(stdout, "X");
1351          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1352          }          }
1353        else        else
1354  #endif  #endif
1355    
1356        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1357          matches, but not of course if the line is a non-match. */
1358    
1359        if (do_colour)        if (do_colour && !invert)
1360          {          {
1361          fwrite(ptr, 1, offsets[0], stdout);          int last_offset = 0;
1362            FWRITE(ptr, 1, offsets[0], stdout);
1363          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1364          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1365          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1366          fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],          for (;;)
1367            stdout);            {
1368              last_offset += offsets[1];
1369              matchptr += offsets[1];
1370              length -= offsets[1];
1371              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1372              FWRITE(matchptr, 1, offsets[0], stdout);
1373              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1374              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1375              fprintf(stdout, "%c[00m", 0x1b);
1376              }
1377            FWRITE(ptr + last_offset, 1,
1378              (linelength + endlinelength) - last_offset, stdout);
1379          }          }
1380        else fwrite(ptr, 1, linelength + endlinelength, stdout);  
1381          /* Not colouring; no need to search for further matches */
1382    
1383          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1384        }        }
1385    
1386      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match. If --line-buffered was
1387        given, flush the output. */
1388    
1389        if (line_buffered) fflush(stdout);
1390      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1391    
1392      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
# Line 1236  while (ptr < endptr) Line 1418  while (ptr < endptr)
1418    offset to the current line is maintained in filepos. */    offset to the current line is maintained in filepos. */
1419    
1420    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
1421    filepos += linelength + endlinelength;    filepos += (int)(linelength + endlinelength);
1422    linenumber++;    linenumber++;
1423    
1424      /* If input is line buffered, and the buffer is not yet full, read another
1425      line and add it into the buffer. */
1426    
1427      if (input_line_buffered && bufflength < sizeof(buffer))
1428        {
1429        int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1430        bufflength += add;
1431        endptr += add;
1432        }
1433    
1434    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1435    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1436    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
# Line 1273  while (ptr < endptr) Line 1465  while (ptr < endptr)
1465      else      else
1466  #endif  #endif
1467    
1468      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);      bufflength = 2*MBUFTHIRD +
1469          (input_line_buffered?
1470           read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1471           fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1472      endptr = buffer + bufflength;      endptr = buffer + bufflength;
1473    
1474      /* Adjust any last match point */      /* Adjust any last match point */
# Line 1286  while (ptr < endptr) Line 1480  while (ptr < endptr)
1480  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
1481  hyphenpending if it prints something. */  hyphenpending if it prints something. */
1482    
1483  if (!only_matching && !count_only)  if (only_matching < 0 && !count_only)
1484    {    {
1485    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1486    hyphenpending |= endhyphenpending;    hyphenpending |= endhyphenpending;
# Line 1305  if (filenames == FN_NOMATCH_ONLY) Line 1499  if (filenames == FN_NOMATCH_ONLY)
1499    
1500  if (count_only)  if (count_only)
1501    {    {
1502    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1503    fprintf(stdout, "%d\n", count);      {
1504        if (printname != NULL && filenames != FN_NONE)
1505          fprintf(stdout, "%s:", printname);
1506        fprintf(stdout, "%d\n", count);
1507        }
1508    }    }
1509    
1510  return rc;  return rc;
# Line 1361  if (strcmp(pathname, "-") == 0) Line 1559  if (strcmp(pathname, "-") == 0)
1559    }    }
1560    
1561  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* If the file is a directory, skip if skipping or if we are recursing, scan
1562  each file within it, subject to any include or exclude patterns that were set.  each file and directory within it, subject to any include or exclude patterns
1563  The scanning code is localized so it can be made system-specific. */  that were set. The scanning code is localized so it can be made
1564    system-specific. */
1565    
1566  if ((sep = isdirectory(pathname)) != 0)  if ((sep = isdirectory(pathname)) != 0)
1567    {    {
# Line 1383  if ((sep = isdirectory(pathname)) != 0) Line 1582  if ((sep = isdirectory(pathname)) != 0)
1582    
1583      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
1584        {        {
1585        int frc, blen;        int frc, nflen;
1586        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1587        blen = strlen(buffer);        nflen = (int)(strlen(nextfile));
1588    
1589        if (exclude_compiled != NULL &&        if (isdirectory(buffer))
1590            pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)          {
1591          continue;          if (exclude_dir_compiled != NULL &&
1592                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1593        if (include_compiled != NULL &&            continue;
1594            pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  
1595          continue;          if (include_dir_compiled != NULL &&
1596                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1597              continue;
1598            }
1599          else
1600            {
1601            if (exclude_compiled != NULL &&
1602                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1603              continue;
1604    
1605            if (include_compiled != NULL &&
1606                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1607              continue;
1608            }
1609    
1610        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1611        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
# Line 1416  skipping was not requested. The scan pro Line 1628  skipping was not requested. The scan pro
1628  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1629  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1630    
1631  pathlen = strlen(pathname);  pathlen = (int)(strlen(pathname));
1632    
1633  /* Open using zlib if it is supported and the file name ends with .gz. */  /* Open using zlib if it is supported and the file name ends with .gz. */
1634    
# Line 1456  an attempt to read a .bz2 file indicates Line 1668  an attempt to read a .bz2 file indicates
1668  PLAIN_FILE:  PLAIN_FILE:
1669  #endif  #endif
1670    {    {
1671    in = fopen(pathname, "r");    in = fopen(pathname, "rb");
1672    handle = (void *)in;    handle = (void *)in;
1673    frtype = FR_PLAIN;    frtype = FR_PLAIN;
1674    }    }
# Line 1578  for (op = optionlist; op->one_char != 0; Line 1790  for (op = optionlist; op->one_char != 0;
1790    {    {
1791    int n;    int n;
1792    char s[4];    char s[4];
1793    
1794      /* Two options were accidentally implemented and documented with underscores
1795      instead of hyphens in their names, something that was not noticed for quite a
1796      few releases. When fixing this, I left the underscored versions in the list
1797      in case people were using them. However, we don't want to display them in the
1798      help data. There are no other options that contain underscores, and we do not
1799      expect ever to implement such options. Therefore, just omit any option that
1800      contains an underscore. */
1801    
1802      if (strchr(op->long_name, '_') != NULL) continue;
1803    
1804    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1805    n = 30 - printf("  %s --%s", s, op->long_name);    n = 31 - printf("  %s --%s", s, op->long_name);
1806    if (n < 1) n = 1;    if (n < 1) n = 1;
1807    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                     ", op->help_text);
1808    }    }
1809    
1810  printf("\nWhen reading patterns from a file instead of using a command line option,\n");  printf("\nWhen reading patterns from a file instead of using a command line option,\n");
# Line 1605  handle_option(int letter, int options) Line 1828  handle_option(int letter, int options)
1828  switch(letter)  switch(letter)
1829    {    {
1830    case N_FOFFSETS: file_offsets = TRUE; break;    case N_FOFFSETS: file_offsets = TRUE; break;
1831    case N_HELP: help(); exit(0);    case N_HELP: help(); pcregrep_exit(0);
1832    case N_LOFFSETS: line_offsets = number = TRUE; break;    case N_LOFFSETS: line_offsets = number = TRUE; break;
1833      case N_LBUFFER: line_buffered = TRUE; break;
1834    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1835    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1836    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
1837    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
1838    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1839    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1840    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1841    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1842    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1843    case 'o': only_matching = TRUE; break;    case 'o': only_matching = 0; break;
1844    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1845    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
1846    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
# Line 1627  switch(letter) Line 1851  switch(letter)
1851    
1852    case 'V':    case 'V':
1853    fprintf(stderr, "pcregrep version %s\n", pcre_version());    fprintf(stderr, "pcregrep version %s\n", pcre_version());
1854    exit(0);    pcregrep_exit(0);
1855    break;    break;
1856    
1857    default:    default:
1858    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1859    exit(usage(2));    pcregrep_exit(usage(2));
1860    }    }
1861    
1862  return options;  return options;
# Line 1797  const char *error; Line 2021  const char *error;
2021    
2022  /* Set the default line ending value from the default in the PCRE library;  /* Set the default line ending value from the default in the PCRE library;
2023  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2024  */  Note that the return values from pcre_config(), though derived from the ASCII
2025    codes, are the same in EBCDIC environments, so we must use the actual values
2026    rather than escapes such as '\r'. */
2027    
2028  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2029  switch(i)  switch(i)
2030    {    {
2031    default:                 newline = (char *)"lf"; break;    default:               newline = (char *)"lf"; break;
2032    case '\r':               newline = (char *)"cr"; break;    case 13:               newline = (char *)"cr"; break;
2033    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2034    case -1:                 newline = (char *)"any"; break;    case -1:               newline = (char *)"any"; break;
2035    case -2:                 newline = (char *)"anycrlf"; break;    case -2:               newline = (char *)"anycrlf"; break;
2036    }    }
2037    
2038  /* Process the options */  /* Process the options */
# Line 1826  for (i = 1; i < argc; i++) Line 2052  for (i = 1; i < argc; i++)
2052    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2053      {      {
2054      if (pattern_filename != NULL || pattern_count > 0) break;      if (pattern_filename != NULL || pattern_count > 0) break;
2055        else exit(usage(2));        else pcregrep_exit(usage(2));
2056      }      }
2057    
2058    /* Handle a long name option, or -- to terminate the options */    /* Handle a long name option, or -- to terminate the options */
# Line 1848  for (i = 1; i < argc; i++) Line 2074  for (i = 1; i < argc; i++)
2074      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2075      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2076      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2077      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
2078      these categories, fortunately. */      both these categories. */
2079    
2080      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2081        {        {
2082        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2083        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2084        if (opbra == NULL)     /* Not a (p) case */  
2085          /* Handle options with only one spelling of the name */
2086    
2087          if (opbra == NULL)     /* Does not contain '(' */
2088          {          {
2089          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2090            {            {
# Line 1863  for (i = 1; i < argc; i++) Line 2092  for (i = 1; i < argc; i++)
2092            }            }
2093          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2094            {            {
2095            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2096            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2097                (int)strlen(arg) : (int)(argequals - arg);
2098            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2099              {              {
2100              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1877  for (i = 1; i < argc; i++) Line 2107  for (i = 1; i < argc; i++)
2107              }              }
2108            }            }
2109          }          }
2110        else                   /* Special case xxxx(p) */  
2111          /* Handle options with an alternate spelling of the name */
2112    
2113          else
2114          {          {
2115          char buff1[24];          char buff1[24];
2116          char buff2[24];          char buff2[24];
2117          int baselen = opbra - op->long_name;  
2118            int baselen = (int)(opbra - op->long_name);
2119            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2120            int arglen = (argequals == NULL || equals == NULL)?
2121              (int)strlen(arg) : (int)(argequals - arg);
2122    
2123          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2124          sprintf(buff2, "%s%.*s", buff1,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2125            (int)strlen(op->long_name) - baselen - 2, opbra + 1);  
2126          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2127               strncmp(arg, buff2, arglen) == 0)
2128              {
2129              if (equals != NULL && argequals != NULL)
2130                {
2131                option_data = argequals;
2132                if (*option_data == '=')
2133                  {
2134                  option_data++;
2135                  longopwasequals = TRUE;
2136                  }
2137                }
2138            break;            break;
2139              }
2140          }          }
2141        }        }
2142    
2143      if (op->one_char == 0)      if (op->one_char == 0)
2144        {        {
2145        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2146        exit(usage(2));        pcregrep_exit(usage(2));
2147        }        }
2148      }      }
2149    
   
2150    /* Jeffrey Friedl's debugging harness uses these additional options which    /* Jeffrey Friedl's debugging harness uses these additional options which
2151    are not in the right form for putting in the option table because they use    are not in the right form for putting in the option table because they use
2152    only one hyphen, yet are more than one character long. By putting them    only one hyphen, yet are more than one character long. By putting them
# Line 1931  for (i = 1; i < argc; i++) Line 2180  for (i = 1; i < argc; i++)
2180      while (*s != 0)      while (*s != 0)
2181        {        {
2182        for (op = optionlist; op->one_char != 0; op++)        for (op = optionlist; op->one_char != 0; op++)
2183          { if (*s == op->one_char) break; }          {
2184            if (*s == op->one_char) break;
2185            }
2186        if (op->one_char == 0)        if (op->one_char == 0)
2187          {          {
2188          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2189            *s, argv[i]);            *s, argv[i]);
2190          exit(usage(2));          pcregrep_exit(usage(2));
2191            }
2192    
2193          /* Check for a single-character option that has data: OP_OP_NUMBER
2194          is used for one that either has a numerical value or defaults, i.e. the
2195          data is optional. If a digit follows, there is data; if not, carry on
2196          with other single-character options in the same string. */
2197    
2198          option_data = s+1;
2199          if (op->type == OP_OP_NUMBER)
2200            {
2201            if (isdigit((unsigned char)s[1])) break;
2202          }          }
2203        if (op->type != OP_NODATA || s[1] == 0)        else   /* Check for end or a dataless option */
2204          {          {
2205          option_data = s+1;          if (op->type != OP_NODATA || s[1] == 0) break;
         break;  
2206          }          }
2207    
2208          /* Handle a single-character option with no data, then loop for the
2209          next character in the string. */
2210    
2211        pcre_options = handle_option(*s++, pcre_options);        pcre_options = handle_option(*s++, pcre_options);
2212        }        }
2213      }      }
# Line 1959  for (i = 1; i < argc; i++) Line 2224  for (i = 1; i < argc; i++)
2224    
2225    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2226    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
2227    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r",
2228    Jeffrey Friedl's special -S debugging option. */    "only-matching", and Jeffrey Friedl's special -S debugging option. */
2229    
2230    if (*option_data == 0 &&    if (*option_data == 0 &&
2231        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
# Line 1970  for (i = 1; i < argc; i++) Line 2235  for (i = 1; i < argc; i++)
2235        case N_COLOUR:        case N_COLOUR:
2236        colour_option = (char *)"auto";        colour_option = (char *)"auto";
2237        break;        break;
2238    
2239          case 'o':
2240          only_matching = 0;
2241          break;
2242    
2243  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
2244        case 'S':        case 'S':
2245        S_arg = 0;        S_arg = 0;
# Line 1986  for (i = 1; i < argc; i++) Line 2256  for (i = 1; i < argc; i++)
2256      if (i >= argc - 1 || longopwasequals)      if (i >= argc - 1 || longopwasequals)
2257        {        {
2258        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2259        exit(usage(2));        pcregrep_exit(usage(2));
2260        }        }
2261      option_data = argv[++i];      option_data = argv[++i];
2262      }      }
# Line 2007  for (i = 1; i < argc; i++) Line 2277  for (i = 1; i < argc; i++)
2277    
2278    /* Otherwise, deal with single string or numeric data values. */    /* Otherwise, deal with single string or numeric data values. */
2279    
2280    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)    else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2281               op->type != OP_OP_NUMBER)
2282      {      {
2283      *((char **)op->dataptr) = option_data;      *((char **)op->dataptr) = option_data;
2284      }      }
2285    
2286      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2287      only for unpicking arguments, so just keep it simple. */
2288    
2289    else    else
2290      {      {
2291      char *endptr;      unsigned long int n = 0;
2292      int n = strtoul(option_data, &endptr, 10);      char *endptr = option_data;
2293        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2294        while (isdigit((unsigned char)(*endptr)))
2295          n = n * 10 + (int)(*endptr++ - '0');
2296      if (*endptr != 0)      if (*endptr != 0)
2297        {        {
2298        if (longop)        if (longop)
2299          {          {
2300          char *equals = strchr(op->long_name, '=');          char *equals = strchr(op->long_name, '=');
2301          int nlen = (equals == NULL)? (int)strlen(op->long_name) :          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2302            equals - op->long_name;            (int)(equals - op->long_name);
2303          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2304            option_data, nlen, op->long_name);            option_data, nlen, op->long_name);
2305          }          }
2306        else        else
2307          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2308            option_data, op->one_char);            option_data, op->one_char);
2309        exit(usage(2));        pcregrep_exit(usage(2));
2310        }        }
2311      *((int *)op->dataptr) = n;      if (op->type == OP_LONGNUMBER)
2312            *((unsigned long int *)op->dataptr) = n;
2313        else
2314            *((int *)op->dataptr) = n;
2315      }      }
2316    }    }
2317    
# Line 2044  if (both_context > 0) Line 2325  if (both_context > 0)
2325    }    }
2326    
2327  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2328  However, the latter two set the only_matching flag. */  However, the latter two set only_matching. */
2329    
2330  if ((only_matching && (file_offsets || line_offsets)) ||  if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2331      (file_offsets && line_offsets))      (file_offsets && line_offsets))
2332    {    {
2333    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2334      "and/or --line-offsets\n");      "and/or --line-offsets\n");
2335    exit(usage(2));    pcregrep_exit(usage(2));
2336    }    }
2337    
2338  if (file_offsets || line_offsets) only_matching = TRUE;  if (file_offsets || line_offsets) only_matching = 0;
2339    
2340  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2341  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
# Line 2263  for (j = 0; j < pattern_count; j++) Line 2544  for (j = 0; j < pattern_count; j++)
2544    hint_count++;    hint_count++;
2545    }    }
2546    
2547    /* If --match-limit or --recursion-limit was set, put the value(s) into the
2548    pcre_extra block for each pattern. */
2549    
2550    if (match_limit > 0 || match_limit_recursion > 0)
2551      {
2552      for (j = 0; j < pattern_count; j++)
2553        {
2554        if (hints_list[j] == NULL)
2555          {
2556          hints_list[j] = malloc(sizeof(pcre_extra));
2557          if (hints_list[j] == NULL)
2558            {
2559            fprintf(stderr, "pcregrep: malloc failed\n");
2560            pcregrep_exit(2);
2561            }
2562          }
2563        if (match_limit > 0)
2564          {
2565          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2566          hints_list[j]->match_limit = match_limit;
2567          }
2568        if (match_limit_recursion > 0)
2569          {
2570          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2571          hints_list[j]->match_limit_recursion = match_limit_recursion;
2572          }
2573        }
2574      }
2575    
2576  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
2577    
2578  if (exclude_pattern != NULL)  if (exclude_pattern != NULL)
# Line 2289  if (include_pattern != NULL) Line 2599  if (include_pattern != NULL)
2599      }      }
2600    }    }
2601    
2602    if (exclude_dir_pattern != NULL)
2603      {
2604      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2605        pcretables);
2606      if (exclude_dir_compiled == NULL)
2607        {
2608        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2609          errptr, error);
2610        goto EXIT2;
2611        }
2612      }
2613    
2614    if (include_dir_pattern != NULL)
2615      {
2616      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2617        pcretables);
2618      if (include_dir_compiled == NULL)
2619        {
2620        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2621          errptr, error);
2622        goto EXIT2;
2623        }
2624      }
2625    
2626  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
2627    
2628  if (i >= argc)  if (i >= argc)
# Line 2320  if (pattern_list != NULL) Line 2654  if (pattern_list != NULL)
2654    }    }
2655  if (hints_list != NULL)  if (hints_list != NULL)
2656    {    {
2657    for (i = 0; i < hint_count; i++) free(hints_list[i]);    for (i = 0; i < hint_count; i++)
2658        {
2659        if (hints_list[i] != NULL) free(hints_list[i]);
2660        }
2661    free(hints_list);    free(hints_list);
2662    }    }
2663  return rc;  pcregrep_exit(rc);
2664    
2665  EXIT2:  EXIT2:
2666  rc = 2;  rc = 2;

Legend:
Removed from v.296  
changed lines
  Added in v.586

  ViewVC Help
Powered by ViewVC 1.1.5