/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 564 by ph10, Sun Oct 31 16:07:24 2010 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <locale.h>  #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
 #include "config.h"  
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 56  POSSIBILITY OF SUCH DAMAGE. Line 70  POSSIBILITY OF SUCH DAMAGE.
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "4.2 09-Jan-2006"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define MBUFTHIRD BUFSIZ
# Line 65  typedef int BOOL; Line 79  typedef int BOOL;
79  #define MBUFTHIRD 8192  #define MBUFTHIRD 8192
80  #endif  #endif
81    
   
82  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
83  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
84  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
85    
86  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92  /* Actions for the -d and -D options */  /* Actions for the -d and -D options */
93    
# Line 83  enum { DEE_READ, DEE_SKIP }; Line 100  enum { DEE_READ, DEE_SKIP };
100  #define PO_LINE_MATCH     0x0002  #define PO_LINE_MATCH     0x0002
101  #define PO_FIXED_STRINGS  0x0004  #define PO_FIXED_STRINGS  0x0004
102    
103    /* Line ending types */
104    
105    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
113    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
114    
115    
116    
117  /*************************************************  /*************************************************
# Line 94  regular code. */ Line 123  regular code. */
123    
124  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
125  static int S_arg = -1;  static int S_arg = -1;
126    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128    static const char *jfriedl_prefix = "";
129    static const char *jfriedl_postfix = "";
130  #endif  #endif
131    
132    static int  endlinetype;
133    
134  static char *colour_string = (char *)"1;31";  static char *colour_string = (char *)"1;31";
135  static char *colour_option = NULL;  static char *colour_option = NULL;
136  static char *dee_option = NULL;  static char *dee_option = NULL;
137  static char *DEE_option = NULL;  static char *DEE_option = NULL;
138    static char *newline = NULL;
139  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
140  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
141  static char *locale = NULL;  static char *locale = NULL;
# Line 107  static char *locale = NULL; Line 143  static char *locale = NULL;
143  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
144    
145  static int  pattern_count = 0;  static int  pattern_count = 0;
146  static pcre **pattern_list;  static pcre **pattern_list = NULL;
147  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
148    
149  static char *include_pattern = NULL;  static char *include_pattern = NULL;
150  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
151    static char *include_dir_pattern = NULL;
152    static char *exclude_dir_pattern = NULL;
153    
154  static pcre *include_compiled = NULL;  static pcre *include_compiled = NULL;
155  static pcre *exclude_compiled = NULL;  static pcre *exclude_compiled = NULL;
156    static pcre *include_dir_compiled = NULL;
157    static pcre *exclude_dir_compiled = NULL;
158    
159  static int after_context = 0;  static int after_context = 0;
160  static int before_context = 0;  static int before_context = 0;
# Line 125  static int error_count = 0; Line 165  static int error_count = 0;
165  static int filenames = FN_DEFAULT;  static int filenames = FN_DEFAULT;
166  static int process_options = 0;  static int process_options = 0;
167    
168    static unsigned long int match_limit = 0;
169    static unsigned long int match_limit_recursion = 0;
170    
171  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
172  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
173    static BOOL file_offsets = FALSE;
174  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
175  static BOOL invert = FALSE;  static BOOL invert = FALSE;
176    static BOOL line_buffered = FALSE;
177    static BOOL line_offsets = FALSE;
178  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
179  static BOOL number = FALSE;  static BOOL number = FALSE;
180    static BOOL omit_zero_count = FALSE;
181  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
182    static BOOL resource_error = FALSE;
183  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
184  static BOOL silent = FALSE;  static BOOL silent = FALSE;
185    static BOOL utf8 = FALSE;
186    
187  /* Structure for options and list of them */  /* Structure for options and list of them */
188    
# Line 151  typedef struct option_item { Line 200  typedef struct option_item {
200  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
201  used to identify them. */  used to identify them. */
202    
203  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
204  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
205  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
206  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
207  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
208  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
209  #define N_NULL      (-7)  #define N_LABEL        (-7)
210    #define N_LOCALE       (-8)
211    #define N_NULL         (-9)
212    #define N_LOFFSETS     (-10)
213    #define N_FOFFSETS     (-11)
214    #define N_LBUFFER      (-12)
215    #define N_M_LIMIT      (-13)
216    #define N_M_LIMIT_REC  (-14)
217    
218  static option_item optionlist[] = {  static option_item optionlist[] = {
219    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
# Line 165  static option_item optionlist[] = { Line 221  static option_item optionlist[] = {
221    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
222    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
223    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
224      { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
225    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
226    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
   { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },  
227    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
228    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
229    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_PATLIST,   'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
230    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_NODATA,    'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
231    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
232      { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
233    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
234    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
235    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
236    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
237    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
238    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
239      { OP_NODATA,    N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
240      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
241    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
242      { OP_NUMBER,    N_M_LIMIT,&match_limit,      "match-limit=number", "set PCRE match limit option" },
243      { OP_NUMBER,    N_M_LIMIT_REC,&match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
245      { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
247    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
248    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
249    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
250    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
251    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
252      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
253      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
254  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
255    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
256  #endif  #endif
# Line 210  static const char *prefix[] = { Line 274  static const char *prefix[] = {
274  static const char *suffix[] = {  static const char *suffix[] = {
275    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
276    
277    /* UTF-8 tables - used only when the newline setting is "any". */
278    
279    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
280    
281    const char utf8_table4[] = {
282      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
283      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
284      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
285      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
286    
287    
288    
289  /*************************************************  /*************************************************
# Line 222  although at present the only ones are fo Line 296  although at present the only ones are fo
296    
297  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
298    
299  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
300  #include <sys/types.h>  #include <sys/types.h>
301  #include <sys/stat.h>  #include <sys/stat.h>
302  #include <dirent.h>  #include <dirent.h>
# Line 254  for (;;) Line 328  for (;;)
328    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
329      return dent->d_name;      return dent->d_name;
330    }    }
331  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
332  }  }
333    
334  static void  static void
# Line 276  return (statbuf.st_mode & S_IFMT) == S_I Line 350  return (statbuf.st_mode & S_IFMT) == S_I
350  }  }
351    
352    
353  /************* Test stdout for being a terminal in Unix **********/  /************* Test for a terminal in Unix **********/
354    
355  static BOOL  static BOOL
356  is_stdout_tty(void)  is_stdout_tty(void)
# Line 284  is_stdout_tty(void) Line 358  is_stdout_tty(void)
358  return isatty(fileno(stdout));  return isatty(fileno(stdout));
359  }  }
360    
361    static BOOL
362    is_file_tty(FILE *f)
363    {
364    return isatty(fileno(f));
365    }
366    
367    
368  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
369    
370  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
371  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
372  when it did not exist. */  when it did not exist. David Byron added a patch that moved the #include of
373    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
374    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
375    undefined when it is indeed undefined. */
376    
377    #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
 #elif HAVE_WIN32API  
378    
379  #ifndef STRICT  #ifndef STRICT
380  # define STRICT  # define STRICT
# Line 300  when it did not exist. */ Line 382  when it did not exist. */
382  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
383  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
384  #endif  #endif
385    
386    #include <windows.h>
387    
388  #ifndef INVALID_FILE_ATTRIBUTES  #ifndef INVALID_FILE_ATTRIBUTES
389  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
390  #endif  #endif
391    
 #include <windows.h>  
   
392  typedef struct directory_type  typedef struct directory_type
393  {  {
394  HANDLE handle;  HANDLE handle;
# Line 335  dir = (directory_type *) malloc(sizeof(* Line 418  dir = (directory_type *) malloc(sizeof(*
418  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
419    {    {
420    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
421    exit(2);    pcregrep_exit(2);
422    }    }
423  memcpy(pattern, filename, len);  memcpy(pattern, filename, len);
424  memcpy(&(pattern[len]), "\\*", 3);  memcpy(&(pattern[len]), "\\*", 3);
# Line 390  regular if they are not directories. */ Line 473  regular if they are not directories. */
473    
474  int isregfile(char *filename)  int isregfile(char *filename)
475  {  {
476  return !isdirectory(filename)  return !isdirectory(filename);
477  }  }
478    
479    
480  /************* Test stdout for being a terminal in Win32 **********/  /************* Test for a terminal in Win32 **********/
481    
482  /* I don't know how to do this; assume never */  /* I don't know how to do this; assume never */
483    
484  static BOOL  static BOOL
485  is_stdout_tty(void)  is_stdout_tty(void)
486  {  {
487  FALSE;  return FALSE;
488    }
489    
490    static BOOL
491    is_file_tty(FILE *f)
492    {
493    return FALSE;
494  }  }
495    
496    
# Line 414  FALSE; Line 503  FALSE;
503  typedef void directory_type;  typedef void directory_type;
504    
505  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
506  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
507  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
508  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
509    
510    
# Line 426  void closedirectory(directory_type *dir) Line 515  void closedirectory(directory_type *dir)
515  int isregfile(char *filename) { return 1; }  int isregfile(char *filename) { return 1; }
516    
517    
518  /************* Test stdout for being a terminal when we can't do it **********/  /************* Test for a terminal when we can't do it **********/
519    
520  static BOOL  static BOOL
521  is_stdout_tty(void)  is_stdout_tty(void)
# Line 434  is_stdout_tty(void) Line 523  is_stdout_tty(void)
523  return FALSE;  return FALSE;
524  }  }
525    
526    static BOOL
527    is_file_tty(FILE *f)
528    {
529    return FALSE;
530    }
531    
532  #endif  #endif
533    
534    
535    
536  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
537  /*************************************************  /*************************************************
538  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
539  *************************************************/  *************************************************/
# Line 462  return sys_errlist[n]; Line 556  return sys_errlist[n];
556    
557    
558  /*************************************************  /*************************************************
559    *         Exit from the program                  *
560    *************************************************/
561    
562    /* If there has been a resource error, give a suitable message.
563    
564    Argument:  the return code
565    Returns:   does not return
566    */
567    
568    static void
569    pcregrep_exit(int rc)
570    {
571    if (resource_error)
572      {
573      fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
574        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
575      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
576      }
577    
578    exit(rc);
579    }
580    
581    
582    
583    /*************************************************
584    *            Read one line of input              *
585    *************************************************/
586    
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it gives no way of telling how
many characters it has read when there are binary zeros embedded in the data.

Reading stops after a '\n' byte has been stored, when "length" bytes have been
stored, or when fgetc() returns EOF. No terminating zero is added, so the
caller must rely on the returned count.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read; if this is zero or
               negative, nothing is read from the file and nothing is stored
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;

/* The space test comes before the call to fgetc() so that a byte is never
taken from the file unless there is room to store it. */

while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return yield;
}
614    
615    
616    
617    /*************************************************
618    *             Find end of line                   *
619    *************************************************/
620    
621    /* The length of the endline sequence that is found is set via lenptr. This may
622    be zero at the very end of the file if there is no line-ending sequence there.
623    
624    Arguments:
625      p         current position in line
626      endptr    end of available data
627      lenptr    where to put the length of the eol sequence
628    
629    Returns:    pointer to the last byte of the line
630    */
631    
632    static char *
633    end_of_line(char *p, char *endptr, int *lenptr)
634    {
635    switch(endlinetype)
636      {
637      default:      /* Just in case */
638      case EL_LF:
639      while (p < endptr && *p != '\n') p++;
640      if (p < endptr)
641        {
642        *lenptr = 1;
643        return p + 1;
644        }
645      *lenptr = 0;
646      return endptr;
647    
648      case EL_CR:
649      while (p < endptr && *p != '\r') p++;
650      if (p < endptr)
651        {
652        *lenptr = 1;
653        return p + 1;
654        }
655      *lenptr = 0;
656      return endptr;
657    
658      case EL_CRLF:
659      for (;;)
660        {
661        while (p < endptr && *p != '\r') p++;
662        if (++p >= endptr)
663          {
664          *lenptr = 0;
665          return endptr;
666          }
667        if (*p == '\n')
668          {
669          *lenptr = 2;
670          return p + 1;
671          }
672        }
673      break;
674    
675      case EL_ANYCRLF:
676      while (p < endptr)
677        {
678        int extra = 0;
679        register int c = *((unsigned char *)p);
680    
681        if (utf8 && c >= 0xc0)
682          {
683          int gcii, gcss;
684          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
685          gcss = 6*extra;
686          c = (c & utf8_table3[extra]) << gcss;
687          for (gcii = 1; gcii <= extra; gcii++)
688            {
689            gcss -= 6;
690            c |= (p[gcii] & 0x3f) << gcss;
691            }
692          }
693    
694        p += 1 + extra;
695    
696        switch (c)
697          {
698          case 0x0a:    /* LF */
699          *lenptr = 1;
700          return p;
701    
702          case 0x0d:    /* CR */
703          if (p < endptr && *p == 0x0a)
704            {
705            *lenptr = 2;
706            p++;
707            }
708          else *lenptr = 1;
709          return p;
710    
711          default:
712          break;
713          }
714        }   /* End of loop for ANYCRLF case */
715    
716      *lenptr = 0;  /* Must have hit the end */
717      return endptr;
718    
719      case EL_ANY:
720      while (p < endptr)
721        {
722        int extra = 0;
723        register int c = *((unsigned char *)p);
724    
725        if (utf8 && c >= 0xc0)
726          {
727          int gcii, gcss;
728          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
729          gcss = 6*extra;
730          c = (c & utf8_table3[extra]) << gcss;
731          for (gcii = 1; gcii <= extra; gcii++)
732            {
733            gcss -= 6;
734            c |= (p[gcii] & 0x3f) << gcss;
735            }
736          }
737    
738        p += 1 + extra;
739    
740        switch (c)
741          {
742          case 0x0a:    /* LF */
743          case 0x0b:    /* VT */
744          case 0x0c:    /* FF */
745          *lenptr = 1;
746          return p;
747    
748          case 0x0d:    /* CR */
749          if (p < endptr && *p == 0x0a)
750            {
751            *lenptr = 2;
752            p++;
753            }
754          else *lenptr = 1;
755          return p;
756    
757          case 0x85:    /* NEL */
758          *lenptr = utf8? 2 : 1;
759          return p;
760    
761          case 0x2028:  /* LS */
762          case 0x2029:  /* PS */
763          *lenptr = 3;
764          return p;
765    
766          default:
767          break;
768          }
769        }   /* End of loop for ANY case */
770    
771      *lenptr = 0;  /* Must have hit the end */
772      return endptr;
773      }     /* End of overall switch */
774    }
775    
776    
777    
778    /*************************************************
779    *         Find start of previous line            *
780    *************************************************/
781    
782    /* This is called when looking back for before lines to print.
783    
784    Arguments:
785      p         start of the subsequent line
786      startptr  start of available data
787    
788    Returns:    pointer to the start of the previous line
789    */
790    
791    static char *
792    previous_line(char *p, char *startptr)
793    {
794    switch(endlinetype)
795      {
796      default:      /* Just in case */
797      case EL_LF:
798      p--;
799      while (p > startptr && p[-1] != '\n') p--;
800      return p;
801    
802      case EL_CR:
803      p--;
804      while (p > startptr && p[-1] != '\n') p--;
805      return p;
806    
807      case EL_CRLF:
808      for (;;)
809        {
810        p -= 2;
811        while (p > startptr && p[-1] != '\n') p--;
812        if (p <= startptr + 1 || p[-2] == '\r') return p;
813        }
814      return p;   /* But control should never get here */
815    
816      case EL_ANY:
817      case EL_ANYCRLF:
818      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
819      if (utf8) while ((*p & 0xc0) == 0x80) p--;
820    
821      while (p > startptr)
822        {
823        register int c;
824        char *pp = p - 1;
825    
826        if (utf8)
827          {
828          int extra = 0;
829          while ((*pp & 0xc0) == 0x80) pp--;
830          c = *((unsigned char *)pp);
831          if (c >= 0xc0)
832            {
833            int gcii, gcss;
834            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
835            gcss = 6*extra;
836            c = (c & utf8_table3[extra]) << gcss;
837            for (gcii = 1; gcii <= extra; gcii++)
838              {
839              gcss -= 6;
840              c |= (pp[gcii] & 0x3f) << gcss;
841              }
842            }
843          }
844        else c = *((unsigned char *)pp);
845    
846        if (endlinetype == EL_ANYCRLF) switch (c)
847          {
848          case 0x0a:    /* LF */
849          case 0x0d:    /* CR */
850          return p;
851    
852          default:
853          break;
854          }
855    
856        else switch (c)
857          {
858          case 0x0a:    /* LF */
859          case 0x0b:    /* VT */
860          case 0x0c:    /* FF */
861          case 0x0d:    /* CR */
862          case 0x85:    /* NEL */
863          case 0x2028:  /* LS */
864          case 0x2029:  /* PS */
865          return p;
866    
867          default:
868          break;
869          }
870    
871        p = pp;  /* Back one character */
872        }        /* End of loop for ANY case */
873    
874      return startptr;  /* Hit start of data */
875      }     /* End of overall switch */
876    }
877    
878    
879    
880    
881    
882    /*************************************************
883  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
884  *************************************************/  *************************************************/
885    
# Line 486  if (after_context > 0 && lastmatchnumber Line 904  if (after_context > 0 && lastmatchnumber
904    int count = 0;    int count = 0;
905    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
906      {      {
907        int ellength;
908      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
909      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
910      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
911      while (*pp != '\n') pp++;      pp = end_of_line(pp, endptr, &ellength);
912      fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
913      lastmatchrestart = pp + 1;      lastmatchrestart = pp;
914      }      }
915    hyphenpending = TRUE;    hyphenpending = TRUE;
916    }    }
# Line 500  if (after_context > 0 && lastmatchnumber Line 919  if (after_context > 0 && lastmatchnumber
919    
920    
921  /*************************************************  /*************************************************
922    *   Apply patterns to subject till one matches   *
923    *************************************************/
924    
925    /* This function is called to run through all patterns, looking for a match. It
926    is used multiple times for the same subject when colouring is enabled, in order
927    to find all possible matches.
928    
929    Arguments:
930      matchptr    the start of the subject
931      length      the length of the subject to match
932      offsets     the offsets vector to fill in
933      mrc         address of where to put the result of pcre_exec()
934    
935    Returns:      TRUE if there was a match
936                  FALSE if there was no match
937                  invert if there was a non-fatal error
938    */
939    
940    static BOOL
941    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
942    {
943    int i;
944    size_t slen = length;
945    const char *msg = "this text:\n\n";
946    if (slen > 200)
947      {
948      slen = 200;
949      msg = "text that starts:\n\n";
950      }
951    for (i = 0; i < pattern_count; i++)
952      {
953      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
954        PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
955      if (*mrc >= 0) return TRUE;
956      if (*mrc == PCRE_ERROR_NOMATCH) continue;
957      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
958      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
959      fprintf(stderr, "%s", msg);
960      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
961      fprintf(stderr, "\n\n");
962      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
963        resource_error = TRUE;
964      if (error_count++ > 20)
965        {
966        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
967        pcregrep_exit(2);
968        }
969      return invert;    /* No more matching; don't show the line again */
970      }
971    
972    return FALSE;  /* No match, no errors */
973    }
974    
975    
976    
977    /*************************************************
978  *            Grep an individual file             *  *            Grep an individual file             *
979  *************************************************/  *************************************************/
980    
# Line 511  be in the middle third most of the time, Line 986  be in the middle third most of the time,
986  "before" context printing.  "before" context printing.
987    
988  Arguments:  Arguments:
989    in           the fopened FILE stream    handle       the fopened FILE stream for a normal file
990                   the gzFile pointer when reading is via libz
991                   the BZFILE pointer when reading is via libbz2
992      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
993    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
994                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
995                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
996    
997  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
998                 1 otherwise (no matches)                 1 otherwise (no matches)
999                   2 if there is a read error on a .bz2 file
1000  */  */
1001    
1002  static int  static int
1003  pcregrep(FILE *in, char *printname)  pcregrep(void *handle, int frtype, char *printname)
1004  {  {
1005  int rc = 1;  int rc = 1;
1006  int linenumber = 1;  int linenumber = 1;
1007  int lastmatchnumber = 0;  int lastmatchnumber = 0;
1008  int count = 0;  int count = 0;
1009  int offsets[99];  int filepos = 0;
1010    int offsets[OFFSET_SIZE];
1011  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
1012  char buffer[3*MBUFTHIRD];  char buffer[3*MBUFTHIRD];
1013  char *ptr = buffer;  char *ptr = buffer;
1014  char *endptr;  char *endptr;
1015  size_t bufflength;  size_t bufflength;
1016  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
1017    BOOL input_line_buffered = line_buffered;
1018    FILE *in = NULL;                    /* Ensure initialized */
1019    
1020    #ifdef SUPPORT_LIBZ
1021    gzFile ingz = NULL;
1022    #endif
1023    
1024    #ifdef SUPPORT_LIBBZ2
1025    BZFILE *inbz2 = NULL;
1026    #endif
1027    
1028    
1029    /* Do the first read into the start of the buffer and set up the pointer to end
1030    of what we have. In the case of libz, a non-zipped .gz file will be read as a
1031    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1032    fail. */
1033    
1034    #ifdef SUPPORT_LIBZ
1035    if (frtype == FR_LIBZ)
1036      {
1037      ingz = (gzFile)handle;
1038      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1039      }
1040    else
1041    #endif
1042    
1043  /* Do the first read into the start of the buffer and set up the pointer to  #ifdef SUPPORT_LIBBZ2
1044  end of what we have. */  if (frtype == FR_LIBBZ2)
1045      {
1046      inbz2 = (BZFILE *)handle;
1047      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1048      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1049      }                                    /* without the cast it is unsigned. */
1050    else
1051    #endif
1052    
1053      {
1054      in = (FILE *)handle;
1055      if (is_file_tty(in)) input_line_buffered = TRUE;
1056      bufflength = input_line_buffered?
1057        read_one_line(buffer, 3*MBUFTHIRD, in) :
1058        fread(buffer, 1, 3*MBUFTHIRD, in);
1059      }
1060    
 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  
1061  endptr = buffer + bufflength;  endptr = buffer + bufflength;
1062    
1063  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
# Line 548  way, the buffer is shifted left and re-f Line 1067  way, the buffer is shifted left and re-f
1067    
1068  while (ptr < endptr)  while (ptr < endptr)
1069    {    {
1070    int i;    int endlinelength;
1071    int mrc = 0;    int mrc = 0;
1072    BOOL match = FALSE;    BOOL match;
1073      char *matchptr = ptr;
1074    char *t = ptr;    char *t = ptr;
1075    size_t length, linelength;    size_t length, linelength;
1076    
1077    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
1078    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
1079    length remainder of the data in the buffer. Otherwise, it is the length of    length remainder of the data in the buffer. Otherwise, it is the length of
1080    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
1081    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1082    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
1083      first line. */
1084    linelength = 0;  
1085    while (t < endptr && *t++ != '\n') linelength++;    t = end_of_line(t, endptr, &endlinelength);
1086    length = multiline? endptr - ptr : linelength;    linelength = t - ptr - endlinelength;
1087      length = multiline? (size_t)(endptr - ptr) : linelength;
1088    /* Run through all the patterns until one matches. Note that we don't include  
1089    the final newline in the subject string. */    /* Extra processing for Jeffrey Friedl's debugging. */
1090    
1091    for (i = 0; i < pattern_count; i++)  #ifdef JFRIEDL_DEBUG
1092      {    if (jfriedl_XT || jfriedl_XR)
1093      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,    {
1094        offsets, 99);        #include <sys/time.h>
1095      if (mrc >= 0) { match = TRUE; break; }        #include <time.h>
1096      if (mrc != PCRE_ERROR_NOMATCH)        struct timeval start_time, end_time;
1097        {        struct timezone dummy;
1098        fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);        int i;
1099        if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
1100        fprintf(stderr, "this line:\n");        if (jfriedl_XT)
1101        fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */        {
1102        fprintf(stderr, "\n");            unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1103        if (error_count == 0 &&            const char *orig = ptr;
1104            (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))            ptr = malloc(newlen + 1);
1105          {            if (!ptr) {
1106          fprintf(stderr, "pcregrep: error %d means that a resource limit "                    printf("out of memory");
1107            "was exceeded\n", mrc);                    pcregrep_exit(2);
1108          fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");            }
1109          }            endptr = ptr;
1110        if (error_count++ > 20)            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1111          {            for (i = 0; i < jfriedl_XT; i++) {
1112          fprintf(stderr, "pcregrep: too many errors - abandoned\n");                    strncpy(endptr, orig,  length);
1113          exit(2);                    endptr += length;
1114          }            }
1115        match = invert;    /* No more matching; don't show the line again */            strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1116        break;            length = newlen;
1117        }        }
1118      }  
1119          if (gettimeofday(&start_time, &dummy) != 0)
1120                  perror("bad gettimeofday");
1121    
1122    
1123          for (i = 0; i < jfriedl_XR; i++)
1124              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1125                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1126    
1127          if (gettimeofday(&end_time, &dummy) != 0)
1128                  perror("bad gettimeofday");
1129    
1130          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1131                          -
1132                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1133    
1134          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1135          return 0;
1136      }
1137    #endif
1138    
1139      /* We come back here after a match when the -o option (only_matching) is set,
1140      in order to find any further matches in the same line. */
1141    
1142      ONLY_MATCHING_RESTART:
1143    
1144      /* Run through all the patterns until one matches or there is an error other
1145      than NOMATCH. This code is in a subroutine so that it can be re-used for
1146      finding subsequent matches when colouring matched lines. */
1147    
1148      match = match_patterns(matchptr, length, offsets, &mrc);
1149    
1150    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1151    
# Line 614  while (ptr < endptr) Line 1164  while (ptr < endptr)
1164      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1165      in the file. */      in the file. */
1166    
1167      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1168        {        {
1169        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1170        return 0;        return 0;
# Line 625  while (ptr < endptr) Line 1175  while (ptr < endptr)
1175      else if (quiet) return 0;      else if (quiet) return 0;
1176    
1177      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, and
1178      does not pring any context. */      the --file-offsets and --line-offsets options output offsets for the
1179        matching substring (they both force --only-matching). None of these options
1180        prints any context. Afterwards, adjust the start and length, and then jump
1181        back to look for further matches in the same line. If we are in invert
1182        mode, however, nothing is printed - this could still be useful because the
1183        return code is set. */
1184    
1185      else if (only_matching)      else if (only_matching)
1186        {        {
1187        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (!invert)
1188        if (number) fprintf(stdout, "%d:", linenumber);          {
1189        fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1190        fprintf(stdout, "\n");          if (number) fprintf(stdout, "%d:", linenumber);
1191            if (line_offsets)
1192              fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1193                offsets[1] - offsets[0]);
1194            else if (file_offsets)
1195              fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1196                offsets[1] - offsets[0]);
1197            else
1198              {
1199              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1200              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1201              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1202              }
1203            fprintf(stdout, "\n");
1204            matchptr += offsets[1];
1205            length -= offsets[1];
1206            match = FALSE;
1207            if (line_buffered) fflush(stdout);
1208            rc = 0;    /* Had some success */
1209            goto ONLY_MATCHING_RESTART;
1210            }
1211        }        }
1212    
1213      /* This is the default case when none of the above options is set. We print      /* This is the default case when none of the above options is set. We print
# Line 646  while (ptr < endptr) Line 1221  while (ptr < endptr)
1221    
1222        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
1223          {          {
1224            int ellength;
1225          int linecount = 0;          int linecount = 0;
1226          char *p = lastmatchrestart;          char *p = lastmatchrestart;
1227    
1228          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
1229            {            {
1230            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
1231            linecount++;            linecount++;
1232            }            }
1233    
# Line 665  while (ptr < endptr) Line 1240  while (ptr < endptr)
1240            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
1241            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1242            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1243            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1244            fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1245            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
1246            }            }
1247          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
1248          }          }
# Line 693  while (ptr < endptr) Line 1268  while (ptr < endptr)
1268                 linecount < before_context)                 linecount < before_context)
1269            {            {
1270            linecount++;            linecount++;
1271            p--;            p = previous_line(p, buffer);
           while (p > buffer && p[-1] != '\n') p--;  
1272            }            }
1273    
1274          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 702  while (ptr < endptr) Line 1276  while (ptr < endptr)
1276    
1277          while (p < ptr)          while (p < ptr)
1278            {            {
1279              int ellength;
1280            char *pp = p;            char *pp = p;
1281            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1282            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1283            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1284            fwrite(p, 1, pp - p + 1, stdout);   /* In case binary zero */            FWRITE(p, 1, pp - p, stdout);
1285            p = pp + 1;            p = pp;
1286            }            }
1287          }          }
1288    
# Line 722  while (ptr < endptr) Line 1297  while (ptr < endptr)
1297    
1298        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1299        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1300        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1301        start of the match will always be before the first \n character. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1302          the match will always be before the first newline sequence. */
1303    
1304        if (multiline)        if (multiline)
1305          {          {
1306          char *endmatch = ptr + offsets[1];          int ellength;
1307          t = ptr;          char *endmatch = ptr;
1308          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          if (!invert)
1309          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1310          linelength = endmatch - ptr;            endmatch += offsets[1];
1311              t = ptr;
1312              while (t < endmatch)
1313                {
1314                t = end_of_line(t, endptr, &ellength);
1315                if (t <= endmatch) linenumber++; else break;
1316                }
1317              }
1318            endmatch = end_of_line(endmatch, endptr, &ellength);
1319            linelength = endmatch - ptr - ellength;
1320          }          }
1321    
1322        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 746  while (ptr < endptr) Line 1331  while (ptr < endptr)
1331          {          {
1332          int first = S_arg * 2;          int first = S_arg * 2;
1333          int last  = first + 1;          int last  = first + 1;
1334          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1335          fprintf(stdout, "X");          fprintf(stdout, "X");
1336          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1337          }          }
1338        else        else
1339  #endif  #endif
1340    
1341        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1342          matches. */
1343    
1344        if (do_colour)        if (do_colour)
1345          {          {
1346          fwrite(ptr, 1, offsets[0], stdout);          int last_offset = 0;
1347            FWRITE(ptr, 1, offsets[0], stdout);
1348          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1349          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1350          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1351          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          for (;;)
1352              {
1353              last_offset += offsets[1];
1354              matchptr += offsets[1];
1355              length -= offsets[1];
1356              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1357              FWRITE(matchptr, 1, offsets[0], stdout);
1358              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1359              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1360              fprintf(stdout, "%c[00m", 0x1b);
1361              }
1362            FWRITE(ptr + last_offset, 1,
1363              (linelength + endlinelength) - last_offset, stdout);
1364          }          }
       else fwrite(ptr, 1, linelength, stdout);  
1365    
1366        fprintf(stdout, "\n");        /* Not colouring; no need to search for further matches */
1367    
1368          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1369        }        }
1370    
1371      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match. If --line-buffered was
1372        given, flush the output. */
1373    
1374        if (line_buffered) fflush(stdout);
1375      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1376    
1377      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1378      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1379    
1380      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1381      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1382      }      }
1383    
1384    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1385      anything to be printed), we have to move on to the end of the match before
1386      proceeding. */
1387    
1388      if (multiline && invert && match)
1389        {
1390        int ellength;
1391        char *endmatch = ptr + offsets[1];
1392        t = ptr;
1393        while (t < endmatch)
1394          {
1395          t = end_of_line(t, endptr, &ellength);
1396          if (t <= endmatch) linenumber++; else break;
1397          }
1398        endmatch = end_of_line(endmatch, endptr, &ellength);
1399        linelength = endmatch - ptr - ellength;
1400        }
1401    
1402      /* Advance to after the newline and increment the line number. The file
1403      offset to the current line is maintained in filepos. */
1404    
1405    ptr += linelength + 1;    ptr += linelength + endlinelength;
1406      filepos += (int)(linelength + endlinelength);
1407    linenumber++;    linenumber++;
1408    
1409      /* If input is line buffered, and the buffer is not yet full, read another
1410      line and add it into the buffer. */
1411    
1412      if (input_line_buffered && bufflength < sizeof(buffer))
1413        {
1414        int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1415        bufflength += add;
1416        endptr += add;
1417        }
1418    
1419    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1420    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1421    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
# Line 803  while (ptr < endptr) Line 1435  while (ptr < endptr)
1435    
1436      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1437      ptr -= MBUFTHIRD;      ptr -= MBUFTHIRD;
1438      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);  
1439    #ifdef SUPPORT_LIBZ
1440        if (frtype == FR_LIBZ)
1441          bufflength = 2*MBUFTHIRD +
1442            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1443        else
1444    #endif
1445    
1446    #ifdef SUPPORT_LIBBZ2
1447        if (frtype == FR_LIBBZ2)
1448          bufflength = 2*MBUFTHIRD +
1449            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1450        else
1451    #endif
1452    
1453        bufflength = 2*MBUFTHIRD +
1454          (input_line_buffered?
1455           read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1456           fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1457      endptr = buffer + bufflength;      endptr = buffer + bufflength;
1458    
1459      /* Adjust any last match point */      /* Adjust any last match point */
# Line 834  if (filenames == FN_NOMATCH_ONLY) Line 1484  if (filenames == FN_NOMATCH_ONLY)
1484    
1485  if (count_only)  if (count_only)
1486    {    {
1487    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1488    fprintf(stdout, "%d\n", count);      {
1489        if (printname != NULL && filenames != FN_NONE)
1490          fprintf(stdout, "%s:", printname);
1491        fprintf(stdout, "%d\n", count);
1492        }
1493    }    }
1494    
1495  return rc;  return rc;
# Line 867  grep_or_recurse(char *pathname, BOOL dir Line 1521  grep_or_recurse(char *pathname, BOOL dir
1521  {  {
1522  int rc = 1;  int rc = 1;
1523  int sep;  int sep;
1524  FILE *in;  int frtype;
1525    int pathlen;
1526    void *handle;
1527    FILE *in = NULL;           /* Ensure initialized */
1528    
1529    #ifdef SUPPORT_LIBZ
1530    gzFile ingz = NULL;
1531    #endif
1532    
1533    #ifdef SUPPORT_LIBBZ2
1534    BZFILE *inbz2 = NULL;
1535    #endif
1536    
1537  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1538    
1539  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1540    {    {
1541    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN,
1542      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1543        stdin_name : NULL);        stdin_name : NULL);
1544    }    }
1545    
   
1546  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* If the file is a directory, skip if skipping or if we are recursing, scan
1547  each file within it, subject to any include or exclude patterns that were set.  each file and directory within it, subject to any include or exclude patterns
1548  The scanning code is localized so it can be made system-specific. */  that were set. The scanning code is localized so it can be made
1549    system-specific. */
1550    
1551  if ((sep = isdirectory(pathname)) != 0)  if ((sep = isdirectory(pathname)) != 0)
1552    {    {
# Line 902  if ((sep = isdirectory(pathname)) != 0) Line 1567  if ((sep = isdirectory(pathname)) != 0)
1567    
1568      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
1569        {        {
1570        int frc, blen;        int frc, nflen;
1571        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1572        blen = strlen(buffer);        nflen = (int)(strlen(nextfile));
1573    
1574        if (exclude_compiled != NULL &&        if (isdirectory(buffer))
1575            pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)          {
1576          continue;          if (exclude_dir_compiled != NULL &&
1577                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1578        if (include_compiled != NULL &&            continue;
1579            pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  
1580          continue;          if (include_dir_compiled != NULL &&
1581                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1582              continue;
1583            }
1584          else
1585            {
1586            if (exclude_compiled != NULL &&
1587                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1588              continue;
1589    
1590            if (include_compiled != NULL &&
1591                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1592              continue;
1593            }
1594    
1595        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1596        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
# Line 935  skipping was not requested. The scan pro Line 1613  skipping was not requested. The scan pro
1613  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1614  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1615    
1616  in = fopen(pathname, "r");  pathlen = (int)(strlen(pathname));
1617  if (in == NULL)  
1618    /* Open using zlib if it is supported and the file name ends with .gz. */
1619    
1620    #ifdef SUPPORT_LIBZ
1621    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1622      {
1623      ingz = gzopen(pathname, "rb");
1624      if (ingz == NULL)
1625        {
1626        if (!silent)
1627          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1628            strerror(errno));
1629        return 2;
1630        }
1631      handle = (void *)ingz;
1632      frtype = FR_LIBZ;
1633      }
1634    else
1635    #endif
1636    
1637    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1638    
1639    #ifdef SUPPORT_LIBBZ2
1640    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1641      {
1642      inbz2 = BZ2_bzopen(pathname, "rb");
1643      handle = (void *)inbz2;
1644      frtype = FR_LIBBZ2;
1645      }
1646    else
1647    #endif
1648    
1649    /* Otherwise use plain fopen(). The label is so that we can come back here if
1650    an attempt to read a .bz2 file indicates that it really is a plain file. */
1651    
1652    #ifdef SUPPORT_LIBBZ2
1653    PLAIN_FILE:
1654    #endif
1655      {
1656      in = fopen(pathname, "rb");
1657      handle = (void *)in;
1658      frtype = FR_PLAIN;
1659      }
1660    
1661    /* All the opening methods return NULL and set errno when they fail. */
1662    
1663    if (handle == NULL)
1664    {    {
1665    if (!silent)    if (!silent)
1666      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 944  if (in == NULL) Line 1668  if (in == NULL)
1668    return 2;    return 2;
1669    }    }
1670    
1671  rc = pcregrep(in, (filenames > FN_DEFAULT ||  /* Now grep the file */
1672    
1673    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1674    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1675    
1676    /* Close in an appropriate manner. */
1677    
1678    #ifdef SUPPORT_LIBZ
1679    if (frtype == FR_LIBZ)
1680      gzclose(ingz);
1681    else
1682    #endif
1683    
1684    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1685    read failed. If the error indicates that the file isn't in fact bzipped, try
1686    again as a normal file. */
1687    
1688    #ifdef SUPPORT_LIBBZ2
1689    if (frtype == FR_LIBBZ2)
1690      {
1691      if (rc == 2)
1692        {
1693        int errnum;
1694        const char *err = BZ2_bzerror(inbz2, &errnum);
1695        if (errnum == BZ_DATA_ERROR_MAGIC)
1696          {
1697          BZ2_bzclose(inbz2);
1698          goto PLAIN_FILE;
1699          }
1700        else if (!silent)
1701          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1702            pathname, err);
1703        }
1704      BZ2_bzclose(inbz2);
1705      }
1706    else
1707    #endif
1708    
1709    /* Normal file close */
1710    
1711  fclose(in);  fclose(in);
1712    
1713    /* Pass back the yield from pcregrep(). */
1714    
1715  return rc;  return rc;
1716  }  }
1717    
# Line 968  for (op = optionlist; op->one_char != 0; Line 1732  for (op = optionlist; op->one_char != 0;
1732    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1733    }    }
1734  fprintf(stderr, "] [long options] [pattern] [files]\n");  fprintf(stderr, "] [long options] [pattern] [files]\n");
1735  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1736      "options.\n");
1737  return rc;  return rc;
1738  }  }
1739    
# Line 987  option_item *op; Line 1752  option_item *op;
1752  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1753  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1754  printf("PATTERN must be present if neither -e nor -f is used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1755  printf("\"-\" can be used as a file name to mean STDIN.\n\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
1756  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
1757    #ifdef SUPPORT_LIBZ
1758    printf("Files whose names end in .gz are read using zlib.\n");
1759    #endif
1760    
1761    #ifdef SUPPORT_LIBBZ2
1762    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1763    #endif
1764    
1765    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1766    printf("Other files and the standard input are read as plain files.\n\n");
1767    #else
1768    printf("All files are read as plain files, without any interpretation.\n\n");
1769    #endif
1770    
1771    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1772  printf("Options:\n");  printf("Options:\n");
1773    
1774  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
# Line 997  for (op = optionlist; op->one_char != 0; Line 1776  for (op = optionlist; op->one_char != 0;
1776    int n;    int n;
1777    char s[4];    char s[4];
1778    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1779    printf("  %s --%s%n", s, op->long_name, &n);    n = 30 - printf("  %s --%s", s, op->long_name);
   n = 30 - n;  
1780    if (n < 1) n = 1;    if (n < 1) n = 1;
1781    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                    ", op->help_text);
1782    }    }
# Line 1023  handle_option(int letter, int options) Line 1801  handle_option(int letter, int options)
1801  {  {
1802  switch(letter)  switch(letter)
1803    {    {
1804    case N_HELP: help(); exit(0);    case N_FOFFSETS: file_offsets = TRUE; break;
1805      case N_HELP: help(); pcregrep_exit(0);
1806      case N_LOFFSETS: line_offsets = number = TRUE; break;
1807      case N_LBUFFER: line_buffered = TRUE; break;
1808    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1809    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1810    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
1811    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
1812    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1813    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1814    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1815    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1816    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
# Line 1037  switch(letter) Line 1818  switch(letter)
1818    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1819    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
1820    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1821    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1822    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1823    case 'w': process_options |= PO_WORD_MATCH; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1824    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1825    
1826    case 'V':    case 'V':
1827    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
1828    fprintf(stderr, "PCRE version %s\n", pcre_version());    pcregrep_exit(0);
   exit(0);  
1829    break;    break;
1830    
1831    default:    default:
1832    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1833    exit(usage(2));    pcregrep_exit(usage(2));
1834    }    }
1835    
1836  return options;  return options;
# Line 1120  sprintf(buffer, "%s%.*s%s", prefix[proce Line 1900  sprintf(buffer, "%s%.*s%s", prefix[proce
1900    suffix[process_options]);    suffix[process_options]);
1901  pattern_list[pattern_count] =  pattern_list[pattern_count] =
1902    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
1903  if (pattern_list[pattern_count++] != NULL) return TRUE;  if (pattern_list[pattern_count] != NULL)
1904      {
1905      pattern_count++;
1906      return TRUE;
1907      }
1908    
1909  /* Handle compile errors */  /* Handle compile errors */
1910    
# Line 1152  return FALSE; Line 1936  return FALSE;
1936  *************************************************/  *************************************************/
1937    
1938  /* When the -F option has been used, each string may be a list of strings,  /* When the -F option has been used, each string may be a list of strings,
1939  separated by newlines. They will be matched literally.  separated by line breaks. They will be matched literally.
1940    
1941  Arguments:  Arguments:
1942    pattern        the pattern string    pattern        the pattern string
# Line 1170  compile_pattern(char *pattern, int optio Line 1954  compile_pattern(char *pattern, int optio
1954  {  {
1955  if ((process_options & PO_FIXED_STRINGS) != 0)  if ((process_options & PO_FIXED_STRINGS) != 0)
1956    {    {
1957      char *eop = pattern + strlen(pattern);
1958    char buffer[MBUFTHIRD];    char buffer[MBUFTHIRD];
1959    for(;;)    for(;;)
1960      {      {
1961      char *p = strchr(pattern, '\n');      int ellength;
1962      if (p == NULL)      char *p = end_of_line(pattern, eop, &ellength);
1963        if (ellength == 0)
1964        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
1965      sprintf(buffer, "%.*s", p - pattern, pattern);      sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1966      pattern = p + 1;      pattern = p;
1967      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
1968        return FALSE;        return FALSE;
1969      }      }
# Line 1200  int i, j; Line 1986  int i, j;
1986  int rc = 1;  int rc = 1;
1987  int pcre_options = 0;  int pcre_options = 0;
1988  int cmd_pattern_count = 0;  int cmd_pattern_count = 0;
1989    int hint_count = 0;
1990  int errptr;  int errptr;
1991  BOOL only_one_at_top;  BOOL only_one_at_top;
1992  char *patterns[MAX_PATTERN_COUNT];  char *patterns[MAX_PATTERN_COUNT];
1993  const char *locale_from = "--locale";  const char *locale_from = "--locale";
1994  const char *error;  const char *error;
1995    
1996    /* Set the default line ending value from the default in the PCRE library;
1997    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1998    Note that the return values from pcre_config(), though derived from the ASCII
1999    codes, are the same in EBCDIC environments, so we must use the actual values
2000    rather than escapes such as '\r'. */
2001    
2002    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2003    switch(i)
2004      {
2005      default:               newline = (char *)"lf"; break;
2006      case 13:               newline = (char *)"cr"; break;
2007      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2008      case -1:               newline = (char *)"any"; break;
2009      case -2:               newline = (char *)"anycrlf"; break;
2010      }
2011    
2012  /* Process the options */  /* Process the options */
2013    
2014  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
# Line 1223  for (i = 1; i < argc; i++) Line 2026  for (i = 1; i < argc; i++)
2026    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2027      {      {
2028      if (pattern_filename != NULL || pattern_count > 0) break;      if (pattern_filename != NULL || pattern_count > 0) break;
2029        else exit(usage(2));        else pcregrep_exit(usage(2));
2030      }      }
2031    
2032    /* Handle a long name option, or -- to terminate the options */    /* Handle a long name option, or -- to terminate the options */
# Line 1245  for (i = 1; i < argc; i++) Line 2048  for (i = 1; i < argc; i++)
2048      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2049      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2050      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2051      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
2052      these categories, fortunately. */      both these categories. */
2053    
2054      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2055        {        {
2056        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2057        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2058        if (opbra == NULL)     /* Not a (p) case */  
2059          /* Handle options with only one spelling of the name */
2060    
2061          if (opbra == NULL)     /* Does not contain '(' */
2062          {          {
2063          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2064            {            {
# Line 1260  for (i = 1; i < argc; i++) Line 2066  for (i = 1; i < argc; i++)
2066            }            }
2067          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2068            {            {
2069            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2070            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2071                (int)strlen(arg) : (int)(argequals - arg);
2072            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2073              {              {
2074              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1274  for (i = 1; i < argc; i++) Line 2081  for (i = 1; i < argc; i++)
2081              }              }
2082            }            }
2083          }          }
2084        else                   /* Special case xxxx(p) */  
2085          /* Handle options with an alternate spelling of the name */
2086    
2087          else
2088          {          {
2089          char buff1[24];          char buff1[24];
2090          char buff2[24];          char buff2[24];
2091          int baselen = opbra - op->long_name;  
2092            int baselen = (int)(opbra - op->long_name);
2093            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2094            int arglen = (argequals == NULL || equals == NULL)?
2095              (int)strlen(arg) : (int)(argequals - arg);
2096    
2097          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2098          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2099            opbra + 1);  
2100          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2101               strncmp(arg, buff2, arglen) == 0)
2102              {
2103              if (equals != NULL && argequals != NULL)
2104                {
2105                option_data = argequals;
2106                if (*option_data == '=')
2107                  {
2108                  option_data++;
2109                  longopwasequals = TRUE;
2110                  }
2111                }
2112            break;            break;
2113              }
2114          }          }
2115        }        }
2116    
2117      if (op->one_char == 0)      if (op->one_char == 0)
2118        {        {
2119        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2120        exit(usage(2));        pcregrep_exit(usage(2));
2121        }        }
2122      }      }
2123    
2124      /* Jeffrey Friedl's debugging harness uses these additional options which
2125      are not in the right form for putting in the option table because they use
2126      only one hyphen, yet are more than one character long. By putting them
2127      separately here, they will not get displayed as part of the help() output,
2128      but I don't think Jeffrey will care about that. */
2129    
2130    #ifdef JFRIEDL_DEBUG
2131      else if (strcmp(argv[i], "-pre") == 0) {
2132              jfriedl_prefix = argv[++i];
2133              continue;
2134      } else if (strcmp(argv[i], "-post") == 0) {
2135              jfriedl_postfix = argv[++i];
2136              continue;
2137      } else if (strcmp(argv[i], "-XT") == 0) {
2138              sscanf(argv[++i], "%d", &jfriedl_XT);
2139              continue;
2140      } else if (strcmp(argv[i], "-XR") == 0) {
2141              sscanf(argv[++i], "%d", &jfriedl_XR);
2142              continue;
2143      }
2144    #endif
2145    
2146    
2147    /* One-char options; many that have no data may be in a single argument; we    /* One-char options; many that have no data may be in a single argument; we
2148    continue till we hit the last one or one that needs data. */    continue till we hit the last one or one that needs data. */
2149    
# Line 1309  for (i = 1; i < argc; i++) Line 2159  for (i = 1; i < argc; i++)
2159          {          {
2160          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2161            *s, argv[i]);            *s, argv[i]);
2162          exit(usage(2));          pcregrep_exit(usage(2));
2163          }          }
2164        if (op->type != OP_NODATA || s[1] == 0)        if (op->type != OP_NODATA || s[1] == 0)
2165          {          {
# Line 1333  for (i = 1; i < argc; i++) Line 2183  for (i = 1; i < argc; i++)
2183    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2184    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
2185    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r" and
2186    Jeffrey Friedl's special debugging option. */    Jeffrey Friedl's special -S debugging option. */
2187    
2188    if (*option_data == 0 &&    if (*option_data == 0 &&
2189        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
# Line 1359  for (i = 1; i < argc; i++) Line 2209  for (i = 1; i < argc; i++)
2209      if (i >= argc - 1 || longopwasequals)      if (i >= argc - 1 || longopwasequals)
2210        {        {
2211        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2212        exit(usage(2));        pcregrep_exit(usage(2));
2213        }        }
2214      option_data = argv[++i];      option_data = argv[++i];
2215      }      }
# Line 1384  for (i = 1; i < argc; i++) Line 2234  for (i = 1; i < argc; i++)
2234      {      {
2235      *((char **)op->dataptr) = option_data;      *((char **)op->dataptr) = option_data;
2236      }      }
2237    
2238      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2239      only for unpicking arguments, so just keep it simple. */
2240    
2241    else    else
2242      {      {
2243      char *endptr;      unsigned long int n = 0;
2244      int n = strtoul(option_data, &endptr, 10);      char *endptr = option_data;
2245        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2246        while (isdigit((unsigned char)(*endptr)))
2247          n = n * 10 + (int)(*endptr++ - '0');
2248      if (*endptr != 0)      if (*endptr != 0)
2249        {        {
2250        if (longop)        if (longop)
2251          {          {
2252          char *equals = strchr(op->long_name, '=');          char *equals = strchr(op->long_name, '=');
2253          int nlen = (equals == NULL)? (int)strlen(op->long_name) :          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2254            equals - op->long_name;            (int)(equals - op->long_name);
2255          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2256            option_data, nlen, op->long_name);            option_data, nlen, op->long_name);
2257          }          }
2258        else        else
2259          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2260            option_data, op->one_char);            option_data, op->one_char);
2261        exit(usage(2));        pcregrep_exit(usage(2));
2262        }        }
2263      *((int *)op->dataptr) = n;      *((int *)op->dataptr) = n;
2264      }      }
# Line 1416  if (both_context > 0) Line 2273  if (both_context > 0)
2273    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2274    }    }
2275    
2276    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2277    However, the latter two set the only_matching flag. */
2278    
2279    if ((only_matching && (file_offsets || line_offsets)) ||
2280        (file_offsets && line_offsets))
2281      {
2282      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2283        "and/or --line-offsets\n");
2284      pcregrep_exit(usage(2));
2285      }
2286    
2287    if (file_offsets || line_offsets) only_matching = TRUE;
2288    
2289  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2290  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
2291    
# Line 1465  if (colour_option != NULL && strcmp(colo Line 2335  if (colour_option != NULL && strcmp(colo
2335      }      }
2336    }    }
2337    
2338    /* Interpret the newline type; the default settings are Unix-like. */
2339    
2340    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2341      {
2342      pcre_options |= PCRE_NEWLINE_CR;
2343      endlinetype = EL_CR;
2344      }
2345    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2346      {
2347      pcre_options |= PCRE_NEWLINE_LF;
2348      endlinetype = EL_LF;
2349      }
2350    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2351      {
2352      pcre_options |= PCRE_NEWLINE_CRLF;
2353      endlinetype = EL_CRLF;
2354      }
2355    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2356      {
2357      pcre_options |= PCRE_NEWLINE_ANY;
2358      endlinetype = EL_ANY;
2359      }
2360    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2361      {
2362      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2363      endlinetype = EL_ANYCRLF;
2364      }
2365    else
2366      {
2367      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2368      return 2;
2369      }
2370    
2371  /* Interpret the text values for -d and -D */  /* Interpret the text values for -d and -D */
2372    
2373  if (dee_option != NULL)  if (dee_option != NULL)
# Line 1490  if (DEE_option != NULL) Line 2393  if (DEE_option != NULL)
2393      }      }
2394    }    }
2395    
2396  /* Check the value for Jeff Friedl's debugging option. */  /* Check the values for Jeffrey Friedl's debugging options. */
2397    
2398  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
2399  if (S_arg > 9)  if (S_arg > 9)
# Line 1498  if (S_arg > 9) Line 2401  if (S_arg > 9)
2401    fprintf(stderr, "pcregrep: bad value for -S option\n");    fprintf(stderr, "pcregrep: bad value for -S option\n");
2402    return 2;    return 2;
2403    }    }
2404    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2405      {
2406      if (jfriedl_XT == 0) jfriedl_XT = 1;
2407      if (jfriedl_XR == 0) jfriedl_XR = 1;
2408      }
2409  #endif  #endif
2410    
2411  /* Get memory to store the pattern and hints lists. */  /* Get memory to store the pattern and hints lists. */
# Line 1508  hints_list = (pcre_extra **)malloc(MAX_P Line 2416  hints_list = (pcre_extra **)malloc(MAX_P
2416  if (pattern_list == NULL || hints_list == NULL)  if (pattern_list == NULL || hints_list == NULL)
2417    {    {
2418    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
2419    return 2;    goto EXIT2;
2420    }    }
2421    
2422  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there is no file provided by -f,
# Line 1527  for (j = 0; j < cmd_pattern_count; j++) Line 2435  for (j = 0; j < cmd_pattern_count; j++)
2435    {    {
2436    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(patterns[j], pcre_options, NULL,
2437         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2438      return 2;      goto EXIT2;
2439    }    }
2440    
2441  /* Compile the regular expressions that are provided in a file. */  /* Compile the regular expressions that are provided in a file. */
# Line 1551  if (pattern_filename != NULL) Line 2459  if (pattern_filename != NULL)
2459        {        {
2460        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2461          strerror(errno));          strerror(errno));
2462        return 2;        goto EXIT2;
2463        }        }
2464      filename = pattern_filename;      filename = pattern_filename;
2465      }      }
# Line 1564  if (pattern_filename != NULL) Line 2472  if (pattern_filename != NULL)
2472      linenumber++;      linenumber++;
2473      if (buffer[0] == 0) continue;   /* Skip blank lines */      if (buffer[0] == 0) continue;   /* Skip blank lines */
2474      if (!compile_pattern(buffer, pcre_options, filename, linenumber))      if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2475        return 2;        goto EXIT2;
2476      }      }
2477    
2478    if (f != stdin) fclose(f);    if (f != stdin) fclose(f);
# Line 1580  for (j = 0; j < pattern_count; j++) Line 2488  for (j = 0; j < pattern_count; j++)
2488      char s[16];      char s[16];
2489      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2490      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2491      return 2;      goto EXIT2;
2492      }      }
2493      hint_count++;
2494    }    }
2495    
2496    /* If --match-limit or --recursion-limit was set, put the value(s) into the
2497    pcre_extra block for each pattern. */
2498    
2499    if (match_limit > 0 || match_limit_recursion > 0)
2500      {
2501      for (j = 0; j < pattern_count; j++)
2502        {
2503        if (hints_list[j] == NULL)
2504          {
2505          hints_list[j] = malloc(sizeof(pcre_extra));
2506          if (hints_list[j] == NULL)
2507            {
2508            fprintf(stderr, "pcregrep: malloc failed\n");
2509            pcregrep_exit(2);
2510            }
2511          }
2512        if (match_limit > 0)
2513          {
2514          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2515          hints_list[j]->match_limit = match_limit;
2516          }
2517        if (match_limit_recursion > 0)
2518          {
2519          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2520          hints_list[j]->match_limit_recursion = match_limit_recursion;
2521          }
2522        }
2523      }
2524    
2525  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
2526    
# Line 1594  if (exclude_pattern != NULL) Line 2532  if (exclude_pattern != NULL)
2532      {      {
2533      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2534        errptr, error);        errptr, error);
2535      return 2;      goto EXIT2;
2536      }      }
2537    }    }
2538    
# Line 1606  if (include_pattern != NULL) Line 2544  if (include_pattern != NULL)
2544      {      {
2545      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2546        errptr, error);        errptr, error);
2547      return 2;      goto EXIT2;
2548        }
2549      }
2550    
2551    if (exclude_dir_pattern != NULL)
2552      {
2553      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2554        pcretables);
2555      if (exclude_dir_compiled == NULL)
2556        {
2557        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2558          errptr, error);
2559        goto EXIT2;
2560        }
2561      }
2562    
2563    if (include_dir_pattern != NULL)
2564      {
2565      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2566        pcretables);
2567      if (include_dir_compiled == NULL)
2568        {
2569        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2570          errptr, error);
2571        goto EXIT2;
2572      }      }
2573    }    }
2574    
2575  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
2576    
2577  if (i >= argc)  if (i >= argc)
2578    return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    {
2579      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2580      goto EXIT;
2581      }
2582    
2583  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2584  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
# Line 1630  for (; i < argc; i++) Line 2595  for (; i < argc; i++)
2595      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2596    }    }
2597    
2598  return rc;  EXIT:
2599    if (pattern_list != NULL)
2600      {
2601      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2602      free(pattern_list);
2603      }
2604    if (hints_list != NULL)
2605      {
2606      for (i = 0; i < hint_count; i++)
2607        {
2608        if (hints_list[i] != NULL) free(hints_list[i]);
2609        }
2610      free(hints_list);
2611      }
2612    pcregrep_exit(rc);
2613    
2614    EXIT2:
2615    rc = 2;
2616    goto EXIT;
2617  }  }
2618    
2619  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.87  
changed lines
  Added in v.564

  ViewVC Help
Powered by ViewVC 1.1.5