/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 535 by ph10, Thu Jun 3 19:18:24 2010 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
# Line 45  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
 #include "config.h"  
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 55  POSSIBILITY OF SUCH DAMAGE. Line 70  POSSIBILITY OF SUCH DAMAGE.
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "4.0 07-Jun-2005"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define MBUFTHIRD BUFSIZ
# Line 64  typedef int BOOL; Line 79  typedef int BOOL;
79  #define MBUFTHIRD 8192  #define MBUFTHIRD 8192
80  #endif  #endif
81    
82    /* Values for the "filenames" variable, which specifies options for file name
83    output. The order is important; it is assumed that a file name is wanted for
84    all values greater than FN_DEFAULT. */
85    
86    enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92    /* Actions for the -d and -D options */
93    
94    enum { dee_READ, dee_SKIP, dee_RECURSE };
95    enum { DEE_READ, DEE_SKIP };
96    
97    /* Actions for special processing options (flag bits) */
98    
99    #define PO_WORD_MATCH     0x0001
100    #define PO_LINE_MATCH     0x0002
101    #define PO_FIXED_STRINGS  0x0004
102    
103    /* Line ending types */
104    
105    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
/* The expansion is wrapped in do { } while (0) so that "FWRITE(...);" is exactly
one statement and stays correct inside an unbraced if/else. The empty inner if
is what consumes the value of fwrite(). */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114    
115    
116    
117  /*************************************************  /*************************************************
118  *               Global variables                 *  *               Global variables                 *
119  *************************************************/  *************************************************/
120    
121    /* Jeffrey Friedl has some debugging requirements that are not part of the
122    regular code. */
123    
124    #ifdef JFRIEDL_DEBUG
125    static int S_arg = -1;
126    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128    static const char *jfriedl_prefix = "";
129    static const char *jfriedl_postfix = "";
130    #endif
131    
132    static int  endlinetype;
133    
134    static char *colour_string = (char *)"1;31";
135    static char *colour_option = NULL;
136    static char *dee_option = NULL;
137    static char *DEE_option = NULL;
138    static char *newline = NULL;
139  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
140  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
141    static char *locale = NULL;
142    
143    static const unsigned char *pcretables = NULL;
144    
145  static int  pattern_count = 0;  static int  pattern_count = 0;
146  static pcre **pattern_list;  static pcre **pattern_list = NULL;
147  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
148    
149  static char *include_pattern = NULL;  static char *include_pattern = NULL;
150  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
151    static char *include_dir_pattern = NULL;
152    static char *exclude_dir_pattern = NULL;
153    
154  static pcre *include_compiled = NULL;  static pcre *include_compiled = NULL;
155  static pcre *exclude_compiled = NULL;  static pcre *exclude_compiled = NULL;
156    static pcre *include_dir_compiled = NULL;
157    static pcre *exclude_dir_compiled = NULL;
158    
159  static int after_context = 0;  static int after_context = 0;
160  static int before_context = 0;  static int before_context = 0;
161  static int both_context = 0;  static int both_context = 0;
162    static int dee_action = dee_READ;
163    static int DEE_action = DEE_READ;
164    static int error_count = 0;
165    static int filenames = FN_DEFAULT;
166    static int process_options = 0;
167    
168  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
169  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
170  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
 static BOOL filenames_nomatch_only = FALSE;  
171  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
172  static BOOL invert = FALSE;  static BOOL invert = FALSE;
173    static BOOL line_buffered = FALSE;
174    static BOOL line_offsets = FALSE;
175  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
176  static BOOL number = FALSE;  static BOOL number = FALSE;
177    static BOOL omit_zero_count = FALSE;
178    static BOOL only_matching = FALSE;
179  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
 static BOOL recurse = FALSE;  
180  static BOOL silent = FALSE;  static BOOL silent = FALSE;
181  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
 static BOOL word_match = FALSE;  
182    
183  /* Structure for options and list of them */  /* Structure for options and list of them */
184    
185  enum { OP_NODATA, OP_STRING, OP_NUMBER };  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
186           OP_PATLIST };
187    
188  typedef struct option_item {  typedef struct option_item {
189    int type;    int type;
# Line 112  typedef struct option_item { Line 193  typedef struct option_item {
193    const char *help_text;    const char *help_text;
194  } option_item;  } option_item;
195    
196    /* Options without a single-letter equivalent get a negative value. This can be
197    used to identify them. */
198    
199    #define N_COLOUR       (-1)
200    #define N_EXCLUDE      (-2)
201    #define N_EXCLUDE_DIR  (-3)
202    #define N_HELP         (-4)
203    #define N_INCLUDE      (-5)
204    #define N_INCLUDE_DIR  (-6)
205    #define N_LABEL        (-7)
206    #define N_LOCALE       (-8)
207    #define N_NULL         (-9)
208    #define N_LOFFSETS     (-10)
209    #define N_FOFFSETS     (-11)
210    #define N_LBUFFER      (-12)
211    
212  static option_item optionlist[] = {  static option_item optionlist[] = {
213    { OP_NODATA, -1,  NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
214    { OP_NODATA, -1,  NULL,              "help",          "display this help and exit" },    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
215    { OP_NUMBER, 'A', &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
216    { OP_NUMBER, 'B', &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
217    { OP_NUMBER, 'C', &both_context,     "context=number", "set number of context lines, before & after" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
218    { OP_NODATA, 'c', NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
219    { OP_STRING, 'f', &pattern_filename, "file=path",     "read patterns from file" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
220    { OP_NODATA, 'h', NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
221    { OP_NODATA, 'i', NULL,              "ignore-case",   "ignore case distinctions" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
222    { OP_NODATA, 'l', NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
223    { OP_NODATA, 'L', NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_PATLIST,   'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
224    { OP_STRING, -1,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_NODATA,    'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
225    { OP_NODATA, 'M', NULL,              "multiline",     "run in multiline mode" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
226    { OP_NODATA, 'n', NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
227    { OP_NODATA, 'q', NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
228    { OP_NODATA, 'r', NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
229    { OP_STRING, -1,  &exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
230    { OP_STRING, -1,  &include_pattern,  "include=pattern","include matching files when recursing" },    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
231    { OP_NODATA, 's', NULL,              "no-messages",   "suppress error messages" },    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
232    { OP_NODATA, 'u', NULL,              "utf-8",         "use UTF-8 mode" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
233    { OP_NODATA, 'V', NULL,              "version",       "print version information and exit" },    { OP_NODATA,    N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
234    { OP_NODATA, 'v', NULL,              "invert-match",  "select non-matching lines" },    { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
235    { OP_NODATA, 'w', NULL,              "word-regex(p)", "force PATTERN to match only as a word"  },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
236    { OP_NODATA, 'x', NULL,              "line-regex(p)", "force PATTERN to match only whole lines" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
237    { OP_NODATA, 0,   NULL,               NULL,            NULL }    { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
238      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
239      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
240      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
241      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
242      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
243      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
244      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
245      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
246    #ifdef JFRIEDL_DEBUG
247      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
248    #endif
249      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
250      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
251      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
252      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
253      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
254      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
255      { OP_NODATA,    0,        NULL,               NULL,            NULL }
256  };  };
257    
258    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
259    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
260    that the combination of -w and -x has the same effect as -x on its own, so we
261    can treat them as the same. */
262    
263    static const char *prefix[] = {
264      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
265    
266    static const char *suffix[] = {
267      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
268    
269    /* UTF-8 tables - used only when the newline setting is "any". */
270    
271    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
272    
273    const char utf8_table4[] = {
274      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
275      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
276      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
277      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
278    
279    
280    
281  /*************************************************  /*************************************************
282  *       Functions for directory scanning         *  *            OS-specific functions               *
283  *************************************************/  *************************************************/
284    
285  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
286  although at present the only ones are for Unix, Win32, and for "no directory  although at present the only ones are for Unix, Win32, and for "no support". */
 recursion support". */  
287    
288    
289  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
290    
291  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
292  #include <sys/types.h>  #include <sys/types.h>
293  #include <sys/stat.h>  #include <sys/stat.h>
294  #include <dirent.h>  #include <dirent.h>
# Line 184  for (;;) Line 320  for (;;)
320    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
321      return dent->d_name;      return dent->d_name;
322    }    }
323  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
324  }  }
325    
326  static void  static void
# Line 194  closedir(dir); Line 330  closedir(dir);
330  }  }
331    
332    
333    /************* Test for regular file in Unix **********/
334    
/* Returns non-zero if stat() reports filename as a regular file, and zero for
any other file type. If stat() itself fails, 1 is returned as well. */
335    static int
336    isregfile(char *filename)
337    {
338    struct stat statbuf;
339    if (stat(filename, &statbuf) < 0)
340      return 1;        /* In the expectation that opening as a file will fail */
341    return (statbuf.st_mode & S_IFMT) == S_IFREG;
342    }
343    
344    
345    /************* Test for a terminal in Unix **********/
346    
/* Returns the result of isatty() applied to the descriptor of stdout. */
347    static BOOL
348    is_stdout_tty(void)
349    {
350    return isatty(fileno(stdout));
351    }
352    
/* Returns the result of isatty() applied to the descriptor of the stream f. */
353    static BOOL
354    is_file_tty(FILE *f)
355    {
356    return isatty(fileno(f));
357    }
358    
359    
360  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
361    
362  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
363  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
364  when it did not exist. */  when it did not exist. David Byron added a patch that moved the #include of
365    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
366    */
367    
368  #elif HAVE_WIN32API  #elif HAVE_WINDOWS_H
369    
370  #ifndef STRICT  #ifndef STRICT
371  # define STRICT  # define STRICT
# Line 209  when it did not exist. */ Line 373  when it did not exist. */
373  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
374  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
375  #endif  #endif
376    
377    #include <windows.h>
378    
379  #ifndef INVALID_FILE_ATTRIBUTES  #ifndef INVALID_FILE_ATTRIBUTES
380  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
381  #endif  #endif
382    
 #include <windows.h>  
   
383  typedef struct directory_type  typedef struct directory_type
384  {  {
385  HANDLE handle;  HANDLE handle;
# Line 292  free(dir); Line 457  free(dir);
457  }  }
458    
459    
460    /************* Test for regular file in Win32 **********/
461    
462    /* I don't know how to do this, or if it can be done; assume all paths are
463    regular if they are not directories. */
464    
/* Returns non-zero for every path that isdirectory() does not report as a
directory; no other file types are distinguished here. */
465    int isregfile(char *filename)
466    {
467    return !isdirectory(filename);
468    }
469    
470    
471    /************* Test for a terminal in Win32 **********/
472    
473    /* I don't know how to do this; assume never */
474    
/* Always returns FALSE: no terminal test is implemented in this branch. */
475    static BOOL
476    is_stdout_tty(void)
477    {
478    return FALSE;
479    }
480    
/* Always returns FALSE; the argument f is not examined. */
481    static BOOL
482    is_file_tty(FILE *f)
483    {
484    return FALSE;
485    }
486    
487    
488  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
489    
490  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
# Line 300  free(dir); Line 493  free(dir);
493    
494  typedef void directory_type;  typedef void directory_type;
495    
496  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
497  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
498  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
499  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
500    
501    
502    /************* Test for regular when we can't do it **********/
503    
504    /* Assume all files are regular. */
505    
/* Always returns 1; the argument filename is not examined. */
506    int isregfile(char *filename) { return 1; }
507    
508    
509    /************* Test for a terminal when we can't do it **********/
510    
/* Always returns FALSE: no terminal test is available in this branch. */
511    static BOOL
512    is_stdout_tty(void)
513    {
514    return FALSE;
515    }
516    
/* Always returns FALSE; the argument f is not examined. */
517    static BOOL
518    is_file_tty(FILE *f)
519    {
520    return FALSE;
521    }
522    
523  #endif  #endif
524    
525    
526    
527  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
528  /*************************************************  /*************************************************
529  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
530  *************************************************/  *************************************************/
# Line 332  return sys_errlist[n]; Line 547  return sys_errlist[n];
547    
548    
549  /*************************************************  /*************************************************
550    *            Read one line of input              *
551    *************************************************/
552    
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because the data may contain binary
zeros, and fgets() gives no way of telling how many characters it has read.
The characters are therefore fetched one at a time with fgetc(), stopping after
a '\n' has been stored or when the buffer is full.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file (and zero,
             without reading anything, if length is not positive)
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;

/* The space check comes before the read, so that when length is zero or
negative nothing is stored in the buffer and no character is consumed. */

while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return yield;
}
580    
581    
582    
583    /*************************************************
584    *             Find end of line                   *
585    *************************************************/
586    
587    /* The length of the endline sequence that is found is set via lenptr. This may
588    be zero at the very end of the file if there is no line-ending sequence there.
589    
590    Arguments:
591      p         current position in line
592      endptr    end of available data
593      lenptr    where to put the length of the eol sequence
594    
595    Returns:    pointer to the last byte of the line
596    */
597    
598    static char *
599    end_of_line(char *p, char *endptr, int *lenptr)
600    {
601    switch(endlinetype)
602      {
603      default:      /* Just in case */
604      case EL_LF:
605      while (p < endptr && *p != '\n') p++;
606      if (p < endptr)
607        {
608        *lenptr = 1;
609        return p + 1;
610        }
611      *lenptr = 0;
612      return endptr;
613    
614      case EL_CR:
615      while (p < endptr && *p != '\r') p++;
616      if (p < endptr)
617        {
618        *lenptr = 1;
619        return p + 1;
620        }
621      *lenptr = 0;
622      return endptr;
623    
624      case EL_CRLF:
625      for (;;)
626        {
627        while (p < endptr && *p != '\r') p++;
628        if (++p >= endptr)
629          {
630          *lenptr = 0;
631          return endptr;
632          }
633        if (*p == '\n')
634          {
635          *lenptr = 2;
636          return p + 1;
637          }
638        }
639      break;
640    
641      case EL_ANYCRLF:
642      while (p < endptr)
643        {
644        int extra = 0;
645        register int c = *((unsigned char *)p);
646    
647        if (utf8 && c >= 0xc0)
648          {
649          int gcii, gcss;
650          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
651          gcss = 6*extra;
652          c = (c & utf8_table3[extra]) << gcss;
653          for (gcii = 1; gcii <= extra; gcii++)
654            {
655            gcss -= 6;
656            c |= (p[gcii] & 0x3f) << gcss;
657            }
658          }
659    
660        p += 1 + extra;
661    
662        switch (c)
663          {
664          case 0x0a:    /* LF */
665          *lenptr = 1;
666          return p;
667    
668          case 0x0d:    /* CR */
669          if (p < endptr && *p == 0x0a)
670            {
671            *lenptr = 2;
672            p++;
673            }
674          else *lenptr = 1;
675          return p;
676    
677          default:
678          break;
679          }
680        }   /* End of loop for ANYCRLF case */
681    
682      *lenptr = 0;  /* Must have hit the end */
683      return endptr;
684    
685      case EL_ANY:
686      while (p < endptr)
687        {
688        int extra = 0;
689        register int c = *((unsigned char *)p);
690    
691        if (utf8 && c >= 0xc0)
692          {
693          int gcii, gcss;
694          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
695          gcss = 6*extra;
696          c = (c & utf8_table3[extra]) << gcss;
697          for (gcii = 1; gcii <= extra; gcii++)
698            {
699            gcss -= 6;
700            c |= (p[gcii] & 0x3f) << gcss;
701            }
702          }
703    
704        p += 1 + extra;
705    
706        switch (c)
707          {
708          case 0x0a:    /* LF */
709          case 0x0b:    /* VT */
710          case 0x0c:    /* FF */
711          *lenptr = 1;
712          return p;
713    
714          case 0x0d:    /* CR */
715          if (p < endptr && *p == 0x0a)
716            {
717            *lenptr = 2;
718            p++;
719            }
720          else *lenptr = 1;
721          return p;
722    
723          case 0x85:    /* NEL */
724          *lenptr = utf8? 2 : 1;
725          return p;
726    
727          case 0x2028:  /* LS */
728          case 0x2029:  /* PS */
729          *lenptr = 3;
730          return p;
731    
732          default:
733          break;
734          }
735        }   /* End of loop for ANY case */
736    
737      *lenptr = 0;  /* Must have hit the end */
738      return endptr;
739      }     /* End of overall switch */
740    }
741    
742    
743    
744    /*************************************************
745    *         Find start of previous line            *
746    *************************************************/
747    
748    /* This is called when looking back for before lines to print.
749    
750    Arguments:
751      p         start of the subsequent line
752      startptr  start of available data
753    
754    Returns:    pointer to the start of the previous line
755    */
756    
757    static char *
758    previous_line(char *p, char *startptr)
759    {
760    switch(endlinetype)
761      {
762      default:      /* Just in case */
763      case EL_LF:
764      p--;
765      while (p > startptr && p[-1] != '\n') p--;
766      return p;
767    
768      case EL_CR:
769      p--;
770      while (p > startptr && p[-1] != '\n') p--;
771      return p;
772    
773      case EL_CRLF:
774      for (;;)
775        {
776        p -= 2;
777        while (p > startptr && p[-1] != '\n') p--;
778        if (p <= startptr + 1 || p[-2] == '\r') return p;
779        }
780      return p;   /* But control should never get here */
781    
782      case EL_ANY:
783      case EL_ANYCRLF:
784      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
785      if (utf8) while ((*p & 0xc0) == 0x80) p--;
786    
787      while (p > startptr)
788        {
789        register int c;
790        char *pp = p - 1;
791    
792        if (utf8)
793          {
794          int extra = 0;
795          while ((*pp & 0xc0) == 0x80) pp--;
796          c = *((unsigned char *)pp);
797          if (c >= 0xc0)
798            {
799            int gcii, gcss;
800            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
801            gcss = 6*extra;
802            c = (c & utf8_table3[extra]) << gcss;
803            for (gcii = 1; gcii <= extra; gcii++)
804              {
805              gcss -= 6;
806              c |= (pp[gcii] & 0x3f) << gcss;
807              }
808            }
809          }
810        else c = *((unsigned char *)pp);
811    
812        if (endlinetype == EL_ANYCRLF) switch (c)
813          {
814          case 0x0a:    /* LF */
815          case 0x0d:    /* CR */
816          return p;
817    
818          default:
819          break;
820          }
821    
822        else switch (c)
823          {
824          case 0x0a:    /* LF */
825          case 0x0b:    /* VT */
826          case 0x0c:    /* FF */
827          case 0x0d:    /* CR */
828          case 0x85:    /* NEL */
829          case 0x2028:  /* LS */
830          case 0x2029:  /* PS */
831          return p;
832    
833          default:
834          break;
835          }
836    
837        p = pp;  /* Back one character */
838        }        /* End of loop for ANY case */
839    
840      return startptr;  /* Hit start of data */
841      }     /* End of overall switch */
842    }
843    
844    
845    
846    
847    
848    /*************************************************
849  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
850  *************************************************/  *************************************************/
851    
852  /* This is called if we are about to lose said lines because of buffer filling,  /* This is called if we are about to lose said lines because of buffer filling,
853  and at the end of the file.  and at the end of the file. The data in the line is written using fwrite() so
854    that a binary zero does not terminate it.
855    
856  Arguments:  Arguments:
857    lastmatchnumber   the number of the last matching line, plus one    lastmatchnumber   the number of the last matching line, plus one
# Line 355  if (after_context > 0 && lastmatchnumber Line 870  if (after_context > 0 && lastmatchnumber
870    int count = 0;    int count = 0;
871    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
872      {      {
873        int ellength;
874      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
875      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
876      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
877      while (*pp != '\n') pp++;      pp = end_of_line(pp, endptr, &ellength);
878      fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
879      lastmatchrestart = pp + 1;      lastmatchrestart = pp;
880      }      }
881    hyphenpending = TRUE;    hyphenpending = TRUE;
882    }    }
# Line 369  if (after_context > 0 && lastmatchnumber Line 885  if (after_context > 0 && lastmatchnumber
885    
886    
887  /*************************************************  /*************************************************
888    *   Apply patterns to subject till one matches   *
889    *************************************************/
890    
891    /* This function is called to run through all patterns, looking for a match. It
892    is used multiple times for the same subject when colouring is enabled, in order
893    to find all possible matches.
894    
895    Arguments:
896      matchptr    the start of the subject
897      length      the length of the subject to match
898      offsets     the offsets vector to fill in
899      mrc         address of where to put the result of pcre_exec()
900    
901    Returns:      TRUE if there was a match
902                  FALSE if there was no match
903                  invert if there was a non-fatal error
904    */
905    
906    static BOOL
907    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
908    {
909    int i;
910    for (i = 0; i < pattern_count; i++)
911      {
912      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
913        PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
914      if (*mrc >= 0) return TRUE;
915      if (*mrc == PCRE_ERROR_NOMATCH) continue;
916      fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
917      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
918      fprintf(stderr, "this text:\n");
919      FWRITE(matchptr, 1, length, stderr);   /* In case binary zero included */
920      fprintf(stderr, "\n");
921      if (error_count == 0 &&
922          (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
923        {
924        fprintf(stderr, "pcregrep: error %d means that a resource limit "
925          "was exceeded\n", *mrc);
926        fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
927        }
928      if (error_count++ > 20)
929        {
930        fprintf(stderr, "pcregrep: too many errors - abandoned\n");
931        exit(2);
932        }
933      return invert;    /* No more matching; don't show the line again */
934      }
935    
936    return FALSE;  /* No match, no errors */
937    }
938    
939    
940    
941    /*************************************************
942  *            Grep an individual file             *  *            Grep an individual file             *
943  *************************************************/  *************************************************/
944    
# Line 380  be in the middle third most of the time, Line 950  be in the middle third most of the time,
950  "before" context printing.  "before" context printing.
951    
952  Arguments:  Arguments:
953    in           the fopened FILE stream    handle       the fopened FILE stream for a normal file
954                   the gzFile pointer when reading is via libz
955                   the BZFILE pointer when reading is via libbz2
956      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
957    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
958                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
959                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
960    
961  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
962                 1 otherwise (no matches)                 1 otherwise (no matches)
963                   2 if there is a read error on a .bz2 file
964  */  */
965    
966  static int  static int
967  pcregrep(FILE *in, char *printname)  pcregrep(void *handle, int frtype, char *printname)
968  {  {
969  int rc = 1;  int rc = 1;
970  int linenumber = 1;  int linenumber = 1;
971  int lastmatchnumber = 0;  int lastmatchnumber = 0;
972  int count = 0;  int count = 0;
973  int offsets[99];  int filepos = 0;
974    int offsets[OFFSET_SIZE];
975  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
976  char buffer[3*MBUFTHIRD];  char buffer[3*MBUFTHIRD];
977  char *ptr = buffer;  char *ptr = buffer;
978  char *endptr;  char *endptr;
979  size_t bufflength;  size_t bufflength;
980  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
981    BOOL input_line_buffered = line_buffered;
982    FILE *in = NULL;                    /* Ensure initialized */
983    
984    #ifdef SUPPORT_LIBZ
985    gzFile ingz = NULL;
986    #endif
987    
988    #ifdef SUPPORT_LIBBZ2
989    BZFILE *inbz2 = NULL;
990    #endif
991    
992    
993  /* Do the first read into the start of the buffer and set up the pointer to  /* Do the first read into the start of the buffer and set up the pointer to end
994  end of what we have. */  of what we have. In the case of libz, a non-zipped .gz file will be read as a
995    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
996    fail. */
997    
998    #ifdef SUPPORT_LIBZ
999    if (frtype == FR_LIBZ)
1000      {
1001      ingz = (gzFile)handle;
1002      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1003      }
1004    else
1005    #endif
1006    
1007    #ifdef SUPPORT_LIBBZ2
1008    if (frtype == FR_LIBBZ2)
1009      {
1010      inbz2 = (BZFILE *)handle;
1011      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1012      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1013      }                                    /* without the cast it is unsigned. */
1014    else
1015    #endif
1016    
1017      {
1018      in = (FILE *)handle;
1019      if (is_file_tty(in)) input_line_buffered = TRUE;
1020      bufflength = input_line_buffered?
1021        read_one_line(buffer, 3*MBUFTHIRD, in) :
1022        fread(buffer, 1, 3*MBUFTHIRD, in);
1023      }
1024    
 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  
1025  endptr = buffer + bufflength;  endptr = buffer + bufflength;
1026    
1027  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
# Line 417  way, the buffer is shifted left and re-f Line 1031  way, the buffer is shifted left and re-f
1031    
1032  while (ptr < endptr)  while (ptr < endptr)
1033    {    {
1034    int i;    int endlinelength;
1035    BOOL match = FALSE;    int mrc = 0;
1036      BOOL match;
1037      char *matchptr = ptr;
1038    char *t = ptr;    char *t = ptr;
1039    size_t length, linelength;    size_t length, linelength;
1040    
1041    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
1042    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
1043    length remainder of the data in the buffer. Otherwise, it is the length of    length remainder of the data in the buffer. Otherwise, it is the length of
1044    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
1045    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1046    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
1047      first line. */
1048    
1049      t = end_of_line(t, endptr, &endlinelength);
1050      linelength = t - ptr - endlinelength;
1051      length = multiline? (size_t)(endptr - ptr) : linelength;
1052    
1053      /* Extra processing for Jeffrey Friedl's debugging. */
1054    
1055    #ifdef JFRIEDL_DEBUG
1056      if (jfriedl_XT || jfriedl_XR)
1057      {
1058          #include <sys/time.h>
1059          #include <time.h>
1060          struct timeval start_time, end_time;
1061          struct timezone dummy;
1062          int i;
1063    
1064          if (jfriedl_XT)
1065          {
1066              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1067              const char *orig = ptr;
1068              ptr = malloc(newlen + 1);
1069              if (!ptr) {
1070                      printf("out of memory");
1071                      exit(2);
1072              }
1073              endptr = ptr;
1074              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1075              for (i = 0; i < jfriedl_XT; i++) {
1076                      strncpy(endptr, orig,  length);
1077                      endptr += length;
1078              }
1079              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1080              length = newlen;
1081          }
1082    
1083    linelength = 0;        if (gettimeofday(&start_time, &dummy) != 0)
1084    while (t < endptr && *t++ != '\n') linelength++;                perror("bad gettimeofday");
   length = multiline? endptr - ptr : linelength;  
1085    
   /* Run through all the patterns until one matches. Note that we don't include  
   the final newline in the subject string. */  
1086    
1087    for (i = 0; !match && i < pattern_count; i++)        for (i = 0; i < jfriedl_XR; i++)
1088      {            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1089      match = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,                PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1090        offsets, 99) >= 0;  
1091      }        if (gettimeofday(&end_time, &dummy) != 0)
1092                  perror("bad gettimeofday");
1093    
1094          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1095                          -
1096                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1097    
1098          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1099          return 0;
1100      }
1101    #endif
1102    
1103      /* We come back here after a match when the -o option (only_matching) is set,
1104      in order to find any further matches in the same line. */
1105    
1106      ONLY_MATCHING_RESTART:
1107    
1108      /* Run through all the patterns until one matches or there is an error other
1109      than NOMATCH. This code is in a subroutine so that it can be re-used for
1110      finding subsequent matches when colouring matched lines. */
1111    
1112    /* If it's a match or a not-match (as required), print what's wanted. */    match = match_patterns(matchptr, length, offsets, &mrc);
1113    
1114      /* If it's a match or a not-match (as required), do what's wanted. */
1115    
1116    if (match != invert)    if (match != invert)
1117      {      {
1118      BOOL hyphenprinted = FALSE;      BOOL hyphenprinted = FALSE;
1119    
1120      if (filenames_nomatch_only) return 1;      /* We've failed if we want a file that doesn't have any matches. */
1121    
1122        if (filenames == FN_NOMATCH_ONLY) return 1;
1123    
1124        /* Just count if just counting is wanted. */
1125    
1126      if (count_only) count++;      if (count_only) count++;
1127    
1128      else if (filenames_only)      /* If all we want is a file name, there is no need to scan any more lines
1129        in the file. */
1130    
1131        else if (filenames == FN_MATCH_ONLY)
1132        {        {
1133        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1134        return 0;        return 0;
1135        }        }
1136    
1137        /* Likewise, if all we want is a yes/no answer. */
1138    
1139      else if (quiet) return 0;      else if (quiet) return 0;
1140    
1141        /* The --only-matching option prints just the substring that matched, and
1142        the --file-offsets and --line-offsets options output offsets for the
1143        matching substring (they both force --only-matching). None of these options
1144        prints any context. Afterwards, adjust the start and length, and then jump
1145        back to look for further matches in the same line. If we are in invert
1146        mode, however, nothing is printed - this could still be useful because the
1147        return code is set. */
1148    
1149        else if (only_matching)
1150          {
1151          if (!invert)
1152            {
1153            if (printname != NULL) fprintf(stdout, "%s:", printname);
1154            if (number) fprintf(stdout, "%d:", linenumber);
1155            if (line_offsets)
1156              fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1157                offsets[1] - offsets[0]);
1158            else if (file_offsets)
1159              fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1160                offsets[1] - offsets[0]);
1161            else
1162              {
1163              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1164              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1165              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1166              }
1167            fprintf(stdout, "\n");
1168            matchptr += offsets[1];
1169            length -= offsets[1];
1170            match = FALSE;
1171            goto ONLY_MATCHING_RESTART;
1172            }
1173          }
1174    
1175        /* This is the default case when none of the above options is set. We print
1176        the matching line(s), possibly preceded and/or followed by other lines of
1177        context. */
1178    
1179      else      else
1180        {        {
1181        /* See if there is a requirement to print some "after" lines from a        /* See if there is a requirement to print some "after" lines from a
# Line 467  while (ptr < endptr) Line 1183  while (ptr < endptr)
1183    
1184        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
1185          {          {
1186            int ellength;
1187          int linecount = 0;          int linecount = 0;
1188          char *p = lastmatchrestart;          char *p = lastmatchrestart;
1189    
1190          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
1191            {            {
1192            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
1193            linecount++;            linecount++;
1194            }            }
1195    
1196          /* It is important to advance lastmatchrestart during this printing so          /* It is important to advance lastmatchrestart during this printing so
1197          that it interacts correctly with any "before" printing below. */          that it interacts correctly with any "before" printing below. Print
1198            each line's data using fwrite() in case there are binary zeroes. */
1199    
1200          while (lastmatchrestart < p)          while (lastmatchrestart < p)
1201            {            {
1202            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
1203            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1204            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1205            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1206            fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1207            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
1208            }            }
1209          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
1210          }          }
# Line 510  while (ptr < endptr) Line 1227  while (ptr < endptr)
1227          char *p = ptr;          char *p = ptr;
1228    
1229          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1230                 linecount++ < before_context)                 linecount < before_context)
1231            {            {
1232            p--;            linecount++;
1233            while (p > buffer && p[-1] != '\n') p--;            p = previous_line(p, buffer);
1234            }            }
1235    
1236          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 521  while (ptr < endptr) Line 1238  while (ptr < endptr)
1238    
1239          while (p < ptr)          while (p < ptr)
1240            {            {
1241              int ellength;
1242            char *pp = p;            char *pp = p;
1243            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1244            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1245            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1246            fprintf(stdout, "%.*s", pp - p + 1, p);            FWRITE(p, 1, pp - p, stdout);
1247            p = pp + 1;            p = pp;
1248            }            }
1249          }          }
1250    
1251        /* Now print the matching line(s); ensure we set hyphenpending at the end        /* Now print the matching line(s); ensure we set hyphenpending at the end
1252        of the file. */        of the file if any context lines are being output. */
1253    
1254          if (after_context > 0 || before_context > 0)
1255            endhyphenpending = TRUE;
1256    
       endhyphenpending = TRUE;  
1257        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (printname != NULL) fprintf(stdout, "%s:", printname);
1258        if (number) fprintf(stdout, "%d:", linenumber);        if (number) fprintf(stdout, "%d:", linenumber);
1259    
1260        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1261        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1262        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1263        start of the match will always be before the first \n character. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1264          the match will always be before the first newline sequence. */
1265    
1266        if (multiline)        if (multiline)
1267          {          {
1268          char *endmatch = ptr + offsets[1];          int ellength;
1269          t = ptr;          char *endmatch = ptr;
1270          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          if (!invert)
1271          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1272          linelength = endmatch - ptr;            endmatch += offsets[1];
1273              t = ptr;
1274              while (t < endmatch)
1275                {
1276                t = end_of_line(t, endptr, &ellength);
1277                if (t <= endmatch) linenumber++; else break;
1278                }
1279              }
1280            endmatch = end_of_line(endmatch, endptr, &ellength);
1281            linelength = endmatch - ptr - ellength;
1282          }          }
1283    
1284        fprintf(stdout, "%.*s\n", linelength, ptr);        /*** NOTE: Use only fwrite() to output the data line, so that binary
1285          zeroes are treated as just another data character. */
1286    
1287          /* This extra option, for Jeffrey Friedl's debugging requirements,
1288          replaces the matched string, or a specific captured string if it exists,
1289          with X. When this happens, colouring is ignored. */
1290    
1291    #ifdef JFRIEDL_DEBUG
1292          if (S_arg >= 0 && S_arg < mrc)
1293            {
1294            int first = S_arg * 2;
1295            int last  = first + 1;
1296            FWRITE(ptr, 1, offsets[first], stdout);
1297            fprintf(stdout, "X");
1298            FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1299            }
1300          else
1301    #endif
1302    
1303          /* We have to split the line(s) up if colouring, and search for further
1304          matches. */
1305    
1306          if (do_colour)
1307            {
1308            int last_offset = 0;
1309            FWRITE(ptr, 1, offsets[0], stdout);
1310            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1311            FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1312            fprintf(stdout, "%c[00m", 0x1b);
1313            for (;;)
1314              {
1315              last_offset += offsets[1];
1316              matchptr += offsets[1];
1317              length -= offsets[1];
1318              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1319              FWRITE(matchptr, 1, offsets[0], stdout);
1320              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1321              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1322              fprintf(stdout, "%c[00m", 0x1b);
1323              }
1324            FWRITE(ptr + last_offset, 1,
1325              (linelength + endlinelength) - last_offset, stdout);
1326            }
1327    
1328          /* Not colouring; no need to search for further matches */
1329    
1330          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1331        }        }
1332    
1333        /* End of doing what has to be done for a match. If --line-buffered was
1334        given, flush the output. */
1335    
1336        if (line_buffered) fflush(stdout);
1337      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1338    
1339      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1340      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1341    
1342      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1343      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1344      }      }
1345    
1346    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1347      anything to be printed), we have to move on to the end of the match before
1348      proceeding. */
1349    
1350      if (multiline && invert && match)
1351        {
1352        int ellength;
1353        char *endmatch = ptr + offsets[1];
1354        t = ptr;
1355        while (t < endmatch)
1356          {
1357          t = end_of_line(t, endptr, &ellength);
1358          if (t <= endmatch) linenumber++; else break;
1359          }
1360        endmatch = end_of_line(endmatch, endptr, &ellength);
1361        linelength = endmatch - ptr - ellength;
1362        }
1363    
1364      /* Advance to after the newline and increment the line number. The file
1365      offset to the current line is maintained in filepos. */
1366    
1367    ptr += linelength + 1;    ptr += linelength + endlinelength;
1368      filepos += (int)(linelength + endlinelength);
1369    linenumber++;    linenumber++;
1370    
1371      /* If input is line buffered, and the buffer is not yet full, read another
1372      line and add it into the buffer. */
1373    
1374      if (input_line_buffered && bufflength < sizeof(buffer))
1375        {
1376        int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1377        bufflength += add;
1378        endptr += add;
1379        }
1380    
1381    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1382    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1383    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
# Line 587  while (ptr < endptr) Line 1397  while (ptr < endptr)
1397    
1398      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1399      ptr -= MBUFTHIRD;      ptr -= MBUFTHIRD;
1400      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);  
1401    #ifdef SUPPORT_LIBZ
1402        if (frtype == FR_LIBZ)
1403          bufflength = 2*MBUFTHIRD +
1404            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1405        else
1406    #endif
1407    
1408    #ifdef SUPPORT_LIBBZ2
1409        if (frtype == FR_LIBBZ2)
1410          bufflength = 2*MBUFTHIRD +
1411            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1412        else
1413    #endif
1414    
1415        bufflength = 2*MBUFTHIRD +
1416          (input_line_buffered?
1417           read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1418           fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1419      endptr = buffer + bufflength;      endptr = buffer + bufflength;
1420    
1421      /* Adjust any last match point */      /* Adjust any last match point */
# Line 599  while (ptr < endptr) Line 1427  while (ptr < endptr)
1427  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
1428  hyphenpending if it prints something. */  hyphenpending if it prints something. */
1429    
1430  do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);  if (!only_matching && !count_only)
1431  hyphenpending |= endhyphenpending;    {
1432      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1433      hyphenpending |= endhyphenpending;
1434      }
1435    
1436  /* Print the file name if we are looking for those without matches and there  /* Print the file name if we are looking for those without matches and there
1437  were none. If we found a match, we won't have got this far. */  were none. If we found a match, we won't have got this far. */
1438    
1439  if (filenames_nomatch_only)  if (filenames == FN_NOMATCH_ONLY)
1440    {    {
1441    fprintf(stdout, "%s\n", printname);    fprintf(stdout, "%s\n", printname);
1442    return 0;    return 0;
# Line 615  if (filenames_nomatch_only) Line 1446  if (filenames_nomatch_only)
1446    
1447  if (count_only)  if (count_only)
1448    {    {
1449    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1450    fprintf(stdout, "%d\n", count);      {
1451        if (printname != NULL && filenames != FN_NONE)
1452          fprintf(stdout, "%s:", printname);
1453        fprintf(stdout, "%d\n", count);
1454        }
1455    }    }
1456    
1457  return rc;  return rc;
# Line 633  recursing; if it's a file, grep it. Line 1468  recursing; if it's a file, grep it.
1468    
1469  Arguments:  Arguments:
1470    pathname          the path to investigate    pathname          the path to investigate
1471    dir_recurse       TRUE if recursing is wanted (-r)    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
   show_filenames    TRUE if file names are wanted for multiple files, except  
                       for the only file at top level when not filenames_only  
1472    only_one_at_top   TRUE if the path is the only one at toplevel    only_one_at_top   TRUE if the path is the only one at toplevel
1473    
1474  Returns:   0 if there was at least one match  Returns:   0 if there was at least one match
# Line 646  However, file opening failures are suppr Line 1479  However, file opening failures are suppr
1479  */  */
1480    
1481  static int  static int
1482  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL show_filenames,  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
   BOOL only_one_at_top)  
1483  {  {
1484  int rc = 1;  int rc = 1;
1485  int sep;  int sep;
1486  FILE *in;  int frtype;
1487  char *printname;  int pathlen;
1488    void *handle;
1489    FILE *in = NULL;           /* Ensure initialized */
1490    
1491    #ifdef SUPPORT_LIBZ
1492    gzFile ingz = NULL;
1493    #endif
1494    
1495    #ifdef SUPPORT_LIBBZ2
1496    BZFILE *inbz2 = NULL;
1497    #endif
1498    
1499  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1500    
1501  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1502    {    {
1503    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN,
1504      (filenames_only || filenames_nomatch_only ||      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
     (show_filenames && !only_one_at_top))?  
1505        stdin_name : NULL);        stdin_name : NULL);
1506    }    }
1507    
1508  /* If the file is a directory and we are recursing, scan each file within it,  /* If the file is a directory, skip if skipping or if we are recursing, scan
1509  subject to any include or exclude patterns that were set. The scanning code is  each file and directory within it, subject to any include or exclude patterns
1510  localized so it can be made system-specific. */  that were set. The scanning code is localized so it can be made
1511    system-specific. */
1512    
1513  if ((sep = isdirectory(pathname)) != 0 && dir_recurse)  if ((sep = isdirectory(pathname)) != 0)
1514    {    {
1515    char buffer[1024];    if (dee_action == dee_SKIP) return 1;
1516    char *nextfile;    if (dee_action == dee_RECURSE)
1517    directory_type *dir = opendirectory(pathname);      {
1518        char buffer[1024];
1519        char *nextfile;
1520        directory_type *dir = opendirectory(pathname);
1521    
1522        if (dir == NULL)
1523          {
1524          if (!silent)
1525            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1526              strerror(errno));
1527          return 2;
1528          }
1529    
1530    if (dir == NULL)      while ((nextfile = readdirectory(dir)) != NULL)
1531          {
1532          int frc, nflen;
1533          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1534          nflen = (int)(strlen(nextfile));
1535    
1536          if (isdirectory(buffer))
1537            {
1538            if (exclude_dir_compiled != NULL &&
1539                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1540              continue;
1541    
1542            if (include_dir_compiled != NULL &&
1543                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1544              continue;
1545            }
1546          else
1547            {
1548            if (exclude_compiled != NULL &&
1549                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1550              continue;
1551    
1552            if (include_compiled != NULL &&
1553                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1554              continue;
1555            }
1556    
1557          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1558          if (frc > 1) rc = frc;
1559           else if (frc == 0 && rc == 1) rc = 0;
1560          }
1561    
1562        closedirectory(dir);
1563        return rc;
1564        }
1565      }
1566    
1567    /* If the file is not a directory and not a regular file, skip it if that's
1568    been requested. */
1569    
1570    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1571    
1572    /* Control reaches here if we have a regular file, or if we have a directory
1573    and recursion or skipping was not requested, or if we have anything else and
1574    skipping was not requested. The scan proceeds. If this is the first and only
1575    argument at top level, we don't show the file name, unless we are only showing
1576    the file name, or the filename was forced (-H). */
1577    
1578    pathlen = (int)(strlen(pathname));
1579    
1580    /* Open using zlib if it is supported and the file name ends with .gz. */
1581    
1582    #ifdef SUPPORT_LIBZ
1583    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1584      {
1585      ingz = gzopen(pathname, "rb");
1586      if (ingz == NULL)
1587      {      {
1588      if (!silent)      if (!silent)
1589        fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1590          strerror(errno));          strerror(errno));
1591      return 2;      return 2;
1592      }      }
1593      handle = (void *)ingz;
1594      frtype = FR_LIBZ;
1595      }
1596    else
1597    #endif
1598    
1599    while ((nextfile = readdirectory(dir)) != NULL)  /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
     {  
     int frc, blen;  
     sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);  
     blen = strlen(buffer);  
   
     if (exclude_compiled != NULL &&  
         pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)  
       continue;  
1600    
1601      if (include_compiled != NULL &&  #ifdef SUPPORT_LIBBZ2
1602          pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1603        continue;    {
1604      inbz2 = BZ2_bzopen(pathname, "rb");
1605      handle = (void *)inbz2;
1606      frtype = FR_LIBBZ2;
1607      }
1608    else
1609    #endif
1610    
1611      frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);  /* Otherwise use plain fopen(). The label is so that we can come back here if
1612      if (frc > 1) rc = frc;  an attempt to read a .bz2 file indicates that it really is a plain file. */
      else if (frc == 0 && rc == 1) rc = 0;  
     }  
1613    
1614    closedirectory(dir);  #ifdef SUPPORT_LIBBZ2
1615    return rc;  PLAIN_FILE:
1616    #endif
1617      {
1618      in = fopen(pathname, "rb");
1619      handle = (void *)in;
1620      frtype = FR_PLAIN;
1621    }    }
1622    
1623  /* If the file is not a directory, or we are not recursing, scan it. If this is  /* All the opening methods return errno when they fail. */
 the first and only argument at top level, we don't show the file name (unless  
 we are only showing the file name). Otherwise, control is via the  
 show_filenames variable. */  
1624    
1625  in = fopen(pathname, "r");  if (handle == NULL)
 if (in == NULL)  
1626    {    {
1627    if (!silent)    if (!silent)
1628      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 719  if (in == NULL) Line 1630  if (in == NULL)
1630    return 2;    return 2;
1631    }    }
1632    
1633  printname =  (filenames_only || filenames_nomatch_only ||  /* Now grep the file */
1634    (show_filenames && !only_one_at_top))? pathname : NULL;  
1635    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1636      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1637    
1638    /* Close in an appropriate manner. */
1639    
1640    #ifdef SUPPORT_LIBZ
1641    if (frtype == FR_LIBZ)
1642      gzclose(ingz);
1643    else
1644    #endif
1645    
1646    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1647    read failed. If the error indicates that the file isn't in fact bzipped, try
1648    again as a normal file. */
1649    
1650    #ifdef SUPPORT_LIBBZ2
1651    if (frtype == FR_LIBBZ2)
1652      {
1653      if (rc == 2)
1654        {
1655        int errnum;
1656        const char *err = BZ2_bzerror(inbz2, &errnum);
1657        if (errnum == BZ_DATA_ERROR_MAGIC)
1658          {
1659          BZ2_bzclose(inbz2);
1660          goto PLAIN_FILE;
1661          }
1662        else if (!silent)
1663          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1664            pathname, err);
1665        }
1666      BZ2_bzclose(inbz2);
1667      }
1668    else
1669    #endif
1670    
1671  rc = pcregrep(in, printname);  /* Normal file close */
1672    
1673  fclose(in);  fclose(in);
1674    
1675    /* Pass back the yield from pcregrep(). */
1676    
1677  return rc;  return rc;
1678  }  }
1679    
# Line 738  return rc; Line 1687  return rc;
1687  static int  static int
1688  usage(int rc)  usage(int rc)
1689  {  {
1690  fprintf(stderr, "Usage: pcregrep [-LMVcfhilnqrsvwx] [long-options] [pattern] [file1 file2 ...]\n");  option_item *op;
1691  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Usage: pcregrep [-");
1692    for (op = optionlist; op->one_char != 0; op++)
1693      {
1694      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1695      }
1696    fprintf(stderr, "] [long options] [pattern] [files]\n");
1697    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1698      "options.\n");
1699  return rc;  return rc;
1700  }  }
1701    
# Line 757  option_item *op; Line 1713  option_item *op;
1713    
1714  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1715  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1716  printf("PATTERN must be present if -f is not used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1717  printf("\"-\" can be used as a file name to mean STDIN.\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
1718    
1719    #ifdef SUPPORT_LIBZ
1720    printf("Files whose names end in .gz are read using zlib.\n");
1721    #endif
1722    
1723    #ifdef SUPPORT_LIBBZ2
1724    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1725    #endif
1726    
1727    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1728    printf("Other files and the standard input are read as plain files.\n\n");
1729    #else
1730    printf("All files are read as plain files, without any interpretation.\n\n");
1731    #endif
1732    
1733    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1734  printf("Options:\n");  printf("Options:\n");
1735    
1736  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
# Line 768  for (op = optionlist; op->one_char != 0; Line 1738  for (op = optionlist; op->one_char != 0;
1738    int n;    int n;
1739    char s[4];    char s[4];
1740    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1741    printf("  %s --%s%n", s, op->long_name, &n);    n = 30 - printf("  %s --%s", s, op->long_name);
   n = 30 - n;  
1742    if (n < 1) n = 1;    if (n < 1) n = 1;
1743    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                    ", op->help_text);
1744    }    }
# Line 794  handle_option(int letter, int options) Line 1763  handle_option(int letter, int options)
1763  {  {
1764  switch(letter)  switch(letter)
1765    {    {
1766    case -1:  help(); exit(0);    case N_FOFFSETS: file_offsets = TRUE; break;
1767      case N_HELP: help(); exit(0);
1768      case N_LOFFSETS: line_offsets = number = TRUE; break;
1769      case N_LBUFFER: line_buffered = TRUE; break;
1770    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1771    case 'h': filenames = FALSE; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1772      case 'H': filenames = FN_FORCE; break;
1773      case 'h': filenames = FN_NONE; break;
1774    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1775    case 'l': filenames_only = TRUE; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1776    case 'L': filenames_nomatch_only = TRUE; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1777    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1778    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1779      case 'o': only_matching = TRUE; break;
1780    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1781    case 'r': recurse = TRUE; break;    case 'r': dee_action = dee_RECURSE; break;
1782    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1783    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1784    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1785    case 'w': word_match = TRUE; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1786    case 'x': whole_lines = TRUE; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1787    
1788    case 'V':    case 'V':
1789    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
1790    exit(0);    exit(0);
1791    break;    break;
1792    
# Line 828  return options; Line 1802  return options;
1802    
1803    
1804  /*************************************************  /*************************************************
1805    *          Construct printed ordinal             *
1806    *************************************************/
1807    
1808    /* This turns a number into "1st", "3rd", etc. */
1809    
1810    static char *
1811    ordin(int n)
1812    {
1813    static char buffer[8];
1814    char *p = buffer;
1815    sprintf(p, "%d", n);
1816    while (*p != 0) p++;
1817    switch (n%10)
1818      {
1819      case 1: strcpy(p, "st"); break;
1820      case 2: strcpy(p, "nd"); break;
1821      case 3: strcpy(p, "rd"); break;
1822      default: strcpy(p, "th"); break;
1823      }
1824    return buffer;
1825    }
1826    
1827    
1828    
1829    /*************************************************
1830    *          Compile a single pattern              *
1831    *************************************************/
1832    
1833    /* When the -F option has been used, this is called for each substring.
1834    Otherwise it's called for each supplied pattern.
1835    
1836    Arguments:
1837      pattern        the pattern string
1838      options        the PCRE options
1839      filename       the file name, or NULL for a command-line pattern
1840      count          0 if this is the only command line pattern, or
1841                     number of the command line pattern, or
1842                     linenumber for a pattern from a file
1843    
1844    Returns:         TRUE on success, FALSE after an error
1845    */
1846    
1847    static BOOL
1848    compile_single_pattern(char *pattern, int options, char *filename, int count)
1849    {
1850    char buffer[MBUFTHIRD + 16];
1851    const char *error;
1852    int errptr;
1853    
1854    if (pattern_count >= MAX_PATTERN_COUNT)
1855      {
1856      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1857        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1858      return FALSE;
1859      }
1860    
1861    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1862      suffix[process_options]);
1863    pattern_list[pattern_count] =
1864      pcre_compile(buffer, options, &error, &errptr, pcretables);
1865    if (pattern_list[pattern_count] != NULL)
1866      {
1867      pattern_count++;
1868      return TRUE;
1869      }
1870    
1871    /* Handle compile errors */
1872    
1873    errptr -= (int)strlen(prefix[process_options]);
1874    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1875    
1876    if (filename == NULL)
1877      {
1878      if (count == 0)
1879        fprintf(stderr, "pcregrep: Error in command-line regex "
1880          "at offset %d: %s\n", errptr, error);
1881      else
1882        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1883          "at offset %d: %s\n", ordin(count), errptr, error);
1884      }
1885    else
1886      {
1887      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1888        "at offset %d: %s\n", count, filename, errptr, error);
1889      }
1890    
1891    return FALSE;
1892    }
1893    
1894    
1895    
1896    /*************************************************
1897    *           Compile one supplied pattern         *
1898    *************************************************/
1899    
1900    /* When the -F option has been used, each string may be a list of strings,
1901    separated by line breaks. They will be matched literally.
1902    
1903    Arguments:
1904      pattern        the pattern string
1905      options        the PCRE options
1906      filename       the file name, or NULL for a command-line pattern
1907      count          0 if this is the only command line pattern, or
1908                     number of the command line pattern, or
1909                     linenumber for a pattern from a file
1910    
1911    Returns:         TRUE on success, FALSE after an error
1912    */
1913    
1914    static BOOL
1915    compile_pattern(char *pattern, int options, char *filename, int count)
1916    {
1917    if ((process_options & PO_FIXED_STRINGS) != 0)
1918      {
1919      char *eop = pattern + strlen(pattern);
1920      char buffer[MBUFTHIRD];
1921      for(;;)
1922        {
1923        int ellength;
1924        char *p = end_of_line(pattern, eop, &ellength);
1925        if (ellength == 0)
1926          return compile_single_pattern(pattern, options, filename, count);
1927        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1928        pattern = p;
1929        if (!compile_single_pattern(buffer, options, filename, count))
1930          return FALSE;
1931        }
1932      }
1933    else return compile_single_pattern(pattern, options, filename, count);
1934    }
1935    
1936    
1937    
1938    /*************************************************
1939  *                Main program                    *  *                Main program                    *
1940  *************************************************/  *************************************************/
1941    
# Line 838  main(int argc, char **argv) Line 1946  main(int argc, char **argv)
1946  {  {
1947  int i, j;  int i, j;
1948  int rc = 1;  int rc = 1;
1949  int options = 0;  int pcre_options = 0;
1950    int cmd_pattern_count = 0;
1951    int hint_count = 0;
1952  int errptr;  int errptr;
 const char *error;  
1953  BOOL only_one_at_top;  BOOL only_one_at_top;
1954    char *patterns[MAX_PATTERN_COUNT];
1955    const char *locale_from = "--locale";
1956    const char *error;
1957    
1958    /* Set the default line ending value from the default in the PCRE library;
1959    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1960    Note that the return values from pcre_config(), though derived from the ASCII
1961    codes, are the same in EBCDIC environments, so we must use the actual values
1962    rather than escapes such as '\r'. */
1963    
1964    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1965    switch(i)
1966      {
1967      default:               newline = (char *)"lf"; break;
1968      case 13:               newline = (char *)"cr"; break;
1969      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
1970      case -1:               newline = (char *)"any"; break;
1971      case -2:               newline = (char *)"anycrlf"; break;
1972      }
1973    
1974  /* Process the options */  /* Process the options */
1975    
# Line 855  for (i = 1; i < argc; i++) Line 1983  for (i = 1; i < argc; i++)
1983    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
1984    
1985    /* If we hit an argument that is just "-", it may be a reference to STDIN,    /* If we hit an argument that is just "-", it may be a reference to STDIN,
1986    but only if we have previously had -f to define the patterns. */    but only if we have previously had -e or -f to define the patterns. */
1987    
1988    if (argv[i][1] == 0)    if (argv[i][1] == 0)
1989      {      {
1990      if (pattern_filename != NULL) break;      if (pattern_filename != NULL || pattern_count > 0) break;
1991        else exit(usage(2));        else exit(usage(2));
1992      }      }
1993    
# Line 881  for (i = 1; i < argc; i++) Line 2009  for (i = 1; i < argc; i++)
2009      /* Some long options have data that follows after =, for example file=name.      /* Some long options have data that follows after =, for example file=name.
2010      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2011      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2012      with Jeff Friedl in preferring "regex" without the "p". These options are      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2013      entered in the table as "regex(p)". No option is in both these categories,      These options are entered in the table as "regex(p)". Options can be in
2014      fortunately. */      both these categories. */
2015    
2016      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2017        {        {
2018        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2019        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2020        if (opbra == NULL)     /* Not a (p) case */  
2021          /* Handle options with only one spelling of the name */
2022    
2023          if (opbra == NULL)     /* Does not contain '(' */
2024          {          {
2025          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2026            {            {
# Line 897  for (i = 1; i < argc; i++) Line 2028  for (i = 1; i < argc; i++)
2028            }            }
2029          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2030            {            {
2031            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2032            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2033                (int)strlen(arg) : (int)(argequals - arg);
2034            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2035              {              {
2036              option_data = arg + arglen;              option_data = arg + arglen;
# Line 911  for (i = 1; i < argc; i++) Line 2043  for (i = 1; i < argc; i++)
2043              }              }
2044            }            }
2045          }          }
2046        else                   /* Special case xxxx(p) */  
2047          /* Handle options with an alternate spelling of the name */
2048    
2049          else
2050          {          {
2051          char buff1[24];          char buff1[24];
2052          char buff2[24];          char buff2[24];
2053          int baselen = opbra - op->long_name;  
2054            int baselen = (int)(opbra - op->long_name);
2055            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2056            int arglen = (argequals == NULL || equals == NULL)?
2057              (int)strlen(arg) : (int)(argequals - arg);
2058    
2059          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2060          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2061            opbra + 1);  
2062          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2063               strncmp(arg, buff2, arglen) == 0)
2064              {
2065              if (equals != NULL && argequals != NULL)
2066                {
2067                option_data = argequals;
2068                if (*option_data == '=')
2069                  {
2070                  option_data++;
2071                  longopwasequals = TRUE;
2072                  }
2073                }
2074            break;            break;
2075              }
2076          }          }
2077        }        }
2078    
# Line 931  for (i = 1; i < argc; i++) Line 2083  for (i = 1; i < argc; i++)
2083        }        }
2084      }      }
2085    
2086      /* Jeffrey Friedl's debugging harness uses these additional options which
2087      are not in the right form for putting in the option table because they use
2088      only one hyphen, yet are more than one character long. By putting them
2089      separately here, they will not get displayed as part of the help() output,
2090      but I don't think Jeffrey will care about that. */
2091    
2092    #ifdef JFRIEDL_DEBUG
2093      else if (strcmp(argv[i], "-pre") == 0) {
2094              jfriedl_prefix = argv[++i];
2095              continue;
2096      } else if (strcmp(argv[i], "-post") == 0) {
2097              jfriedl_postfix = argv[++i];
2098              continue;
2099      } else if (strcmp(argv[i], "-XT") == 0) {
2100              sscanf(argv[++i], "%d", &jfriedl_XT);
2101              continue;
2102      } else if (strcmp(argv[i], "-XR") == 0) {
2103              sscanf(argv[++i], "%d", &jfriedl_XR);
2104              continue;
2105      }
2106    #endif
2107    
2108    
2109    /* One-char options; many that have no data may be in a single argument; we    /* One-char options; many that have no data may be in a single argument; we
2110    continue till we hit the last one or one that needs data. */    continue till we hit the last one or one that needs data. */
2111    
# Line 953  for (i = 1; i < argc; i++) Line 2128  for (i = 1; i < argc; i++)
2128          option_data = s+1;          option_data = s+1;
2129          break;          break;
2130          }          }
2131        options = handle_option(*s++, options);        pcre_options = handle_option(*s++, pcre_options);
2132        }        }
2133      }      }
2134    
2135    /* At this point we should have op pointing to a matched option */    /* At this point we should have op pointing to a matched option. If the type
2136      is OP_NODATA, it means that there is no data, and the option might set
2137      something in the PCRE options. */
2138    
2139    if (op->type == OP_NODATA)    if (op->type == OP_NODATA)
     options = handle_option(op->one_char, options);  
   else  
2140      {      {
2141      if (*option_data == 0)      pcre_options = handle_option(op->one_char, pcre_options);
2142        continue;
2143        }
2144    
2145      /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2146      either has a value or defaults to something. It cannot have data in a
2147      separate item. At the moment, the only such options are "colo(u)r" and
2148      Jeffrey Friedl's special -S debugging option. */
2149    
2150      if (*option_data == 0 &&
2151          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2152        {
2153        switch (op->one_char)
2154        {        {
2155        if (i >= argc - 1 || longopwasequals)        case N_COLOUR:
2156          {        colour_option = (char *)"auto";
2157          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        break;
2158          exit(usage(2));  #ifdef JFRIEDL_DEBUG
2159          }        case 'S':
2160        option_data = argv[++i];        S_arg = 0;
2161          break;
2162    #endif
2163          }
2164        continue;
2165        }
2166    
2167      /* Otherwise, find the data string for the option. */
2168    
2169      if (*option_data == 0)
2170        {
2171        if (i >= argc - 1 || longopwasequals)
2172          {
2173          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2174          exit(usage(2));
2175        }        }
2176        option_data = argv[++i];
2177        }
2178    
2179      /* If the option type is OP_PATLIST, it's the -e option, which can be called
2180      multiple times to create a list of patterns. */
2181    
2182      if (op->type == OP_STRING) *((char **)op->dataptr) = option_data; else    if (op->type == OP_PATLIST)
2183        {
2184        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2185          {
2186          fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2187            MAX_PATTERN_COUNT);
2188          return 2;
2189          }
2190        patterns[cmd_pattern_count++] = option_data;
2191        }
2192    
2193      /* Otherwise, deal with single string or numeric data values. */
2194    
2195      else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2196        {
2197        *((char **)op->dataptr) = option_data;
2198        }
2199      else
2200        {
2201        char *endptr;
2202        int n = strtoul(option_data, &endptr, 10);
2203        if (*endptr != 0)
2204        {        {
2205        char *endptr;        if (longop)
       int n = strtoul(option_data, &endptr, 10);  
       if (*endptr != 0)  
2206          {          {
2207          if (longop)          char *equals = strchr(op->long_name, '=');
2208            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%s\n",          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2209              option_data, op->long_name);            (int)(equals - op->long_name);
2210          else          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2211            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",            option_data, nlen, op->long_name);
             option_data, op->one_char);  
         exit(usage(2));  
2212          }          }
2213        *((int *)op->dataptr) = n;        else
2214            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2215              option_data, op->one_char);
2216          exit(usage(2));
2217        }        }
2218        *((int *)op->dataptr) = n;
2219      }      }
2220    }    }
2221    
# Line 1001  if (both_context > 0) Line 2228  if (both_context > 0)
2228    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2229    }    }
2230    
2231  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2232  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  However, the latter two set the only_matching flag. */
2233    
2234  if (pattern_list == NULL || hints_list == NULL)  if ((only_matching && (file_offsets || line_offsets)) ||
2235        (file_offsets && line_offsets))
2236    {    {
2237    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2238    return 2;      "and/or --line-offsets\n");
2239      exit(usage(2));
2240    }    }
2241    
2242  /* Compile the regular expression(s). */  if (file_offsets || line_offsets) only_matching = TRUE;
2243    
2244  if (pattern_filename != NULL)  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2245    LC_ALL environment variable is set, and if so, use it. */
2246    
2247    if (locale == NULL)
2248    {    {
2249    FILE *f = fopen(pattern_filename, "r");    locale = getenv("LC_ALL");
2250    char buffer[MBUFTHIRD + 16];    locale_from = "LCC_ALL";
2251    char *rdstart;    }
   int adjust = 0;  
2252    
2253    if (f == NULL)  if (locale == NULL)
2254      {
2255      locale = getenv("LC_CTYPE");
2256      locale_from = "LC_CTYPE";
2257      }
2258    
2259    /* If a locale has been provided, set it, and generate the tables the PCRE
2260    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2261    
2262    if (locale != NULL)
2263      {
2264      if (setlocale(LC_CTYPE, locale) == NULL)
2265      {      {
2266      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2267        strerror(errno));        locale, locale_from);
2268      return 2;      return 2;
2269      }      }
2270      pcretables = pcre_maketables();
2271      }
2272    
2273    if (whole_lines)  /* Sort out colouring */
2274    
2275    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2276      {
2277      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2278      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2279      else
2280        {
2281        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2282          colour_option);
2283        return 2;
2284        }
2285      if (do_colour)
2286      {      {
2287      strcpy(buffer, "^(?:");      char *cs = getenv("PCREGREP_COLOUR");
2288      adjust = 4;      if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2289        if (cs != NULL) colour_string = cs;
2290      }      }
2291    else if (word_match)    }
2292    
2293    /* Interpret the newline type; the default settings are Unix-like. */
2294    
2295    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2296      {
2297      pcre_options |= PCRE_NEWLINE_CR;
2298      endlinetype = EL_CR;
2299      }
2300    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2301      {
2302      pcre_options |= PCRE_NEWLINE_LF;
2303      endlinetype = EL_LF;
2304      }
2305    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2306      {
2307      pcre_options |= PCRE_NEWLINE_CRLF;
2308      endlinetype = EL_CRLF;
2309      }
2310    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2311      {
2312      pcre_options |= PCRE_NEWLINE_ANY;
2313      endlinetype = EL_ANY;
2314      }
2315    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2316      {
2317      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2318      endlinetype = EL_ANYCRLF;
2319      }
2320    else
2321      {
2322      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2323      return 2;
2324      }
2325    
2326    /* Interpret the text values for -d and -D */
2327    
2328    if (dee_option != NULL)
2329      {
2330      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2331      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2332      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2333      else
2334      {      {
2335      strcpy(buffer, "\\b");      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2336      adjust = 2;      return 2;
2337      }      }
2338      }
2339    
2340    rdstart = buffer + adjust;  if (DEE_option != NULL)
2341    while (fgets(rdstart, MBUFTHIRD, f) != NULL)    {
2342      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2343      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2344      else
2345      {      {
2346      char *s = rdstart + (int)strlen(rdstart);      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2347      if (pattern_count >= MAX_PATTERN_COUNT)      return 2;
       {  
       fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",  
         MAX_PATTERN_COUNT);  
       return 2;  
       }  
     while (s > rdstart && isspace((unsigned char)(s[-1]))) s--;  
     if (s == rdstart) continue;  
     if (whole_lines) strcpy(s, ")$");  
       else if (word_match)strcpy(s, "\\b");  
         else *s = 0;  
     pattern_list[pattern_count] = pcre_compile(buffer, options, &error,  
       &errptr, NULL);  
     if (pattern_list[pattern_count++] == NULL)  
       {  
       fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",  
         pattern_count, errptr - adjust, error);  
       return 2;  
       }  
2348      }      }
   fclose(f);  
2349    }    }
2350    
2351  /* If no file name, a single regex must be given inline. */  /* Check the values for Jeffrey Friedl's debugging options. */
2352    
2353  else  #ifdef JFRIEDL_DEBUG
2354    if (S_arg > 9)
2355      {
2356      fprintf(stderr, "pcregrep: bad value for -S option\n");
2357      return 2;
2358      }
2359    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2360    {    {
2361    char buffer[MBUFTHIRD + 16];    if (jfriedl_XT == 0) jfriedl_XT = 1;
2362    char *pat;    if (jfriedl_XR == 0) jfriedl_XR = 1;
2363    int adjust = 0;    }
2364    #endif
2365    
2366    /* Get memory to store the pattern and hints lists. */
2367    
2368    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2369    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2370    
2371    if (pattern_list == NULL || hints_list == NULL)
2372      {
2373      fprintf(stderr, "pcregrep: malloc failed\n");
2374      goto EXIT2;
2375      }
2376    
2377    /* If no patterns were provided by -e, and there is no file provided by -f,
2378    the first argument is the one and only pattern, and it must exist. */
2379    
2380    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2381      {
2382    if (i >= argc) return usage(2);    if (i >= argc) return usage(2);
2383      patterns[cmd_pattern_count++] = argv[i++];
2384      }
2385    
2386    /* Compile the patterns that were provided on the command line, either by
2387    multiple uses of -e or as a single unkeyed pattern. */
2388    
2389    for (j = 0; j < cmd_pattern_count; j++)
2390      {
2391      if (!compile_pattern(patterns[j], pcre_options, NULL,
2392           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2393        goto EXIT2;
2394      }
2395    
2396    /* Compile the regular expressions that are provided in a file. */
2397    
2398    if (pattern_filename != NULL)
2399      {
2400      int linenumber = 0;
2401      FILE *f;
2402      char *filename;
2403      char buffer[MBUFTHIRD];
2404    
2405    if (whole_lines)    if (strcmp(pattern_filename, "-") == 0)
2406      {      {
2407      sprintf(buffer, "^(?:%.*s)$", MBUFTHIRD, argv[i++]);      f = stdin;
2408      pat = buffer;      filename = stdin_name;
     adjust = 4;  
2409      }      }
2410    else if (word_match)    else
2411      {      {
2412      sprintf(buffer, "\\b%.*s\\b", MBUFTHIRD, argv[i++]);      f = fopen(pattern_filename, "r");
2413      pat = buffer;      if (f == NULL)
2414      adjust = 2;        {
2415          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2416            strerror(errno));
2417          goto EXIT2;
2418          }
2419        filename = pattern_filename;
2420      }      }
   else pat = argv[i++];  
   
   pattern_list[0] = pcre_compile(pat, options, &error, &errptr, NULL);  
2421    
2422    if (pattern_list[0] == NULL)    while (fgets(buffer, MBUFTHIRD, f) != NULL)
2423      {      {
2424      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n",      char *s = buffer + (int)strlen(buffer);
2425        errptr - adjust, error);      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2426      return 2;      *s = 0;
2427        linenumber++;
2428        if (buffer[0] == 0) continue;   /* Skip blank lines */
2429        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2430          goto EXIT2;
2431      }      }
2432    pattern_count++;  
2433      if (f != stdin) fclose(f);
2434    }    }
2435    
2436  /* Study the regular expressions, as we will be running them many times */  /* Study the regular expressions, as we will be running them many times */
# Line 1109  for (j = 0; j < pattern_count; j++) Line 2443  for (j = 0; j < pattern_count; j++)
2443      char s[16];      char s[16];
2444      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2445      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2446      return 2;      goto EXIT2;
2447      }      }
2448      hint_count++;
2449    }    }
2450    
2451  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
2452    
2453  if (exclude_pattern != NULL)  if (exclude_pattern != NULL)
2454    {    {
2455    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, NULL);    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2456        pcretables);
2457    if (exclude_compiled == NULL)    if (exclude_compiled == NULL)
2458      {      {
2459      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2460        errptr, error);        errptr, error);
2461      return 2;      goto EXIT2;
2462      }      }
2463    }    }
2464    
2465  if (include_pattern != NULL)  if (include_pattern != NULL)
2466    {    {
2467    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, NULL);    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2468        pcretables);
2469    if (include_compiled == NULL)    if (include_compiled == NULL)
2470      {      {
2471      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2472        errptr, error);        errptr, error);
2473      return 2;      goto EXIT2;
2474      }      }
2475    }    }
2476    
2477  /* If there are no further arguments, do the business on stdin and exit */  if (exclude_dir_pattern != NULL)
2478      {
2479      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2480        pcretables);
2481      if (exclude_dir_compiled == NULL)
2482        {
2483        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2484          errptr, error);
2485        goto EXIT2;
2486        }
2487      }
2488    
2489  if (i >= argc) return pcregrep(stdin,  if (include_dir_pattern != NULL)
2490    (filenames_only || filenames_nomatch_only)? stdin_name : NULL);    {
2491      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2492        pcretables);
2493      if (include_dir_compiled == NULL)
2494        {
2495        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2496          errptr, error);
2497        goto EXIT2;
2498        }
2499      }
2500    
2501    /* If there are no further arguments, do the business on stdin and exit. */
2502    
2503    if (i >= argc)
2504      {
2505      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2506      goto EXIT;
2507      }
2508    
2509  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2510  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
2511  the file name if the argument is not a directory and filenames_only is not set.  the file name if the argument is not a directory and filenames are not
2512  */  otherwise forced. */
2513    
2514  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
2515    
2516  for (; i < argc; i++)  for (; i < argc; i++)
2517    {    {
2518    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2519        only_one_at_top);
2520    if (frc > 1) rc = frc;    if (frc > 1) rc = frc;
2521      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2522    }    }
2523    
2524    EXIT:
2525    if (pattern_list != NULL)
2526      {
2527      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2528      free(pattern_list);
2529      }
2530    if (hints_list != NULL)
2531      {
2532      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2533      free(hints_list);
2534      }
2535  return rc;  return rc;
2536    
2537    EXIT2:
2538    rc = 2;
2539    goto EXIT;
2540  }  }
2541    
2542  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.77  
changed lines
  Added in v.535

  ViewVC Help
Powered by ViewVC 1.1.5