/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 63 by nigel, Sat Feb 24 21:40:03 2007 UTC | revision 357 by ph10, Tue Jul 8 14:18:28 2008 UTC
# Line 4  Line 4 
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories. */  directories.
8    
9               Copyright (c) 1997-2008 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55    #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 19  directories. */ Line 70  directories. */
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "3.0 14-Jan-2003"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    
75    #if BUFSIZ > 8192
76    #define MBUFTHIRD BUFSIZ
77    #else
78    #define MBUFTHIRD 8192
79    #endif
80    
81    /* Values for the "filenames" variable, which specifies options for file name
82    output. The order is important; it is assumed that a file name is wanted for
83    all values greater than FN_DEFAULT. */
84    
85    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
86    
87    /* File reading styles */
88    
89    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
90    
91    /* Actions for the -d and -D options */
92    
93    enum { dee_READ, dee_SKIP, dee_RECURSE };
94    enum { DEE_READ, DEE_SKIP };
95    
96    /* Actions for special processing options (flag bits) */
97    
98    #define PO_WORD_MATCH     0x0001
99    #define PO_LINE_MATCH     0x0002
100    #define PO_FIXED_STRINGS  0x0004
101    
102    /* Line ending types */
103    
104    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
105    
106    
107    
108  /*************************************************  /*************************************************
109  *               Global variables                 *  *               Global variables                 *
110  *************************************************/  *************************************************/
111    
112    /* Jeffrey Friedl has some debugging requirements that are not part of the
113    regular code. */
114    
115    #ifdef JFRIEDL_DEBUG
116    static int S_arg = -1;
117    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
118    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
119    static const char *jfriedl_prefix = "";
120    static const char *jfriedl_postfix = "";
121    #endif
122    
123    static int  endlinetype;
124    
125    static char *colour_string = (char *)"1;31";
126    static char *colour_option = NULL;
127    static char *dee_option = NULL;
128    static char *DEE_option = NULL;
129    static char *newline = NULL;
130  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
131    static char *stdin_name = (char *)"(standard input)";
132    static char *locale = NULL;
133    
134    static const unsigned char *pcretables = NULL;
135    
136  static int  pattern_count = 0;  static int  pattern_count = 0;
137  static pcre **pattern_list;  static pcre **pattern_list = NULL;
138  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
139    
140    static char *include_pattern = NULL;
141    static char *exclude_pattern = NULL;
142    static char *include_dir_pattern = NULL;
143    static char *exclude_dir_pattern = NULL;
144    
145    static pcre *include_compiled = NULL;
146    static pcre *exclude_compiled = NULL;
147    static pcre *include_dir_compiled = NULL;
148    static pcre *exclude_dir_compiled = NULL;
149    
150    static int after_context = 0;
151    static int before_context = 0;
152    static int both_context = 0;
153    static int dee_action = dee_READ;
154    static int DEE_action = DEE_READ;
155    static int error_count = 0;
156    static int filenames = FN_DEFAULT;
157    static int process_options = 0;
158    
159  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
160  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
161  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
162    static BOOL hyphenpending = FALSE;
163  static BOOL invert = FALSE;  static BOOL invert = FALSE;
164    static BOOL line_offsets = FALSE;
165    static BOOL multiline = FALSE;
166  static BOOL number = FALSE;  static BOOL number = FALSE;
167  static BOOL recurse = FALSE;  static BOOL only_matching = FALSE;
168    static BOOL quiet = FALSE;
169  static BOOL silent = FALSE;  static BOOL silent = FALSE;
170  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
171    
172  /* Structure for options and list of them */  /* Structure for options and list of them */
173    
174    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
175           OP_PATLIST };
176    
177  typedef struct option_item {  typedef struct option_item {
178      int type;
179    int one_char;    int one_char;
180    char *long_name;    void *dataptr;
181    char *help_text;    const char *long_name;
182      const char *help_text;
183  } option_item;  } option_item;
184    
185    /* Options without a single-letter equivalent get a negative value. This can be
186    used to identify them. */
187    
188    #define N_COLOUR       (-1)
189    #define N_EXCLUDE      (-2)
190    #define N_EXCLUDE_DIR  (-3)
191    #define N_HELP         (-4)
192    #define N_INCLUDE      (-5)
193    #define N_INCLUDE_DIR  (-6)
194    #define N_LABEL        (-7)
195    #define N_LOCALE       (-8)
196    #define N_NULL         (-9)
197    #define N_LOFFSETS     (-10)
198    #define N_FOFFSETS     (-11)
199    
200  static option_item optionlist[] = {  static option_item optionlist[] = {
201    { -1,  "help",         "display this help and exit" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
202    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
203    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
204    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
205    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
206    { 'n', "line-number",  "print line number with output lines" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
207    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
208    { 's', "no-messages",  "suppress error messages" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
209    { 'u', "utf-8",        "use UTF-8 mode" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
210    { 'V', "version",      "print version information and exit" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
211    { 'v', "invert-match", "select non-matching lines" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
212    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
213    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
214    { 0,    NULL,           NULL }    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
215      { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
216      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
217      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
218      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
219      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
220      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
221      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
222      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
223      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
224      { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
225      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
226      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
227      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
228      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
229      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
230      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
231      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
232      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
233    #ifdef JFRIEDL_DEBUG
234      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
235    #endif
236      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
237      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
238      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
239      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
240      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
241      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
242      { OP_NODATA,    0,        NULL,               NULL,            NULL }
243  };  };
244    
245    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
246    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
247    that the combination of -w and -x has the same effect as -x on its own, so we
248    can treat them as the same. */
249    
250    static const char *prefix[] = {
251      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
252    
253    static const char *suffix[] = {
254      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
255    
256    /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
257    
258    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
259    
260    const char utf8_table4[] = {
261      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
262      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
263      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
264      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
265    
266    
267    
268  /*************************************************  /*************************************************
269  *       Functions for directory scanning         *  *            OS-specific functions               *
270  *************************************************/  *************************************************/
271    
272  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
273  although at present the only ones are for Unix, Win32, and for "no directory  although at present the only ones are for Unix, Win32, and for "no support". */
 recursion support". */  
274    
275    
276  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
277    
278  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
279  #include <sys/types.h>  #include <sys/types.h>
280  #include <sys/stat.h>  #include <sys/stat.h>
281  #include <dirent.h>  #include <dirent.h>
282    
283  typedef DIR directory_type;  typedef DIR directory_type;
284    
285  int  static int
286  isdirectory(char *filename)  isdirectory(char *filename)
287  {  {
288  struct stat statbuf;  struct stat statbuf;
# Line 94  if (stat(filename, &statbuf) < 0) Line 291  if (stat(filename, &statbuf) < 0)
291  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
292  }  }
293    
294  directory_type *  static directory_type *
295  opendirectory(char *filename)  opendirectory(char *filename)
296  {  {
297  return opendir(filename);  return opendir(filename);
298  }  }
299    
300  char *  static char *
301  readdirectory(directory_type *dir)  readdirectory(directory_type *dir)
302  {  {
303  for (;;)  for (;;)
# Line 110  for (;;) Line 307  for (;;)
307    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
308      return dent->d_name;      return dent->d_name;
309    }    }
310  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
311  }  }
312    
313  void  static void
314  closedirectory(directory_type *dir)  closedirectory(directory_type *dir)
315  {  {
316  closedir(dir);  closedir(dir);
317  }  }
318    
319    
320    /************* Test for regular file in Unix **********/
321    
322    static int
323    isregfile(char *filename)
324    {
325    struct stat statbuf;
326    if (stat(filename, &statbuf) < 0)
327      return 1;        /* In the expectation that opening as a file will fail */
328    return (statbuf.st_mode & S_IFMT) == S_IFREG;
329    }
330    
331    
332    /************* Test stdout for being a terminal in Unix **********/
333    
334    static BOOL
335    is_stdout_tty(void)
336    {
337    return isatty(fileno(stdout));
338    }
339    
340    
341  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
342    
343  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
344  Lionel Fourquaux. */  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
345    when it did not exist. David Byron added a patch that moved the #include of
346    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
347    */
348    
349  #elif HAVE_WIN32API  #elif HAVE_WINDOWS_H
350    
351  #ifndef STRICT  #ifndef STRICT
352  # define STRICT  # define STRICT
# Line 134  Lionel Fourquaux. */ Line 354  Lionel Fourquaux. */
354  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
355  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
356  #endif  #endif
357    
358  #include <windows.h>  #include <windows.h>
359    
360    #ifndef INVALID_FILE_ATTRIBUTES
361    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
362    #endif
363    
364  typedef struct directory_type  typedef struct directory_type
365  {  {
366  HANDLE handle;  HANDLE handle;
# Line 213  free(dir); Line 438  free(dir);
438  }  }
439    
440    
441    /************* Test for regular file in Win32 **********/
442    
443    /* I don't know how to do this, or if it can be done; assume all paths are
444    regular if they are not directories. */
445    
446    int isregfile(char *filename)
447    {
448    return !isdirectory(filename);
449    }
450    
451    
452    /************* Test stdout for being a terminal in Win32 **********/
453    
454    /* I don't know how to do this; assume never */
455    
456    static BOOL
457    is_stdout_tty(void)
458    {
459    return FALSE;
460    }
461    
462    
463  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
464    
465  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
# Line 221  free(dir); Line 468  free(dir);
468    
469  typedef void directory_type;  typedef void directory_type;
470    
471  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
472  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
473  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
474  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
475    
476    
477    /************* Test for regular file when we can't do it **********/
478    
479    /* Assume all files are regular. */
480    
481    int isregfile(char *filename) { return 1; }
482    
483    
484    /************* Test stdout for being a terminal when we can't do it **********/
485    
486    static BOOL
487    is_stdout_tty(void)
488    {
489    return FALSE;
490    }
491    
492    
493  #endif  #endif
494    
495    
496    
497  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
498  /*************************************************  /*************************************************
499  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
500  *************************************************/  *************************************************/
# Line 253  return sys_errlist[n]; Line 517  return sys_errlist[n];
517    
518    
519  /*************************************************  /*************************************************
520  *              Grep an individual file           *  *             Find end of line                   *
521  *************************************************/  *************************************************/
522    
523  static int  /* The length of the endline sequence that is found is set via lenptr. This may
524  pcregrep(FILE *in, char *name)  be zero at the very end of the file if there is no line-ending sequence there.
525  {  
526  int rc = 1;  Arguments:
527  int linenumber = 0;    p         current position in line
528  int count = 0;    endptr    end of available data
529  int offsets[99];    lenptr    where to put the length of the eol sequence
530  char buffer[BUFSIZ];  
531    Returns:    pointer after the last byte of the line, including the newline byte(s)
532    */
533    
534  while (fgets(buffer, sizeof(buffer), in) != NULL)  static char *
535    end_of_line(char *p, char *endptr, int *lenptr)
536    {
537    switch(endlinetype)
538    {    {
539    BOOL match = FALSE;    default:      /* Just in case */
540    int i;    case EL_LF:
541    int length = (int)strlen(buffer);    while (p < endptr && *p != '\n') p++;
542    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    if (p < endptr)
543    linenumber++;      {
544        *lenptr = 1;
545        return p + 1;
546        }
547      *lenptr = 0;
548      return endptr;
549    
550    for (i = 0; !match && i < pattern_count; i++)    case EL_CR:
551      while (p < endptr && *p != '\r') p++;
552      if (p < endptr)
553      {      {
554      match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,      *lenptr = 1;
555        offsets, 99) >= 0;      return p + 1;
     if (match && whole_lines && offsets[1] != length) match = FALSE;  
556      }      }
557      *lenptr = 0;
558      return endptr;
559    
560    if (match != invert)    case EL_CRLF:
561      for (;;)
562      {      {
563      if (count_only) count++;      while (p < endptr && *p != '\r') p++;
564        if (++p >= endptr)
565          {
566          *lenptr = 0;
567          return endptr;
568          }
569        if (*p == '\n')
570          {
571          *lenptr = 2;
572          return p + 1;
573          }
574        }
575      break;
576    
577      case EL_ANYCRLF:
578      while (p < endptr)
579        {
580        int extra = 0;
581        register int c = *((unsigned char *)p);
582    
583      else if (filenames_only)      if (utf8 && c >= 0xc0)
584        {        {
585        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        int gcii, gcss;
586        return 0;        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
587          gcss = 6*extra;
588          c = (c & utf8_table3[extra]) << gcss;
589          for (gcii = 1; gcii <= extra; gcii++)
590            {
591            gcss -= 6;
592            c |= (p[gcii] & 0x3f) << gcss;
593            }
594        }        }
595    
596      else if (silent) return 0;      p += 1 + extra;
597    
598      else      switch (c)
599        {        {
600        if (name != NULL) fprintf(stdout, "%s:", name);        case 0x0a:    /* LF */
601        if (number) fprintf(stdout, "%d:", linenumber);        *lenptr = 1;
602        fprintf(stdout, "%s\n", buffer);        return p;
603    
604          case 0x0d:    /* CR */
605          if (p < endptr && *p == 0x0a)
606            {
607            *lenptr = 2;
608            p++;
609            }
610          else *lenptr = 1;
611          return p;
612    
613          default:
614          break;
615        }        }
616        }   /* End of loop for ANYCRLF case */
617    
618      rc = 0;    *lenptr = 0;  /* Must have hit the end */
619      }    return endptr;
   }  
620    
621  if (count_only)    case EL_ANY:
622    {    while (p < endptr)
623    if (name != NULL) fprintf(stdout, "%s:", name);      {
624    fprintf(stdout, "%d\n", count);      int extra = 0;
625    }      register int c = *((unsigned char *)p);
626    
627  return rc;      if (utf8 && c >= 0xc0)
628  }        {
629          int gcii, gcss;
630          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
631          gcss = 6*extra;
632          c = (c & utf8_table3[extra]) << gcss;
633          for (gcii = 1; gcii <= extra; gcii++)
634            {
635            gcss -= 6;
636            c |= (p[gcii] & 0x3f) << gcss;
637            }
638          }
639    
640        p += 1 + extra;
641    
642        switch (c)
643          {
644          case 0x0a:    /* LF */
645          case 0x0b:    /* VT */
646          case 0x0c:    /* FF */
647          *lenptr = 1;
648          return p;
649    
650          case 0x0d:    /* CR */
651          if (p < endptr && *p == 0x0a)
652            {
653            *lenptr = 2;
654            p++;
655            }
656          else *lenptr = 1;
657          return p;
658    
659          case 0x85:    /* NEL */
660          *lenptr = utf8? 2 : 1;
661          return p;
662    
663          case 0x2028:  /* LS */
664          case 0x2029:  /* PS */
665          *lenptr = 3;
666          return p;
667    
668          default:
669          break;
670          }
671        }   /* End of loop for ANY case */
672    
673      *lenptr = 0;  /* Must have hit the end */
674      return endptr;
675      }     /* End of overall switch */
676    }
677    
678    
679    
680  /*************************************************  /*************************************************
681  *     Grep a file or recurse into a directory    *  *         Find start of previous line            *
682  *************************************************/  *************************************************/
683    
684  static int  /* This is called when looking back for before lines to print.
 grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,  
   BOOL only_one_at_top)  
 {  
 int rc = 1;  
 int sep;  
 FILE *in;  
685    
686  /* If the file is a directory and we are recursing, scan each file within it.  Arguments:
687  The scanning code is localized so it can be made system-specific. */    p         start of the subsequent line
688      startptr  start of available data
689    
690  if ((sep = isdirectory(filename)) != 0 && recurse)  Returns:    pointer to the start of the previous line
691    {  */
   char buffer[1024];  
   char *nextfile;  
   directory_type *dir = opendirectory(filename);  
692    
693    if (dir == NULL)  static char *
694      {  previous_line(char *p, char *startptr)
695      fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,  {
696        strerror(errno));  switch(endlinetype)
697      return 2;    {
698      }    default:      /* Just in case */
699      case EL_LF:
700      p--;
701      while (p > startptr && p[-1] != '\n') p--;
702      return p;
703    
704      case EL_CR:
705      p--;
706      while (p > startptr && p[-1] != '\n') p--;
707      return p;
708    
709    while ((nextfile = readdirectory(dir)) != NULL)    case EL_CRLF:
710      for (;;)
711      {      {
712      int frc;      p -= 2;
713      sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);      while (p > startptr && p[-1] != '\n') p--;
714      frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);      if (p <= startptr + 1 || p[-2] == '\r') return p;
     if (frc == 0 && rc == 1) rc = 0;  
715      }      }
716      return p;   /* But control should never get here */
717    
718    closedirectory(dir);    case EL_ANY:
719    return rc;    case EL_ANYCRLF:
720    }    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
721      if (utf8) while ((*p & 0xc0) == 0x80) p--;
722    
723  /* If the file is not a directory, or we are not recursing, scan it. If this is    while (p > startptr)
724  the first and only argument at top level, we don't show the file name (unless      {
725  we are only showing the file name). Otherwise, control is via the      register int c;
726  show_filenames variable. */      char *pp = p - 1;
727    
728  in = fopen(filename, "r");      if (utf8)
729  if (in == NULL)        {
730    {        int extra = 0;
731    fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));        while ((*pp & 0xc0) == 0x80) pp--;
732    return 2;        c = *((unsigned char *)pp);
733    }        if (c >= 0xc0)
734            {
735            int gcii, gcss;
736            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
737            gcss = 6*extra;
738            c = (c & utf8_table3[extra]) << gcss;
739            for (gcii = 1; gcii <= extra; gcii++)
740              {
741              gcss -= 6;
742              c |= (pp[gcii] & 0x3f) << gcss;
743              }
744            }
745          }
746        else c = *((unsigned char *)pp);
747    
748  rc = pcregrep(in, (filenames_only || (show_filenames && !only_one_at_top))?      if (endlinetype == EL_ANYCRLF) switch (c)
749    filename : NULL);        {
750  fclose(in);        case 0x0a:    /* LF */
751  return rc;        case 0x0d:    /* CR */
752  }        return p;
753    
754          default:
755          break;
756          }
757    
758        else switch (c)
759          {
760          case 0x0a:    /* LF */
761          case 0x0b:    /* VT */
762          case 0x0c:    /* FF */
763          case 0x0d:    /* CR */
764          case 0x85:    /* NEL */
765          case 0x2028:  /* LS */
766          case 0x2029:  /* PS */
767          return p;
768    
769          default:
770          break;
771          }
772    
773  /*************************************************      p = pp;  /* Back one character */
774  *                Usage function                  *      }        /* End of loop for ANY case */
 *************************************************/  
775    
776  static int    return startptr;  /* Hit start of data */
777  usage(int rc)    }     /* End of overall switch */
 {  
 fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] [pattern] [file1 file2 ...]\n");  
 fprintf(stderr, "Type `pcregrep --help' for more information.\n");  
 return rc;  
778  }  }
779    
780    
781    
782    
783    
784  /*************************************************  /*************************************************
785  *                Help function                   *  *       Print the previous "after" lines         *
786  *************************************************/  *************************************************/
787    
788  static void  /* This is called if we are about to lose said lines because of buffer filling,
789  help(void)  and at the end of the file. The data in the line is written using fwrite() so
790  {  that a binary zero does not terminate it.
791  option_item *op;  
792    Arguments:
793  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");    lastmatchnumber   the number of the last matching line, plus one
794  printf("Search for PATTERN in each FILE or standard input.\n");    lastmatchrestart  where we restarted after the last match
795  printf("PATTERN must be present if -f is not used.\n");    endptr            end of available data
796  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");    printname         filename for printing
797    
798  printf("Options:\n");  Returns:            nothing
799    */
800    
801  for (op = optionlist; op->one_char != 0; op++)  static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
802      char *endptr, char *printname)
803    {
804    if (after_context > 0 && lastmatchnumber > 0)
805    {    {
806    int n;    int count = 0;
807    char s[4];    while (lastmatchrestart < endptr && count++ < after_context)
808    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");      {
809    printf("  %s --%s%n", s, op->long_name, &n);      int ellength;
810    n = 30 - n;      char *pp = lastmatchrestart;
811    if (n < 1) n = 1;      if (printname != NULL) fprintf(stdout, "%s-", printname);
812    printf("%.*s%s\n", n, "                    ", op->help_text);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
813        pp = end_of_line(pp, endptr, &ellength);
814        fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
815        lastmatchrestart = pp;
816        }
817      hyphenpending = TRUE;
818    }    }
   
 printf("\n  -f<filename>  or  --file=<filename>\n");  
 printf("    Read patterns from <filename> instead of using a command line option.\n");  
 printf("    Trailing white space is removed; blanks lines are ignored.\n");  
 printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
   
 printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  
 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  
819  }  }
820    
821    
822    
   
823  /*************************************************  /*************************************************
824  *                Handle an option                *  *            Grep an individual file             *
825  *************************************************/  *************************************************/
826    
827    /* This is called from grep_or_recurse() below. It uses a buffer that is three
828    times the value of MBUFTHIRD. The matching point is never allowed to stray into
829    the top third of the buffer, thus keeping more of the file available for
830    context printing or for multiline scanning. For large files, the pointer will
831    be in the middle third most of the time, so the bottom third is available for
832    "before" context printing.
833    
834    Arguments:
835      handle       the fopened FILE stream for a normal file
836                   the gzFile pointer when reading is via libz
837                   the BZFILE pointer when reading is via libbz2
838      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
839      printname    the file name if it is to be printed for each match
840                   or NULL if the file name is not to be printed
841                   it cannot be NULL if filenames[_nomatch]_only is set
842    
843    Returns:       0 if there was at least one match
844                   1 otherwise (no matches)
845                   2 if there is a read error on a .bz2 file
846    */
847    
848  static int  static int
849  handle_option(int letter, int options)  pcregrep(void *handle, int frtype, char *printname)
850  {  {
851  switch(letter)  int rc = 1;
852    {  int linenumber = 1;
853    case -1:  help(); exit(0);  int lastmatchnumber = 0;
854    case 'c': count_only = TRUE; break;  int count = 0;
855    case 'h': filenames = FALSE; break;  int filepos = 0;
856    case 'i': options |= PCRE_CASELESS; break;  int offsets[99];
857    case 'l': filenames_only = TRUE;  char *lastmatchrestart = NULL;
858    case 'n': number = TRUE; break;  char buffer[3*MBUFTHIRD];
859    case 'r': recurse = TRUE; break;  char *ptr = buffer;
860    case 's': silent = TRUE; break;  char *endptr;
861    case 'u': options |= PCRE_UTF8; break;  size_t bufflength;
862    case 'v': invert = TRUE; break;  BOOL endhyphenpending = FALSE;
863    case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;  FILE *in = NULL;                    /* Ensure initialized */
864    
865    case 'V':  #ifdef SUPPORT_LIBZ
866    fprintf(stderr, "pcregrep version %s using ", VERSION);  gzFile ingz = NULL;
867    fprintf(stderr, "PCRE version %s\n", pcre_version());  #endif
   exit(0);  
   break;  
868    
869    default:  #ifdef SUPPORT_LIBBZ2
870    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);  BZFILE *inbz2 = NULL;
871    exit(usage(2));  #endif
   }  
872    
 return options;  
 }  
873    
874    /* Do the first read into the start of the buffer and set up the pointer to end
875    of what we have. In the case of libz, a non-zipped .gz file will be read as a
876    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
877    fail. */
878    
879    #ifdef SUPPORT_LIBZ
880    if (frtype == FR_LIBZ)
881      {
882      ingz = (gzFile)handle;
883      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
884      }
885    else
886    #endif
887    
888    #ifdef SUPPORT_LIBBZ2
889    if (frtype == FR_LIBBZ2)
890      {
891      inbz2 = (BZFILE *)handle;
892      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
893      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
894      }                                    /* without the cast it is unsigned. */
895    else
896    #endif
897    
898  /*************************************************    {
899  *                Main program                    *    in = (FILE *)handle;
900  *************************************************/    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
901      }
902    
903  int  endptr = buffer + bufflength;
 main(int argc, char **argv)  
 {  
 int i, j;  
 int rc = 1;  
 int options = 0;  
 int errptr;  
 const char *error;  
 BOOL only_one_at_top;  
904    
905  /* Process the options */  /* Loop while the current pointer is not at the end of the file. For large
906    files, endptr will be at the end of the buffer when we are in the middle of the
907    file, but ptr will never get there, because as soon as it gets over 2/3 of the
908    way, the buffer is shifted left and re-filled. */
909    
910  for (i = 1; i < argc; i++)  while (ptr < endptr)
911    {    {
912    if (argv[i][0] != '-') break;    int i, endlinelength;
913      int mrc = 0;
914    /* Missing options */    BOOL match = FALSE;
915      char *matchptr = ptr;
916    if (argv[i][1] == 0) exit(usage(2));    char *t = ptr;
917      size_t length, linelength;
918    
919      /* At this point, ptr is at the start of a line. We need to find the length
920      of the subject string to pass to pcre_exec(). In multiline mode, it is the
921      length remainder of the data in the buffer. Otherwise, it is the length of
922      the next line. After matching, we always advance by the length of the next
923      line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
924      that any match is constrained to be in the first line. */
925    
926      t = end_of_line(t, endptr, &endlinelength);
927      linelength = t - ptr - endlinelength;
928      length = multiline? (size_t)(endptr - ptr) : linelength;
929    
930    /* Long name options */    /* Extra processing for Jeffrey Friedl's debugging. */
931    
932    if (argv[i][1] == '-')  #ifdef JFRIEDL_DEBUG
933      {    if (jfriedl_XT || jfriedl_XR)
934      option_item *op;    {
935          #include <sys/time.h>
936          #include <time.h>
937          struct timeval start_time, end_time;
938          struct timezone dummy;
939    
940      if (strncmp(argv[i]+2, "file=", 5) == 0)        if (jfriedl_XT)
941        {        {
942        pattern_filename = argv[i] + 7;            unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
943        continue;            const char *orig = ptr;
944              ptr = malloc(newlen + 1);
945              if (!ptr) {
946                      printf("out of memory");
947                      exit(2);
948              }
949              endptr = ptr;
950              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
951              for (i = 0; i < jfriedl_XT; i++) {
952                      strncpy(endptr, orig,  length);
953                      endptr += length;
954              }
955              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
956              length = newlen;
957          }
958    
959          if (gettimeofday(&start_time, &dummy) != 0)
960                  perror("bad gettimeofday");
961    
962    
963          for (i = 0; i < jfriedl_XR; i++)
964              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
965    
966          if (gettimeofday(&end_time, &dummy) != 0)
967                  perror("bad gettimeofday");
968    
969          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
970                          -
971                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
972    
973          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
974          return 0;
975      }
976    #endif
977    
978      /* We come back here after a match when the -o option (only_matching) is set,
979      in order to find any further matches in the same line. */
980    
981      ONLY_MATCHING_RESTART:
982    
983      /* Run through all the patterns until one matches. Note that we don't include
984      the final newline in the subject string. */
985    
986      for (i = 0; i < pattern_count; i++)
987        {
988        mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
989          offsets, 99);
990        if (mrc >= 0) { match = TRUE; break; }
991        if (mrc != PCRE_ERROR_NOMATCH)
992          {
993          fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
994          if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
995          fprintf(stderr, "this line:\n");
996          fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */
997          fprintf(stderr, "\n");
998          if (error_count == 0 &&
999              (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
1000            {
1001            fprintf(stderr, "pcregrep: error %d means that a resource limit "
1002              "was exceeded\n", mrc);
1003            fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
1004            }
1005          if (error_count++ > 20)
1006            {
1007            fprintf(stderr, "pcregrep: too many errors - abandoned\n");
1008            exit(2);
1009            }
1010          match = invert;    /* No more matching; don't show the line again */
1011          break;
1012          }
1013        }
1014    
1015      /* If it's a match or a not-match (as required), do what's wanted. */
1016    
1017      if (match != invert)
1018        {
1019        BOOL hyphenprinted = FALSE;
1020    
1021        /* We've failed if we want a file that doesn't have any matches. */
1022    
1023        if (filenames == FN_NOMATCH_ONLY) return 1;
1024    
1025        /* Just count if just counting is wanted. */
1026    
1027        if (count_only) count++;
1028    
1029        /* If all we want is a file name, there is no need to scan any more lines
1030        in the file. */
1031    
1032        else if (filenames == FN_ONLY)
1033          {
1034          fprintf(stdout, "%s\n", printname);
1035          return 0;
1036          }
1037    
1038        /* Likewise, if all we want is a yes/no answer. */
1039    
1040        else if (quiet) return 0;
1041    
1042        /* The --only-matching option prints just the substring that matched, and
1043        the --file-offsets and --line-offsets options output offsets for the
1044        matching substring (they both force --only-matching). None of these options
1045        prints any context. Afterwards, adjust the start and length, and then jump
1046        back to look for further matches in the same line. If we are in invert
1047        mode, however, nothing is printed - this could be still useful because the
1048        return code is set. */
1049    
1050        else if (only_matching)
1051          {
1052          if (!invert)
1053            {
1054            if (printname != NULL) fprintf(stdout, "%s:", printname);
1055            if (number) fprintf(stdout, "%d:", linenumber);
1056            if (line_offsets)
1057              fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1058                offsets[1] - offsets[0]);
1059            else if (file_offsets)
1060              fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1061                offsets[1] - offsets[0]);
1062            else
1063              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1064            fprintf(stdout, "\n");
1065            matchptr += offsets[1];
1066            length -= offsets[1];
1067            match = FALSE;
1068            goto ONLY_MATCHING_RESTART;
1069            }
1070          }
1071    
1072        /* This is the default case when none of the above options is set. We print
1073        the matching lines(s), possibly preceded and/or followed by other lines of
1074        context. */
1075    
1076        else
1077          {
1078          /* See if there is a requirement to print some "after" lines from a
1079          previous match. We never print any overlaps. */
1080    
1081          if (after_context > 0 && lastmatchnumber > 0)
1082            {
1083            int ellength;
1084            int linecount = 0;
1085            char *p = lastmatchrestart;
1086    
1087            while (p < ptr && linecount < after_context)
1088              {
1089              p = end_of_line(p, ptr, &ellength);
1090              linecount++;
1091              }
1092    
1093            /* It is important to advance lastmatchrestart during this printing so
1094            that it interacts correctly with any "before" printing below. Print
1095            each line's data using fwrite() in case there are binary zeroes. */
1096    
1097            while (lastmatchrestart < p)
1098              {
1099              char *pp = lastmatchrestart;
1100              if (printname != NULL) fprintf(stdout, "%s-", printname);
1101              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1102              pp = end_of_line(pp, endptr, &ellength);
1103              fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1104              lastmatchrestart = pp;
1105              }
1106            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1107            }
1108    
1109          /* If there were non-contiguous lines printed above, insert hyphens. */
1110    
1111          if (hyphenpending)
1112            {
1113            fprintf(stdout, "--\n");
1114            hyphenpending = FALSE;
1115            hyphenprinted = TRUE;
1116            }
1117    
1118          /* See if there is a requirement to print some "before" lines for this
1119          match. Again, don't print overlaps. */
1120    
1121          if (before_context > 0)
1122            {
1123            int linecount = 0;
1124            char *p = ptr;
1125    
1126            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1127                   linecount < before_context)
1128              {
1129              linecount++;
1130              p = previous_line(p, buffer);
1131              }
1132    
1133            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1134              fprintf(stdout, "--\n");
1135    
1136            while (p < ptr)
1137              {
1138              int ellength;
1139              char *pp = p;
1140              if (printname != NULL) fprintf(stdout, "%s-", printname);
1141              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1142              pp = end_of_line(pp, endptr, &ellength);
1143              fwrite(p, 1, pp - p, stdout);
1144              p = pp;
1145              }
1146            }
1147    
1148          /* Now print the matching line(s); ensure we set hyphenpending at the end
1149          of the file if any context lines are being output. */
1150    
1151          if (after_context > 0 || before_context > 0)
1152            endhyphenpending = TRUE;
1153    
1154          if (printname != NULL) fprintf(stdout, "%s:", printname);
1155          if (number) fprintf(stdout, "%d:", linenumber);
1156    
1157          /* In multiline mode, we want to print to the end of the line in which
1158          the end of the matched string is found, so we adjust linelength and the
1159          line number appropriately, but only when there actually was a match
1160          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1161          the match will always be before the first newline sequence. */
1162    
1163          if (multiline)
1164            {
1165            int ellength;
1166            char *endmatch = ptr;
1167            if (!invert)
1168              {
1169              endmatch += offsets[1];
1170              t = ptr;
1171              while (t < endmatch)
1172                {
1173                t = end_of_line(t, endptr, &ellength);
1174                if (t <= endmatch) linenumber++; else break;
1175                }
1176              }
1177            endmatch = end_of_line(endmatch, endptr, &ellength);
1178            linelength = endmatch - ptr - ellength;
1179            }
1180    
1181          /*** NOTE: Use only fwrite() to output the data line, so that binary
1182          zeroes are treated as just another data character. */
1183    
1184          /* This extra option, for Jeffrey Friedl's debugging requirements,
1185          replaces the matched string, or a specific captured string if it exists,
1186          with X. When this happens, colouring is ignored. */
1187    
1188    #ifdef JFRIEDL_DEBUG
1189          if (S_arg >= 0 && S_arg < mrc)
1190            {
1191            int first = S_arg * 2;
1192            int last  = first + 1;
1193            fwrite(ptr, 1, offsets[first], stdout);
1194            fprintf(stdout, "X");
1195            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1196            }
1197          else
1198    #endif
1199    
1200          /* We have to split the line(s) up if colouring. */
1201    
1202          if (do_colour)
1203            {
1204            fwrite(ptr, 1, offsets[0], stdout);
1205            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1206            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1207            fprintf(stdout, "%c[00m", 0x1b);
1208            fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1209              stdout);
1210            }
1211          else fwrite(ptr, 1, linelength + endlinelength, stdout);
1212          }
1213    
1214        /* End of doing what has to be done for a match */
1215    
1216        rc = 0;    /* Had some success */
1217    
1218        /* Remember where the last match happened for after_context. We remember
1219        where we are about to restart, and that line's number. */
1220    
1221        lastmatchrestart = ptr + linelength + endlinelength;
1222        lastmatchnumber = linenumber + 1;
1223        }
1224    
1225      /* For a match in multiline inverted mode (which of course did not cause
1226      anything to be printed), we have to move on to the end of the match before
1227      proceeding. */
1228    
1229      if (multiline && invert && match)
1230        {
1231        int ellength;
1232        char *endmatch = ptr + offsets[1];
1233        t = ptr;
1234        while (t < endmatch)
1235          {
1236          t = end_of_line(t, endptr, &ellength);
1237          if (t <= endmatch) linenumber++; else break;
1238          }
1239        endmatch = end_of_line(endmatch, endptr, &ellength);
1240        linelength = endmatch - ptr - ellength;
1241        }
1242    
1243      /* Advance to after the newline and increment the line number. The file
1244      offset to the current line is maintained in filepos. */
1245    
1246      ptr += linelength + endlinelength;
1247      filepos += linelength + endlinelength;
1248      linenumber++;
1249    
1250      /* If we haven't yet reached the end of the file (the buffer is full), and
1251      the current point is in the top 1/3 of the buffer, slide the buffer down by
1252      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1253      about to be lost, print them. */
1254    
1255      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1256        {
1257        if (after_context > 0 &&
1258            lastmatchnumber > 0 &&
1259            lastmatchrestart < buffer + MBUFTHIRD)
1260          {
1261          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1262          lastmatchnumber = 0;
1263          }
1264    
1265        /* Now do the shuffle */
1266    
1267        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1268        ptr -= MBUFTHIRD;
1269    
1270    #ifdef SUPPORT_LIBZ
1271        if (frtype == FR_LIBZ)
1272          bufflength = 2*MBUFTHIRD +
1273            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1274        else
1275    #endif
1276    
1277    #ifdef SUPPORT_LIBBZ2
1278        if (frtype == FR_LIBBZ2)
1279          bufflength = 2*MBUFTHIRD +
1280            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1281        else
1282    #endif
1283    
1284        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1285    
1286        endptr = buffer + bufflength;
1287    
1288        /* Adjust any last match point */
1289    
1290        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1291        }
1292      }     /* Loop through the whole file */
1293    
1294    /* End of file; print final "after" lines if wanted; do_after_lines sets
1295    hyphenpending if it prints something. */
1296    
1297    if (!only_matching && !count_only)
1298      {
1299      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1300      hyphenpending |= endhyphenpending;
1301      }
1302    
1303    /* Print the file name if we are looking for those without matches and there
1304    were none. If we found a match, we won't have got this far. */
1305    
1306    if (filenames == FN_NOMATCH_ONLY)
1307      {
1308      fprintf(stdout, "%s\n", printname);
1309      return 0;
1310      }
1311    
1312    /* Print the match count if wanted */
1313    
1314    if (count_only)
1315      {
1316      if (printname != NULL) fprintf(stdout, "%s:", printname);
1317      fprintf(stdout, "%d\n", count);
1318      }
1319    
1320    return rc;
1321    }
1322    
1323    
1324    
1325    /*************************************************
1326    *     Grep a file or recurse into a directory    *
1327    *************************************************/
1328    
1329    /* Given a path name, if it's a directory, scan all the files if we are
1330    recursing; if it's a file, grep it.
1331    
1332    Arguments:
1333      pathname          the path to investigate
1334      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1335      only_one_at_top   TRUE if the path is the only one at toplevel
1336    
1337    Returns:   0 if there was at least one match
1338               1 if there were no matches
1339               2 there was some kind of error
1340    
1341    However, file opening failures are suppressed if "silent" is set.
1342    */
1343    
1344    static int
1345    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1346    {
1347    int rc = 1;
1348    int sep;
1349    int frtype;
1350    int pathlen;
1351    void *handle;
1352    FILE *in = NULL;           /* Ensure initialized */
1353    
1354    #ifdef SUPPORT_LIBZ
1355    gzFile ingz = NULL;
1356    #endif
1357    
1358    #ifdef SUPPORT_LIBBZ2
1359    BZFILE *inbz2 = NULL;
1360    #endif
1361    
1362    /* If the file name is "-" we scan stdin */
1363    
1364    if (strcmp(pathname, "-") == 0)
1365      {
1366      return pcregrep(stdin, FR_PLAIN,
1367        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1368          stdin_name : NULL);
1369      }
1370    
1371    /* If the file is a directory, skip if skipping or if we are recursing, scan
1372    each file and directory within it, subject to any include or exclude patterns
1373    that were set. The scanning code is localized so it can be made
1374    system-specific. */
1375    
1376    if ((sep = isdirectory(pathname)) != 0)
1377      {
1378      if (dee_action == dee_SKIP) return 1;
1379      if (dee_action == dee_RECURSE)
1380        {
1381        char buffer[1024];
1382        char *nextfile;
1383        directory_type *dir = opendirectory(pathname);
1384    
1385        if (dir == NULL)
1386          {
1387          if (!silent)
1388            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1389              strerror(errno));
1390          return 2;
1391          }
1392    
1393        while ((nextfile = readdirectory(dir)) != NULL)
1394          {
1395          int frc, nflen;
1396          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1397          nflen = strlen(nextfile);
1398    
1399          if (isdirectory(buffer))
1400            {
1401            if (exclude_dir_compiled != NULL &&
1402                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1403              continue;
1404    
1405            if (include_dir_compiled != NULL &&
1406                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1407              continue;
1408            }
1409          else
1410            {
1411            if (exclude_compiled != NULL &&
1412                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1413              continue;
1414    
1415            if (include_compiled != NULL &&
1416                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1417              continue;
1418            }
1419    
1420          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1421          if (frc > 1) rc = frc;
1422           else if (frc == 0 && rc == 1) rc = 0;
1423          }
1424    
1425        closedirectory(dir);
1426        return rc;
1427        }
1428      }
1429    
1430    /* If the file is not a directory and not a regular file, skip it if that's
1431    been requested. */
1432    
1433    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1434    
1435    /* Control reaches here if we have a regular file, or if we have a directory
1436    and recursion or skipping was not requested, or if we have anything else and
1437    skipping was not requested. The scan proceeds. If this is the first and only
1438    argument at top level, we don't show the file name, unless we are only showing
1439    the file name, or the filename was forced (-H). */
1440    
1441    pathlen = strlen(pathname);
1442    
1443    /* Open using zlib if it is supported and the file name ends with .gz. */
1444    
1445    #ifdef SUPPORT_LIBZ
1446    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1447      {
1448      ingz = gzopen(pathname, "rb");
1449      if (ingz == NULL)
1450        {
1451        if (!silent)
1452          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1453            strerror(errno));
1454        return 2;
1455        }
1456      handle = (void *)ingz;
1457      frtype = FR_LIBZ;
1458      }
1459    else
1460    #endif
1461    
1462    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1463    
1464    #ifdef SUPPORT_LIBBZ2
1465    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1466      {
1467      inbz2 = BZ2_bzopen(pathname, "rb");
1468      handle = (void *)inbz2;
1469      frtype = FR_LIBBZ2;
1470      }
1471    else
1472    #endif
1473    
1474    /* Otherwise use plain fopen(). The label is so that we can come back here if
1475    an attempt to read a .bz2 file indicates that it really is a plain file. */
1476    
1477    #ifdef SUPPORT_LIBBZ2
1478    PLAIN_FILE:
1479    #endif
1480      {
1481      in = fopen(pathname, "r");
1482      handle = (void *)in;
1483      frtype = FR_PLAIN;
1484      }
1485    
1486    /* All the opening methods return errno when they fail. */
1487    
1488    if (handle == NULL)
1489      {
1490      if (!silent)
1491        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1492          strerror(errno));
1493      return 2;
1494      }
1495    
1496    /* Now grep the file */
1497    
1498    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1499      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1500    
1501    /* Close in an appropriate manner. */
1502    
1503    #ifdef SUPPORT_LIBZ
1504    if (frtype == FR_LIBZ)
1505      gzclose(ingz);
1506    else
1507    #endif
1508    
1509    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1510    read failed. If the error indicates that the file isn't in fact bzipped, try
1511    again as a normal file. */
1512    
1513    #ifdef SUPPORT_LIBBZ2
1514    if (frtype == FR_LIBBZ2)
1515      {
1516      if (rc == 2)
1517        {
1518        int errnum;
1519        const char *err = BZ2_bzerror(inbz2, &errnum);
1520        if (errnum == BZ_DATA_ERROR_MAGIC)
1521          {
1522          BZ2_bzclose(inbz2);
1523          goto PLAIN_FILE;
1524          }
1525        else if (!silent)
1526          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1527            pathname, err);
1528        }
1529      BZ2_bzclose(inbz2);
1530      }
1531    else
1532    #endif
1533    
1534    /* Normal file close */
1535    
1536    fclose(in);
1537    
1538    /* Pass back the yield from pcregrep(). */
1539    
1540    return rc;
1541    }
1542    
1543    
1544    
1545    
1546    /*************************************************
1547    *                Usage function                  *
1548    *************************************************/
1549    
1550    static int
1551    usage(int rc)
1552    {
1553    option_item *op;
1554    fprintf(stderr, "Usage: pcregrep [-");
1555    for (op = optionlist; op->one_char != 0; op++)
1556      {
1557      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1558      }
1559    fprintf(stderr, "] [long options] [pattern] [files]\n");
1560    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1561      "options.\n");
1562    return rc;
1563    }
1564    
1565    
1566    
1567    
1568    /*************************************************
1569    *                Help function                   *
1570    *************************************************/
1571    
1572    static void
1573    help(void)
1574    {
1575    option_item *op;
1576    
1577    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1578    printf("Search for PATTERN in each FILE or standard input.\n");
1579    printf("PATTERN must be present if neither -e nor -f is used.\n");
1580    printf("\"-\" can be used as a file name to mean STDIN.\n");
1581    
1582    #ifdef SUPPORT_LIBZ
1583    printf("Files whose names end in .gz are read using zlib.\n");
1584    #endif
1585    
1586    #ifdef SUPPORT_LIBBZ2
1587    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1588    #endif
1589    
1590    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1591    printf("Other files and the standard input are read as plain files.\n\n");
1592    #else
1593    printf("All files are read as plain files, without any interpretation.\n\n");
1594    #endif
1595    
1596    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1597    printf("Options:\n");
1598    
1599    for (op = optionlist; op->one_char != 0; op++)
1600      {
1601      int n;
1602      char s[4];
1603      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1604      n = 30 - printf("  %s --%s", s, op->long_name);
1605      if (n < 1) n = 1;
1606      printf("%.*s%s\n", n, "                    ", op->help_text);
1607      }
1608    
1609    printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1610    printf("trailing white space is removed and blank lines are ignored.\n");
1611    printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1612    
1613    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1614    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1615    }
1616    
1617    
1618    
1619    
1620    /*************************************************
1621    *    Handle a single-letter, no data option      *
1622    *************************************************/
1623    
1624    static int
1625    handle_option(int letter, int options)
1626    {
1627    switch(letter)
1628      {
1629      case N_FOFFSETS: file_offsets = TRUE; break;
1630      case N_HELP: help(); exit(0);
1631      case N_LOFFSETS: line_offsets = number = TRUE; break;
1632      case 'c': count_only = TRUE; break;
1633      case 'F': process_options |= PO_FIXED_STRINGS; break;
1634      case 'H': filenames = FN_FORCE; break;
1635      case 'h': filenames = FN_NONE; break;
1636      case 'i': options |= PCRE_CASELESS; break;
1637      case 'l': filenames = FN_ONLY; break;
1638      case 'L': filenames = FN_NOMATCH_ONLY; break;
1639      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1640      case 'n': number = TRUE; break;
1641      case 'o': only_matching = TRUE; break;
1642      case 'q': quiet = TRUE; break;
1643      case 'r': dee_action = dee_RECURSE; break;
1644      case 's': silent = TRUE; break;
1645      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1646      case 'v': invert = TRUE; break;
1647      case 'w': process_options |= PO_WORD_MATCH; break;
1648      case 'x': process_options |= PO_LINE_MATCH; break;
1649    
1650      case 'V':
1651      fprintf(stderr, "pcregrep version %s\n", pcre_version());
1652      exit(0);
1653      break;
1654    
1655      default:
1656      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1657      exit(usage(2));
1658      }
1659    
1660    return options;
1661    }
1662    
1663    
1664    
1665    
1666    /*************************************************
1667    *          Construct printed ordinal             *
1668    *************************************************/
1669    
/* This turns a number into its English ordinal: "1st", "2nd", "3rd", "4th",
and so on. Numbers whose last two digits are 11, 12, or 13 are irregular and
take "th" ("11th", "112th"), so the last two digits are checked before the
final digit selects the suffix.

Argument:   n     the number to convert
Returns:    pointer to a static buffer holding the text; the buffer is
            overwritten by the next call, so the result must be used or
            copied before ordin() is called again
*/

static char *
ordin(int n)
{
/* Three characters per byte is enough for the decimal digits of any int;
the extra four bytes hold a sign, the two-letter suffix, and the zero. */

static char buffer[3 * sizeof(int) + 4];
const char *ending = "th";
int lasttwo = n % 100;
int length = sprintf(buffer, "%d", n);

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

strcpy(buffer + length, ending);
return buffer;
}
1688    
1689    
1690    
1691    /*************************************************
1692    *          Compile a single pattern              *
1693    *************************************************/
1694    
1695    /* When the -F option has been used, this is called for each substring.
1696    Otherwise it's called for each supplied pattern.
1697    
1698    Arguments:
1699      pattern        the pattern string
1700      options        the PCRE options
1701      filename       the file name, or NULL for a command-line pattern
1702      count          0 if this is the only command line pattern, or
1703                     number of the command line pattern, or
1704                     linenumber for a pattern from a file
1705    
1706    Returns:         TRUE on success, FALSE after an error
1707    */
1708    
1709    static BOOL
1710    compile_single_pattern(char *pattern, int options, char *filename, int count)
1711    {
1712    char buffer[MBUFTHIRD + 16];
1713    const char *error;
1714    int errptr;
1715    
1716    if (pattern_count >= MAX_PATTERN_COUNT)
1717      {
1718      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1719        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1720      return FALSE;
1721      }
1722    
1723    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1724      suffix[process_options]);
1725    pattern_list[pattern_count] =
1726      pcre_compile(buffer, options, &error, &errptr, pcretables);
1727    if (pattern_list[pattern_count] != NULL)
1728      {
1729      pattern_count++;
1730      return TRUE;
1731      }
1732    
1733    /* Handle compile errors */
1734    
1735    errptr -= (int)strlen(prefix[process_options]);
1736    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1737    
1738    if (filename == NULL)
1739      {
1740      if (count == 0)
1741        fprintf(stderr, "pcregrep: Error in command-line regex "
1742          "at offset %d: %s\n", errptr, error);
1743      else
1744        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1745          "at offset %d: %s\n", ordin(count), errptr, error);
1746      }
1747    else
1748      {
1749      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1750        "at offset %d: %s\n", count, filename, errptr, error);
1751      }
1752    
1753    return FALSE;
1754    }
1755    
1756    
1757    
1758    /*************************************************
1759    *           Compile one supplied pattern         *
1760    *************************************************/
1761    
1762    /* When the -F option has been used, each string may be a list of strings,
1763    separated by line breaks. They will be matched literally.
1764    
1765    Arguments:
1766      pattern        the pattern string
1767      options        the PCRE options
1768      filename       the file name, or NULL for a command-line pattern
1769      count          0 if this is the only command line pattern, or
1770                     number of the command line pattern, or
1771                     linenumber for a pattern from a file
1772    
1773    Returns:         TRUE on success, FALSE after an error
1774    */
1775    
1776    static BOOL
1777    compile_pattern(char *pattern, int options, char *filename, int count)
1778    {
1779    if ((process_options & PO_FIXED_STRINGS) != 0)
1780      {
1781      char *eop = pattern + strlen(pattern);
1782      char buffer[MBUFTHIRD];
1783      for(;;)
1784        {
1785        int ellength;
1786        char *p = end_of_line(pattern, eop, &ellength);
1787        if (ellength == 0)
1788          return compile_single_pattern(pattern, options, filename, count);
1789        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1790        pattern = p;
1791        if (!compile_single_pattern(buffer, options, filename, count))
1792          return FALSE;
1793        }
1794      }
1795    else return compile_single_pattern(pattern, options, filename, count);
1796    }
1797    
1798    
1799    
1800    /*************************************************
1801    *                Main program                    *
1802    *************************************************/
1803    
1804    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1805    
1806    int
1807    main(int argc, char **argv)
1808    {
1809    int i, j;
1810    int rc = 1;
1811    int pcre_options = 0;
1812    int cmd_pattern_count = 0;
1813    int hint_count = 0;
1814    int errptr;
1815    BOOL only_one_at_top;
1816    char *patterns[MAX_PATTERN_COUNT];
1817    const char *locale_from = "--locale";
1818    const char *error;
1819    
1820    /* Set the default line ending value from the default in the PCRE library;
1821    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1822    */
1823    
1824    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1825    switch(i)
1826      {
1827      default:                 newline = (char *)"lf"; break;
1828      case '\r':               newline = (char *)"cr"; break;
1829      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1830      case -1:                 newline = (char *)"any"; break;
1831      case -2:                 newline = (char *)"anycrlf"; break;
1832      }
1833    
1834    /* Process the options */
1835    
1836    for (i = 1; i < argc; i++)
1837      {
1838      option_item *op = NULL;
1839      char *option_data = (char *)"";    /* default to keep compiler happy */
1840      BOOL longop;
1841      BOOL longopwasequals = FALSE;
1842    
1843      if (argv[i][0] != '-') break;
1844    
1845      /* If we hit an argument that is just "-", it may be a reference to STDIN,
1846      but only if we have previously had -e or -f to define the patterns. */
1847    
1848      if (argv[i][1] == 0)
1849        {
1850        if (pattern_filename != NULL || pattern_count > 0) break;
1851          else exit(usage(2));
1852        }
1853    
1854      /* Handle a long name option, or -- to terminate the options */
1855    
1856      if (argv[i][1] == '-')
1857        {
1858        char *arg = argv[i] + 2;
1859        char *argequals = strchr(arg, '=');
1860    
1861        if (*arg == 0)    /* -- terminates options */
1862          {
1863          i++;
1864          break;                /* out of the options-handling loop */
1865        }        }
1866    
1867        longop = TRUE;
1868    
1869        /* Some long options have data that follows after =, for example file=name.
1870        Some options have variations in the long name spelling: specifically, we
1871        allow "regexp" because GNU grep allows it, though I personally go along
1872        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1873        These options are entered in the table as "regex(p)". No option is in both
1874        these categories, fortunately. */
1875    
1876      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
1877        {        {
1878        if (strcmp(argv[i]+2, op->long_name) == 0)        char *opbra = strchr(op->long_name, '(');
1879          char *equals = strchr(op->long_name, '=');
1880          if (opbra == NULL)     /* Not a (p) case */
1881          {          {
1882          options = handle_option(op->one_char, options);          if (equals == NULL)  /* Not thing=data case */
1883          break;            {
1884              if (strcmp(arg, op->long_name) == 0) break;
1885              }
1886            else                 /* Special case xxx=data */
1887              {
1888              int oplen = equals - op->long_name;
1889              int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1890              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1891                {
1892                option_data = arg + arglen;
1893                if (*option_data == '=')
1894                  {
1895                  option_data++;
1896                  longopwasequals = TRUE;
1897                  }
1898                break;
1899                }
1900              }
1901            }
1902          else                   /* Special case xxxx(p) */
1903            {
1904            char buff1[24];
1905            char buff2[24];
1906            int baselen = opbra - op->long_name;
1907            sprintf(buff1, "%.*s", baselen, op->long_name);
1908            sprintf(buff2, "%s%.*s", buff1,
1909              (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1910            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1911              break;
1912          }          }
1913        }        }
1914    
1915      if (op->one_char == 0)      if (op->one_char == 0)
1916        {        {
1917        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
# Line 519  for (i = 1; i < argc; i++) Line 1919  for (i = 1; i < argc; i++)
1919        }        }
1920      }      }
1921    
1922    /* One-char options */  
1923      /* Jeffrey Friedl's debugging harness uses these additional options which
1924      are not in the right form for putting in the option table because they use
1925      only one hyphen, yet are more than one character long. By putting them
1926      separately here, they will not get displayed as part of the help() output,
1927      but I don't think Jeffrey will care about that. */
1928    
1929    #ifdef JFRIEDL_DEBUG
1930      else if (strcmp(argv[i], "-pre") == 0) {
1931              jfriedl_prefix = argv[++i];
1932              continue;
1933      } else if (strcmp(argv[i], "-post") == 0) {
1934              jfriedl_postfix = argv[++i];
1935              continue;
1936      } else if (strcmp(argv[i], "-XT") == 0) {
1937              sscanf(argv[++i], "%d", &jfriedl_XT);
1938              continue;
1939      } else if (strcmp(argv[i], "-XR") == 0) {
1940              sscanf(argv[++i], "%d", &jfriedl_XR);
1941              continue;
1942      }
1943    #endif
1944    
1945    
1946      /* One-char options; many that have no data may be in a single argument; we
1947      continue till we hit the last one or one that needs data. */
1948    
1949    else    else
1950      {      {
1951      char *s = argv[i] + 1;      char *s = argv[i] + 1;
1952        longop = FALSE;
1953      while (*s != 0)      while (*s != 0)
1954        {        {
1955        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
1956            { if (*s == op->one_char) break; }
1957          if (op->one_char == 0)
1958          {          {
1959          pattern_filename = s + 1;          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1960          if (pattern_filename[0] == 0)            *s, argv[i]);
1961            {          exit(usage(2));
1962            if (i >= argc - 1)          }
1963              {        if (op->type != OP_NODATA || s[1] == 0)
1964              fprintf(stderr, "pcregrep: File name missing after -f\n");          {
1965              exit(usage(2));          option_data = s+1;
             }  
           pattern_filename = argv[++i];  
           }  
1966          break;          break;
1967          }          }
1968        else options = handle_option(*s++, options);        pcre_options = handle_option(*s++, pcre_options);
1969        }        }
1970      }      }
   }  
1971    
1972  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));    /* At this point we should have op pointing to a matched option. If the type
1973  hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));    is NO_DATA, it means that there is no data, and the option might set
1974      something in the PCRE options. */
1975    
1976  if (pattern_list == NULL || hints_list == NULL)    if (op->type == OP_NODATA)
1977    {      {
1978    fprintf(stderr, "pcregrep: malloc failed\n");      pcre_options = handle_option(op->one_char, pcre_options);
1979    return 2;      continue;
1980    }      }
1981    
1982  /* Compile the regular expression(s). */    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1983      either has a value or defaults to something. It cannot have data in a
1984      separate item. At the moment, the only such options are "colo(u)r" and
1985      Jeffrey Friedl's special -S debugging option. */
1986    
1987  if (pattern_filename != NULL)    if (*option_data == 0 &&
1988    {        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
   FILE *f = fopen(pattern_filename, "r");  
   char buffer[BUFSIZ];  
   if (f == NULL)  
1989      {      {
1990      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      switch (op->one_char)
1991        strerror(errno));        {
1992      return 2;        case N_COLOUR:
1993          colour_option = (char *)"auto";
1994          break;
1995    #ifdef JFRIEDL_DEBUG
1996          case 'S':
1997          S_arg = 0;
1998          break;
1999    #endif
2000          }
2001        continue;
2002      }      }
2003    while (fgets(buffer, sizeof(buffer), f) != NULL)  
2004      /* Otherwise, find the data string for the option. */
2005    
2006      if (*option_data == 0)
2007      {      {
2008      char *s = buffer + (int)strlen(buffer);      if (i >= argc - 1 || longopwasequals)
     if (pattern_count >= MAX_PATTERN_COUNT)  
2009        {        {
2010        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2011          exit(usage(2));
2012          }
2013        option_data = argv[++i];
2014        }
2015    
2016      /* If the option type is OP_PATLIST, it's the -e option, which can be called
2017      multiple times to create a list of patterns. */
2018    
2019      if (op->type == OP_PATLIST)
2020        {
2021        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2022          {
2023          fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2024          MAX_PATTERN_COUNT);          MAX_PATTERN_COUNT);
2025        return 2;        return 2;
2026        }        }
2027      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      patterns[cmd_pattern_count++] = option_data;
2028      if (s == buffer) continue;      }
2029      *s = 0;  
2030      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,    /* Otherwise, deal with single string or numeric data values. */
2031        &errptr, NULL);  
2032      if (pattern_list[pattern_count++] == NULL)    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2033        {
2034        *((char **)op->dataptr) = option_data;
2035        }
2036      else
2037        {
2038        char *endptr;
2039        int n = strtoul(option_data, &endptr, 10);
2040        if (*endptr != 0)
2041        {        {
2042        fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",        if (longop)
2043          pattern_count, errptr, error);          {
2044        return 2;          char *equals = strchr(op->long_name, '=');
2045            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2046              equals - op->long_name;
2047            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2048              option_data, nlen, op->long_name);
2049            }
2050          else
2051            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2052              option_data, op->one_char);
2053          exit(usage(2));
2054        }        }
2055        *((int *)op->dataptr) = n;
2056        }
2057      }
2058    
2059    /* Options have been decoded. If -C was used, its value is used as a default
2060    for -A and -B. */
2061    
2062    if (both_context > 0)
2063      {
2064      if (after_context == 0) after_context = both_context;
2065      if (before_context == 0) before_context = both_context;
2066      }
2067    
2068    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2069    However, the latter two set the only_matching flag. */
2070    
2071    if ((only_matching && (file_offsets || line_offsets)) ||
2072        (file_offsets && line_offsets))
2073      {
2074      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2075        "and/or --line-offsets\n");
2076      exit(usage(2));
2077      }
2078    
2079    if (file_offsets || line_offsets) only_matching = TRUE;
2080    
2081    /* If a locale has not been provided as an option, see if the LC_CTYPE or
2082    LC_ALL environment variable is set, and if so, use it. */
2083    
2084    if (locale == NULL)
2085      {
2086      locale = getenv("LC_ALL");
2087      locale_from = "LC_ALL";
2088      }
2089    
2090    if (locale == NULL)
2091      {
2092      locale = getenv("LC_CTYPE");
2093      locale_from = "LC_CTYPE";
2094      }
2095    
2096    /* If a locale has been provided, set it, and generate the tables the PCRE
2097    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2098    
2099    if (locale != NULL)
2100      {
2101      if (setlocale(LC_CTYPE, locale) == NULL)
2102        {
2103        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2104          locale, locale_from);
2105        return 2;
2106        }
2107      pcretables = pcre_maketables();
2108      }
2109    
2110    /* Sort out colouring */
2111    
2112    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2113      {
2114      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2115      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2116      else
2117        {
2118        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2119          colour_option);
2120        return 2;
2121        }
2122      if (do_colour)
2123        {
2124        char *cs = getenv("PCREGREP_COLOUR");
2125        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2126        if (cs != NULL) colour_string = cs;
2127      }      }
   fclose(f);  
2128    }    }
2129    
2130  /* If no file name, a single regex must be given inline */  /* Interpret the newline type; the default settings are Unix-like. */
2131    
2132    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2133      {
2134      pcre_options |= PCRE_NEWLINE_CR;
2135      endlinetype = EL_CR;
2136      }
2137    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2138      {
2139      pcre_options |= PCRE_NEWLINE_LF;
2140      endlinetype = EL_LF;
2141      }
2142    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2143      {
2144      pcre_options |= PCRE_NEWLINE_CRLF;
2145      endlinetype = EL_CRLF;
2146      }
2147    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2148      {
2149      pcre_options |= PCRE_NEWLINE_ANY;
2150      endlinetype = EL_ANY;
2151      }
2152    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2153      {
2154      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2155      endlinetype = EL_ANYCRLF;
2156      }
2157  else  else
2158    {    {
2159    if (i >= argc) return usage(2);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2160    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);    return 2;
2161    if (pattern_list[0] == NULL)    }
2162    
2163    /* Interpret the text values for -d and -D */
2164    
2165    if (dee_option != NULL)
2166      {
2167      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2168      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2169      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2170      else
2171      {      {
2172      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
       error);  
2173      return 2;      return 2;
2174      }      }
   pattern_count++;  
2175    }    }
2176    
2177  /* Study the regular expressions, as we will be running them may times */  if (DEE_option != NULL)
2178      {
2179      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2180      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2181      else
2182        {
2183        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2184        return 2;
2185        }
2186      }
2187    
2188    /* Check the values for Jeffrey Friedl's debugging options. */
2189    
2190    #ifdef JFRIEDL_DEBUG
2191    if (S_arg > 9)
2192      {
2193      fprintf(stderr, "pcregrep: bad value for -S option\n");
2194      return 2;
2195      }
2196    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2197      {
2198      if (jfriedl_XT == 0) jfriedl_XT = 1;
2199      if (jfriedl_XR == 0) jfriedl_XR = 1;
2200      }
2201    #endif
2202    
2203    /* Get memory to store the pattern and hints lists. */
2204    
2205    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2206    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2207    
2208    if (pattern_list == NULL || hints_list == NULL)
2209      {
2210      fprintf(stderr, "pcregrep: malloc failed\n");
2211      goto EXIT2;
2212      }
2213    
2214    /* If no patterns were provided by -e, and there is no file provided by -f,
2215    the first argument is the one and only pattern, and it must exist. */
2216    
2217    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2218      {
2219      if (i >= argc) return usage(2);
2220      patterns[cmd_pattern_count++] = argv[i++];
2221      }
2222    
2223    /* Compile the patterns that were provided on the command line, either by
2224    multiple uses of -e or as a single unkeyed pattern. */
2225    
2226    for (j = 0; j < cmd_pattern_count; j++)
2227      {
2228      if (!compile_pattern(patterns[j], pcre_options, NULL,
2229           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2230        goto EXIT2;
2231      }
2232    
2233    /* Compile the regular expressions that are provided in a file. */
2234    
2235    if (pattern_filename != NULL)
2236      {
2237      int linenumber = 0;
2238      FILE *f;
2239      char *filename;
2240      char buffer[MBUFTHIRD];
2241    
2242      if (strcmp(pattern_filename, "-") == 0)
2243        {
2244        f = stdin;
2245        filename = stdin_name;
2246        }
2247      else
2248        {
2249        f = fopen(pattern_filename, "r");
2250        if (f == NULL)
2251          {
2252          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2253            strerror(errno));
2254          goto EXIT2;
2255          }
2256        filename = pattern_filename;
2257        }
2258    
2259      while (fgets(buffer, MBUFTHIRD, f) != NULL)
2260        {
2261        char *s = buffer + (int)strlen(buffer);
2262        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2263        *s = 0;
2264        linenumber++;
2265        if (buffer[0] == 0) continue;   /* Skip blank lines */
2266        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2267          goto EXIT2;
2268        }
2269    
2270      if (f != stdin) fclose(f);
2271      }
2272    
2273    /* Study the regular expressions, as we will be running them many times */
2274    
2275  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
2276    {    {
# Line 615  for (j = 0; j < pattern_count; j++) Line 2280  for (j = 0; j < pattern_count; j++)
2280      char s[16];      char s[16];
2281      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2282      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2283      return 2;      goto EXIT2;
2284        }
2285      hint_count++;
2286      }
2287    
2288    /* If there are include or exclude patterns, compile them. */
2289    
2290    if (exclude_pattern != NULL)
2291      {
2292      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2293        pcretables);
2294      if (exclude_compiled == NULL)
2295        {
2296        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2297          errptr, error);
2298        goto EXIT2;
2299        }
2300      }
2301    
2302    if (include_pattern != NULL)
2303      {
2304      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2305        pcretables);
2306      if (include_compiled == NULL)
2307        {
2308        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2309          errptr, error);
2310        goto EXIT2;
2311        }
2312      }
2313    
2314    if (exclude_dir_pattern != NULL)
2315      {
2316      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2317        pcretables);
2318      if (exclude_dir_compiled == NULL)
2319        {
2320        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2321          errptr, error);
2322        goto EXIT2;
2323        }
2324      }
2325    
2326    if (include_dir_pattern != NULL)
2327      {
2328      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2329        pcretables);
2330      if (include_dir_compiled == NULL)
2331        {
2332        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2333          errptr, error);
2334        goto EXIT2;
2335      }      }
2336    }    }
2337    
2338  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
2339    
2340  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc)
2341      {
2342      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2343      goto EXIT;
2344      }
2345    
2346  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2347  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
2348  the file name if the argument is not a directory. */  the file name if the argument is not a directory and filenames are not
2349    otherwise forced. */
2350    
2351  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
 if (filenames_only) filenames = TRUE;  
2352    
2353  for (; i < argc; i++)  for (; i < argc; i++)
2354    {    {
2355    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2356    if (frc == 0 && rc == 1) rc = 0;      only_one_at_top);
2357      if (frc > 1) rc = frc;
2358        else if (frc == 0 && rc == 1) rc = 0;
2359    }    }
2360    
2361    EXIT:
2362    if (pattern_list != NULL)
2363      {
2364      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2365      free(pattern_list);
2366      }
2367    if (hints_list != NULL)
2368      {
2369      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2370      free(hints_list);
2371      }
2372  return rc;  return rc;
2373    
2374    EXIT2:
2375    rc = 2;
2376    goto EXIT;
2377  }  }
2378    
2379  /* End */  /* End of pcregrep */

Legend:
Removed from v.63  
changed lines
  Added in v.357

  ViewVC Help
Powered by ViewVC 1.1.5