/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 63 by nigel, Sat Feb 24 21:40:03 2007 UTC revision 391 by ph10, Tue Mar 17 21:16:01 2009 UTC
# Line 4  Line 4 
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories. */  directories.
8    
9               Copyright (c) 1997-2009 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55    #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 19  directories. */ Line 70  directories. */
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "3.0 14-Jan-2003"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76    #if BUFSIZ > 8192
77    #define MBUFTHIRD BUFSIZ
78    #else
79    #define MBUFTHIRD 8192
80    #endif
81    
82    /* Values for the "filenames" variable, which specifies options for file name
83    output. The order is important; it is assumed that a file name is wanted for
84    all values greater than FN_DEFAULT. */
85    
86    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92    /* Actions for the -d and -D options */
93    
94    enum { dee_READ, dee_SKIP, dee_RECURSE };
95    enum { DEE_READ, DEE_SKIP };
96    
97    /* Actions for special processing options (flag bits) */
98    
99    #define PO_WORD_MATCH     0x0001
100    #define PO_LINE_MATCH     0x0002
101    #define PO_FIXED_STRINGS  0x0004
102    
103    /* Line ending types */
104    
105    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    
108    
109  /*************************************************  /*************************************************
110  *               Global variables                 *  *               Global variables                 *
111  *************************************************/  *************************************************/
112    
113    /* Jeffrey Friedl has some debugging requirements that are not part of the
114    regular code. */
115    
116    #ifdef JFRIEDL_DEBUG
117    static int S_arg = -1;
118    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
119    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
120    static const char *jfriedl_prefix = "";
121    static const char *jfriedl_postfix = "";
122    #endif
123    
124    static int  endlinetype;
125    
126    static char *colour_string = (char *)"1;31";
127    static char *colour_option = NULL;
128    static char *dee_option = NULL;
129    static char *DEE_option = NULL;
130    static char *newline = NULL;
131  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
132    static char *stdin_name = (char *)"(standard input)";
133    static char *locale = NULL;
134    
135    static const unsigned char *pcretables = NULL;
136    
137  static int  pattern_count = 0;  static int  pattern_count = 0;
138  static pcre **pattern_list;  static pcre **pattern_list = NULL;
139  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
140    
141    static char *include_pattern = NULL;
142    static char *exclude_pattern = NULL;
143    static char *include_dir_pattern = NULL;
144    static char *exclude_dir_pattern = NULL;
145    
146    static pcre *include_compiled = NULL;
147    static pcre *exclude_compiled = NULL;
148    static pcre *include_dir_compiled = NULL;
149    static pcre *exclude_dir_compiled = NULL;
150    
151    static int after_context = 0;
152    static int before_context = 0;
153    static int both_context = 0;
154    static int dee_action = dee_READ;
155    static int DEE_action = DEE_READ;
156    static int error_count = 0;
157    static int filenames = FN_DEFAULT;
158    static int process_options = 0;
159    
160  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
161  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
162  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
163    static BOOL hyphenpending = FALSE;
164  static BOOL invert = FALSE;  static BOOL invert = FALSE;
165    static BOOL line_offsets = FALSE;
166    static BOOL multiline = FALSE;
167  static BOOL number = FALSE;  static BOOL number = FALSE;
168  static BOOL recurse = FALSE;  static BOOL only_matching = FALSE;
169    static BOOL quiet = FALSE;
170  static BOOL silent = FALSE;  static BOOL silent = FALSE;
171  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
172    
173  /* Structure for options and list of them */  /* Structure for options and list of them */
174    
175    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
176           OP_PATLIST };
177    
178  typedef struct option_item {  typedef struct option_item {
179      int type;
180    int one_char;    int one_char;
181    char *long_name;    void *dataptr;
182    char *help_text;    const char *long_name;
183      const char *help_text;
184  } option_item;  } option_item;
185    
186    /* Options without a single-letter equivalent get a negative value. This can be
187    used to identify them. */
188    
189    #define N_COLOUR       (-1)
190    #define N_EXCLUDE      (-2)
191    #define N_EXCLUDE_DIR  (-3)
192    #define N_HELP         (-4)
193    #define N_INCLUDE      (-5)
194    #define N_INCLUDE_DIR  (-6)
195    #define N_LABEL        (-7)
196    #define N_LOCALE       (-8)
197    #define N_NULL         (-9)
198    #define N_LOFFSETS     (-10)
199    #define N_FOFFSETS     (-11)
200    
201  static option_item optionlist[] = {  static option_item optionlist[] = {
202    { -1,  "help",         "display this help and exit" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
203    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
204    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
205    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
206    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
207    { 'n', "line-number",  "print line number with output lines" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
208    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
209    { 's', "no-messages",  "suppress error messages" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
210    { 'u', "utf-8",        "use UTF-8 mode" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
211    { 'V', "version",      "print version information and exit" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
212    { 'v', "invert-match", "select non-matching lines" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
213    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
214    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
215    { 0,    NULL,           NULL }    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
216      { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
217      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
218      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
219      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
220      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
221      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
222      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
223      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
224      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
225      { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
226      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
227      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
228      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
229      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
230      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
231      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
232      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
233      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
234    #ifdef JFRIEDL_DEBUG
235      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
236    #endif
237      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
238      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
239      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
240      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
241      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
242      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
243      { OP_NODATA,    0,        NULL,               NULL,            NULL }
244  };  };
245    
246    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
247    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
248    that the combination of -w and -x has the same effect as -x on its own, so we
249    can treat them as the same. */
250    
251    static const char *prefix[] = {
252      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
253    
254    static const char *suffix[] = {
255      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
256    
257    /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
258    
259    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
260    
261    const char utf8_table4[] = {
262      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
263      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
264      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
265      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
266    
267    
268    
269  /*************************************************  /*************************************************
270  *       Functions for directory scanning         *  *            OS-specific functions               *
271  *************************************************/  *************************************************/
272    
273  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
274  although at present the only ones are for Unix, Win32, and for "no directory  although at present the only ones are for Unix, Win32, and for "no support". */
 recursion support". */  
275    
276    
277  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
278    
279  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
280  #include <sys/types.h>  #include <sys/types.h>
281  #include <sys/stat.h>  #include <sys/stat.h>
282  #include <dirent.h>  #include <dirent.h>
283    
284  typedef DIR directory_type;  typedef DIR directory_type;
285    
286  int  static int
287  isdirectory(char *filename)  isdirectory(char *filename)
288  {  {
289  struct stat statbuf;  struct stat statbuf;
# Line 94  if (stat(filename, &statbuf) < 0) Line 292  if (stat(filename, &statbuf) < 0)
292  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
293  }  }
294    
295  directory_type *  static directory_type *
296  opendirectory(char *filename)  opendirectory(char *filename)
297  {  {
298  return opendir(filename);  return opendir(filename);
299  }  }
300    
301  char *  static char *
302  readdirectory(directory_type *dir)  readdirectory(directory_type *dir)
303  {  {
304  for (;;)  for (;;)
# Line 110  for (;;) Line 308  for (;;)
308    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
309      return dent->d_name;      return dent->d_name;
310    }    }
311  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
312  }  }
313    
314  void  static void
315  closedirectory(directory_type *dir)  closedirectory(directory_type *dir)
316  {  {
317  closedir(dir);  closedir(dir);
318  }  }
319    
320    
321    /************* Test for regular file in Unix **********/
322    
323    static int
324    isregfile(char *filename)
325    {
326    struct stat statbuf;
327    if (stat(filename, &statbuf) < 0)
328      return 1;        /* In the expectation that opening as a file will fail */
329    return (statbuf.st_mode & S_IFMT) == S_IFREG;
330    }
331    
332    
333    /************* Test stdout for being a terminal in Unix **********/
334    
335    static BOOL
336    is_stdout_tty(void)
337    {
338    return isatty(fileno(stdout));
339    }
340    
341    
342  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
343    
344  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
345  Lionel Fourquaux. */  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
346    when it did not exist. David Byron added a patch that moved the #include of
347    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
348    */
349    
350  #elif HAVE_WIN32API  #elif HAVE_WINDOWS_H
351    
352  #ifndef STRICT  #ifndef STRICT
353  # define STRICT  # define STRICT
# Line 134  Lionel Fourquaux. */ Line 355  Lionel Fourquaux. */
355  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
356  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
357  #endif  #endif
358    
359  #include <windows.h>  #include <windows.h>
360    
361    #ifndef INVALID_FILE_ATTRIBUTES
362    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
363    #endif
364    
365  typedef struct directory_type  typedef struct directory_type
366  {  {
367  HANDLE handle;  HANDLE handle;
# Line 213  free(dir); Line 439  free(dir);
439  }  }
440    
441    
442    /************* Test for regular file in Win32 **********/
443    
444    /* I don't know how to do this, or if it can be done; assume all paths are
445    regular if they are not directories. */
446    
447    int isregfile(char *filename)
448    {
449    return !isdirectory(filename);
450    }
451    
452    
453    /************* Test stdout for being a terminal in Win32 **********/
454    
455    /* I don't know how to do this; assume never */
456    
457    static BOOL
458    is_stdout_tty(void)
459    {
460    return FALSE;
461    }
462    
463    
464  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
465    
466  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
# Line 221  free(dir); Line 469  free(dir);
469    
470  typedef void directory_type;  typedef void directory_type;
471    
472  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
473  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
474  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
475  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
476    
477    
478    /************* Test for regular file when we can't do it **********/
479    
480    /* Assume all files are regular. */
481    
482    int isregfile(char *filename) { return 1; }
483    
484    
485    /************* Test stdout for being a terminal when we can't do it **********/
486    
487    static BOOL
488    is_stdout_tty(void)
489    {
490    return FALSE;
491    }
492    
493    
494  #endif  #endif
495    
496    
497    
498  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
499  /*************************************************  /*************************************************
500  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
501  *************************************************/  *************************************************/
# Line 253  return sys_errlist[n]; Line 518  return sys_errlist[n];
518    
519    
520  /*************************************************  /*************************************************
521  *              Grep an individual file           *  *             Find end of line                   *
522  *************************************************/  *************************************************/
523    
524  static int  /* The length of the endline sequence that is found is set via lenptr. This may
525  pcregrep(FILE *in, char *name)  be zero at the very end of the file if there is no line-ending sequence there.
526  {  
527  int rc = 1;  Arguments:
528  int linenumber = 0;    p         current position in line
529  int count = 0;    endptr    end of available data
530  int offsets[99];    lenptr    where to put the length of the eol sequence
 char buffer[BUFSIZ];  
531    
532  while (fgets(buffer, sizeof(buffer), in) != NULL)  Returns:    pointer to the last byte of the line
533    */
534    
535    static char *
536    end_of_line(char *p, char *endptr, int *lenptr)
537    {
538    switch(endlinetype)
539    {    {
540    BOOL match = FALSE;    default:      /* Just in case */
541    int i;    case EL_LF:
542    int length = (int)strlen(buffer);    while (p < endptr && *p != '\n') p++;
543    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    if (p < endptr)
544    linenumber++;      {
545        *lenptr = 1;
546        return p + 1;
547        }
548      *lenptr = 0;
549      return endptr;
550    
551    for (i = 0; !match && i < pattern_count; i++)    case EL_CR:
552      while (p < endptr && *p != '\r') p++;
553      if (p < endptr)
554      {      {
555      match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,      *lenptr = 1;
556        offsets, 99) >= 0;      return p + 1;
     if (match && whole_lines && offsets[1] != length) match = FALSE;  
557      }      }
558      *lenptr = 0;
559      return endptr;
560    
561    if (match != invert)    case EL_CRLF:
562      for (;;)
563      {      {
564      if (count_only) count++;      while (p < endptr && *p != '\r') p++;
565        if (++p >= endptr)
566          {
567          *lenptr = 0;
568          return endptr;
569          }
570        if (*p == '\n')
571          {
572          *lenptr = 2;
573          return p + 1;
574          }
575        }
576      break;
577    
578      case EL_ANYCRLF:
579      while (p < endptr)
580        {
581        int extra = 0;
582        register int c = *((unsigned char *)p);
583    
584      else if (filenames_only)      if (utf8 && c >= 0xc0)
585        {        {
586        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        int gcii, gcss;
587        return 0;        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
588          gcss = 6*extra;
589          c = (c & utf8_table3[extra]) << gcss;
590          for (gcii = 1; gcii <= extra; gcii++)
591            {
592            gcss -= 6;
593            c |= (p[gcii] & 0x3f) << gcss;
594            }
595        }        }
596    
597      else if (silent) return 0;      p += 1 + extra;
598    
599      else      switch (c)
600        {        {
601        if (name != NULL) fprintf(stdout, "%s:", name);        case 0x0a:    /* LF */
602        if (number) fprintf(stdout, "%d:", linenumber);        *lenptr = 1;
603        fprintf(stdout, "%s\n", buffer);        return p;
604    
605          case 0x0d:    /* CR */
606          if (p < endptr && *p == 0x0a)
607            {
608            *lenptr = 2;
609            p++;
610            }
611          else *lenptr = 1;
612          return p;
613    
614          default:
615          break;
616        }        }
617        }   /* End of loop for ANYCRLF case */
618    
619      rc = 0;    *lenptr = 0;  /* Must have hit the end */
620      }    return endptr;
   }  
621    
622  if (count_only)    case EL_ANY:
623    {    while (p < endptr)
624    if (name != NULL) fprintf(stdout, "%s:", name);      {
625    fprintf(stdout, "%d\n", count);      int extra = 0;
626    }      register int c = *((unsigned char *)p);
627    
628  return rc;      if (utf8 && c >= 0xc0)
629  }        {
630          int gcii, gcss;
631          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
632          gcss = 6*extra;
633          c = (c & utf8_table3[extra]) << gcss;
634          for (gcii = 1; gcii <= extra; gcii++)
635            {
636            gcss -= 6;
637            c |= (p[gcii] & 0x3f) << gcss;
638            }
639          }
640    
641        p += 1 + extra;
642    
643        switch (c)
644          {
645          case 0x0a:    /* LF */
646          case 0x0b:    /* VT */
647          case 0x0c:    /* FF */
648          *lenptr = 1;
649          return p;
650    
651          case 0x0d:    /* CR */
652          if (p < endptr && *p == 0x0a)
653            {
654            *lenptr = 2;
655            p++;
656            }
657          else *lenptr = 1;
658          return p;
659    
660          case 0x85:    /* NEL */
661          *lenptr = utf8? 2 : 1;
662          return p;
663    
664          case 0x2028:  /* LS */
665          case 0x2029:  /* PS */
666          *lenptr = 3;
667          return p;
668    
669          default:
670          break;
671          }
672        }   /* End of loop for ANY case */
673    
674      *lenptr = 0;  /* Must have hit the end */
675      return endptr;
676      }     /* End of overall switch */
677    }
678    
679    
680    
681  /*************************************************  /*************************************************
682  *     Grep a file or recurse into a directory    *  *         Find start of previous line            *
683  *************************************************/  *************************************************/
684    
685  static int  /* This is called when looking back for before lines to print.
 grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,  
   BOOL only_one_at_top)  
 {  
 int rc = 1;  
 int sep;  
 FILE *in;  
686    
687  /* If the file is a directory and we are recursing, scan each file within it.  Arguments:
688  The scanning code is localized so it can be made system-specific. */    p         start of the subsequent line
689      startptr  start of available data
690    
691  if ((sep = isdirectory(filename)) != 0 && recurse)  Returns:    pointer to the start of the previous line
692    {  */
   char buffer[1024];  
   char *nextfile;  
   directory_type *dir = opendirectory(filename);  
693    
694    if (dir == NULL)  static char *
695      {  previous_line(char *p, char *startptr)
696      fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,  {
697        strerror(errno));  switch(endlinetype)
698      return 2;    {
699      }    default:      /* Just in case */
700      case EL_LF:
701      p--;
702      while (p > startptr && p[-1] != '\n') p--;
703      return p;
704    
705      case EL_CR:
706      p--;
707      while (p > startptr && p[-1] != '\n') p--;
708      return p;
709    
710    while ((nextfile = readdirectory(dir)) != NULL)    case EL_CRLF:
711      for (;;)
712      {      {
713      int frc;      p -= 2;
714      sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);      while (p > startptr && p[-1] != '\n') p--;
715      frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);      if (p <= startptr + 1 || p[-2] == '\r') return p;
     if (frc == 0 && rc == 1) rc = 0;  
716      }      }
717      return p;   /* But control should never get here */
718    
719    closedirectory(dir);    case EL_ANY:
720    return rc;    case EL_ANYCRLF:
721    }    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
722      if (utf8) while ((*p & 0xc0) == 0x80) p--;
723    
724  /* If the file is not a directory, or we are not recursing, scan it. If this is    while (p > startptr)
725  the first and only argument at top level, we don't show the file name (unless      {
726  we are only showing the file name). Otherwise, control is via the      register int c;
727  show_filenames variable. */      char *pp = p - 1;
728    
729  in = fopen(filename, "r");      if (utf8)
730  if (in == NULL)        {
731    {        int extra = 0;
732    fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));        while ((*pp & 0xc0) == 0x80) pp--;
733    return 2;        c = *((unsigned char *)pp);
734    }        if (c >= 0xc0)
735            {
736            int gcii, gcss;
737            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
738            gcss = 6*extra;
739            c = (c & utf8_table3[extra]) << gcss;
740            for (gcii = 1; gcii <= extra; gcii++)
741              {
742              gcss -= 6;
743              c |= (pp[gcii] & 0x3f) << gcss;
744              }
745            }
746          }
747        else c = *((unsigned char *)pp);
748    
749  rc = pcregrep(in, (filenames_only || (show_filenames && !only_one_at_top))?      if (endlinetype == EL_ANYCRLF) switch (c)
750    filename : NULL);        {
751  fclose(in);        case 0x0a:    /* LF */
752  return rc;        case 0x0d:    /* CR */
753  }        return p;
754    
755          default:
756          break;
757          }
758    
759        else switch (c)
760          {
761          case 0x0a:    /* LF */
762          case 0x0b:    /* VT */
763          case 0x0c:    /* FF */
764          case 0x0d:    /* CR */
765          case 0x85:    /* NEL */
766          case 0x2028:  /* LS */
767          case 0x2029:  /* PS */
768          return p;
769    
770          default:
771          break;
772          }
773    
774  /*************************************************      p = pp;  /* Back one character */
775  *                Usage function                  *      }        /* End of loop for ANY case */
 *************************************************/  
776    
777  static int    return startptr;  /* Hit start of data */
778  usage(int rc)    }     /* End of overall switch */
 {  
 fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] [pattern] [file1 file2 ...]\n");  
 fprintf(stderr, "Type `pcregrep --help' for more information.\n");  
 return rc;  
779  }  }
780    
781    
782    
783    
784    
785  /*************************************************  /*************************************************
786  *                Help function                   *  *       Print the previous "after" lines         *
787  *************************************************/  *************************************************/
788    
789  static void  /* This is called if we are about to lose said lines because of buffer filling,
790  help(void)  and at the end of the file. The data in the line is written using fwrite() so
791  {  that a binary zero does not terminate it.
792  option_item *op;  
793    Arguments:
794  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");    lastmatchnumber   the number of the last matching line, plus one
795  printf("Search for PATTERN in each FILE or standard input.\n");    lastmatchrestart  where we restarted after the last match
796  printf("PATTERN must be present if -f is not used.\n");    endptr            end of available data
797  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");    printname         filename for printing
798    
799  printf("Options:\n");  Returns:            nothing
800    */
801    
802  for (op = optionlist; op->one_char != 0; op++)  static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
803      char *endptr, char *printname)
804    {
805    if (after_context > 0 && lastmatchnumber > 0)
806    {    {
807    int n;    int count = 0;
808    char s[4];    while (lastmatchrestart < endptr && count++ < after_context)
809    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");      {
810    printf("  %s --%s%n", s, op->long_name, &n);      int ellength;
811    n = 30 - n;      char *pp = lastmatchrestart;
812    if (n < 1) n = 1;      if (printname != NULL) fprintf(stdout, "%s-", printname);
813    printf("%.*s%s\n", n, "                    ", op->help_text);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
814        pp = end_of_line(pp, endptr, &ellength);
815        fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
816        lastmatchrestart = pp;
817        }
818      hyphenpending = TRUE;
819    }    }
   
 printf("\n  -f<filename>  or  --file=<filename>\n");  
 printf("    Read patterns from <filename> instead of using a command line option.\n");  
 printf("    Trailing white space is removed; blanks lines are ignored.\n");  
 printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
   
 printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  
 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  
820  }  }
821    
822    
823    
   
824  /*************************************************  /*************************************************
825  *                Handle an option                *  *   Apply patterns to subject till one matches   *
826  *************************************************/  *************************************************/
827    
828  static int  /* This function is called to run through all patterns, looking for a match. It
829  handle_option(int letter, int options)  is used multiple times for the same subject when colouring is enabled, in order
830    to find all possible matches.
831    
832    Arguments:
833      matchptr    the start of the subject
834      length      the length of the subject to match
835      offsets     the offsets vector to fill in
836      mrc         address of where to put the result of pcre_exec()
837    
838    Returns:      TRUE if there was a match
839                  FALSE if there was no match
840                  invert if there was a non-fatal error
841    */
842    
843    static BOOL
844    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
845  {  {
846  switch(letter)  int i;
847    for (i = 0; i < pattern_count; i++)
848    {    {
849    case -1:  help(); exit(0);    *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0,
850    case 'c': count_only = TRUE; break;      PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
851    case 'h': filenames = FALSE; break;    if (*mrc >= 0) return TRUE;
852    case 'i': options |= PCRE_CASELESS; break;    if (*mrc == PCRE_ERROR_NOMATCH) continue;
853    case 'l': filenames_only = TRUE;    fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
854    case 'n': number = TRUE; break;    if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
855    case 'r': recurse = TRUE; break;    fprintf(stderr, "this text:\n");
856    case 's': silent = TRUE; break;    fwrite(matchptr, 1, length, stderr);  /* In case binary zero included */
857    case 'u': options |= PCRE_UTF8; break;    fprintf(stderr, "\n");
858    case 'v': invert = TRUE; break;    if (error_count == 0 &&
859    case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;        (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
860        {
861    case 'V':      fprintf(stderr, "pcregrep: error %d means that a resource limit "
862    fprintf(stderr, "pcregrep version %s using ", VERSION);        "was exceeded\n", *mrc);
863    fprintf(stderr, "PCRE version %s\n", pcre_version());      fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
864    exit(0);      }
865    break;    if (error_count++ > 20)
866        {
867    default:      fprintf(stderr, "pcregrep: too many errors - abandoned\n");
868    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);      exit(2);
869    exit(usage(2));      }
870      return invert;    /* No more matching; don't show the line again */
871    }    }
872    
873  return options;  return FALSE;  /* No match, no errors */
874  }  }
875    
876    
877    
   
878  /*************************************************  /*************************************************
879  *                Main program                    *  *            Grep an individual file             *
880  *************************************************/  *************************************************/
881    
882  int  /* This is called from grep_or_recurse() below. It uses a buffer that is three
883  main(int argc, char **argv)  times the value of MBUFTHIRD. The matching point is never allowed to stray into
884    the top third of the buffer, thus keeping more of the file available for
885    context printing or for multiline scanning. For large files, the pointer will
886    be in the middle third most of the time, so the bottom third is available for
887    "before" context printing.
888    
889    Arguments:
890      handle       the fopened FILE stream for a normal file
891                   the gzFile pointer when reading is via libz
892                   the BZFILE pointer when reading is via libbz2
893      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
894      printname    the file name if it is to be printed for each match
895                   or NULL if the file name is not to be printed
896                   it cannot be NULL if filenames[_nomatch]_only is set
897    
898    Returns:       0 if there was at least one match
899                   1 otherwise (no matches)
900                   2 if there is a read error on a .bz2 file
901    */
902    
903    static int
904    pcregrep(void *handle, int frtype, char *printname)
905  {  {
 int i, j;  
906  int rc = 1;  int rc = 1;
907  int options = 0;  int linenumber = 1;
908  int errptr;  int lastmatchnumber = 0;
909  const char *error;  int count = 0;
910  BOOL only_one_at_top;  int filepos = 0;
911    int offsets[OFFSET_SIZE];
912    char *lastmatchrestart = NULL;
913    char buffer[3*MBUFTHIRD];
914    char *ptr = buffer;
915    char *endptr;
916    size_t bufflength;
917    BOOL endhyphenpending = FALSE;
918    FILE *in = NULL;                    /* Ensure initialized */
919    
920  /* Process the options */  #ifdef SUPPORT_LIBZ
921    gzFile ingz = NULL;
922    #endif
923    
924  for (i = 1; i < argc; i++)  #ifdef SUPPORT_LIBBZ2
925    BZFILE *inbz2 = NULL;
926    #endif
927    
928    
929    /* Do the first read into the start of the buffer and set up the pointer to end
930    of what we have. In the case of libz, a non-zipped .gz file will be read as a
931    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
932    fail. */
933    
934    #ifdef SUPPORT_LIBZ
935    if (frtype == FR_LIBZ)
936    {    {
937    if (argv[i][0] != '-') break;    ingz = (gzFile)handle;
938      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
939      }
940    else
941    #endif
942    
943    /* Missing options */  #ifdef SUPPORT_LIBBZ2
944    if (frtype == FR_LIBBZ2)
945      {
946      inbz2 = (BZFILE *)handle;
947      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
948      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
949      }                                    /* without the cast it is unsigned. */
950    else
951    #endif
952    
953    if (argv[i][1] == 0) exit(usage(2));    {
954      in = (FILE *)handle;
955      bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
956      }
957    
958    /* Long name options */  endptr = buffer + bufflength;
959    
960    if (argv[i][1] == '-')  /* Loop while the current pointer is not at the end of the file. For large
961      {  files, endptr will be at the end of the buffer when we are in the middle of the
962      option_item *op;  file, but ptr will never get there, because as soon as it gets over 2/3 of the
963    way, the buffer is shifted left and re-filled. */
964    
965      if (strncmp(argv[i]+2, "file=", 5) == 0)  while (ptr < endptr)
966        {    {
967        pattern_filename = argv[i] + 7;    int endlinelength;
968        continue;    int mrc = 0;
969        }    BOOL match;
970      char *matchptr = ptr;
971      char *t = ptr;
972      size_t length, linelength;
973    
974      /* At this point, ptr is at the start of a line. We need to find the length
975      of the subject string to pass to pcre_exec(). In multiline mode, it is the
976    length of the remainder of the data in the buffer. Otherwise, it is the length of
977      the next line, excluding the terminating newline. After matching, we always
978      advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
979      option is used for compiling, so that any match is constrained to be in the
980      first line. */
981    
982      t = end_of_line(t, endptr, &endlinelength);
983      linelength = t - ptr - endlinelength;
984      length = multiline? (size_t)(endptr - ptr) : linelength;
985    
986      /* Extra processing for Jeffrey Friedl's debugging. */
987    
988    #ifdef JFRIEDL_DEBUG
989      if (jfriedl_XT || jfriedl_XR)
990      {
991          #include <sys/time.h>
992          #include <time.h>
993          struct timeval start_time, end_time;
994          struct timezone dummy;
995          int i;
996    
997          if (jfriedl_XT)
998          {
999              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1000              const char *orig = ptr;
1001              ptr = malloc(newlen + 1);
1002              if (!ptr) {
1003                      printf("out of memory");
1004                      exit(2);
1005              }
1006              endptr = ptr;
1007              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1008              for (i = 0; i < jfriedl_XT; i++) {
1009                      strncpy(endptr, orig,  length);
1010                      endptr += length;
1011              }
1012              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1013              length = newlen;
1014          }
1015    
1016          if (gettimeofday(&start_time, &dummy) != 0)
1017                  perror("bad gettimeofday");
1018    
1019    
1020          for (i = 0; i < jfriedl_XR; i++)
1021              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1022                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1023    
1024          if (gettimeofday(&end_time, &dummy) != 0)
1025                  perror("bad gettimeofday");
1026    
1027          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1028                          -
1029                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1030    
1031          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1032          return 0;
1033      }
1034    #endif
1035    
1036      /* We come back here after a match when the -o option (only_matching) is set,
1037      in order to find any further matches in the same line. */
1038    
1039      ONLY_MATCHING_RESTART:
1040    
1041      /* Run through all the patterns until one matches or there is an error other
1042      than NOMATCH. This code is in a subroutine so that it can be re-used for
1043      finding subsequent matches when colouring matched lines. */
1044    
1045      match = match_patterns(matchptr, length, offsets, &mrc);
1046    
1047      /* If it's a match or a not-match (as required), do what's wanted. */
1048    
1049      if (match != invert)
1050        {
1051        BOOL hyphenprinted = FALSE;
1052    
1053        /* We've failed if we want a file that doesn't have any matches. */
1054    
1055        if (filenames == FN_NOMATCH_ONLY) return 1;
1056    
1057        /* Just count if just counting is wanted. */
1058    
1059        if (count_only) count++;
1060    
1061        /* If all we want is a file name, there is no need to scan any more lines
1062        in the file. */
1063    
1064        else if (filenames == FN_ONLY)
1065          {
1066          fprintf(stdout, "%s\n", printname);
1067          return 0;
1068          }
1069    
1070        /* Likewise, if all we want is a yes/no answer. */
1071    
1072        else if (quiet) return 0;
1073    
1074        /* The --only-matching option prints just the substring that matched, and
1075        the --file-offsets and --line-offsets options output offsets for the
1076        matching substring (they both force --only-matching). None of these options
1077        prints any context. Afterwards, adjust the start and length, and then jump
1078        back to look for further matches in the same line. If we are in invert
1079        mode, however, nothing is printed - this could still be useful because the
1080        return code is set. */
1081    
1082        else if (only_matching)
1083          {
1084          if (!invert)
1085            {
1086            if (printname != NULL) fprintf(stdout, "%s:", printname);
1087            if (number) fprintf(stdout, "%d:", linenumber);
1088            if (line_offsets)
1089              fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1090                offsets[1] - offsets[0]);
1091            else if (file_offsets)
1092              fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1093                offsets[1] - offsets[0]);
1094            else
1095              {
1096              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1097              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1098              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1099              }
1100            fprintf(stdout, "\n");
1101            matchptr += offsets[1];
1102            length -= offsets[1];
1103            match = FALSE;
1104            goto ONLY_MATCHING_RESTART;
1105            }
1106          }
1107    
1108        /* This is the default case when none of the above options is set. We print
1109        the matching line(s), possibly preceded and/or followed by other lines of
1110        context. */
1111    
1112        else
1113          {
1114          /* See if there is a requirement to print some "after" lines from a
1115          previous match. We never print any overlaps. */
1116    
1117          if (after_context > 0 && lastmatchnumber > 0)
1118            {
1119            int ellength;
1120            int linecount = 0;
1121            char *p = lastmatchrestart;
1122    
1123            while (p < ptr && linecount < after_context)
1124              {
1125              p = end_of_line(p, ptr, &ellength);
1126              linecount++;
1127              }
1128    
1129            /* It is important to advance lastmatchrestart during this printing so
1130            that it interacts correctly with any "before" printing below. Print
1131            each line's data using fwrite() in case there are binary zeroes. */
1132    
1133            while (lastmatchrestart < p)
1134              {
1135              char *pp = lastmatchrestart;
1136              if (printname != NULL) fprintf(stdout, "%s-", printname);
1137              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1138              pp = end_of_line(pp, endptr, &ellength);
1139              fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1140              lastmatchrestart = pp;
1141              }
1142            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1143            }
1144    
1145          /* If there were non-contiguous lines printed above, insert hyphens. */
1146    
1147          if (hyphenpending)
1148            {
1149            fprintf(stdout, "--\n");
1150            hyphenpending = FALSE;
1151            hyphenprinted = TRUE;
1152            }
1153    
1154          /* See if there is a requirement to print some "before" lines for this
1155          match. Again, don't print overlaps. */
1156    
1157          if (before_context > 0)
1158            {
1159            int linecount = 0;
1160            char *p = ptr;
1161    
1162            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1163                   linecount < before_context)
1164              {
1165              linecount++;
1166              p = previous_line(p, buffer);
1167              }
1168    
1169            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1170              fprintf(stdout, "--\n");
1171    
1172            while (p < ptr)
1173              {
1174              int ellength;
1175              char *pp = p;
1176              if (printname != NULL) fprintf(stdout, "%s-", printname);
1177              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1178              pp = end_of_line(pp, endptr, &ellength);
1179              fwrite(p, 1, pp - p, stdout);
1180              p = pp;
1181              }
1182            }
1183    
1184          /* Now print the matching line(s); ensure we set hyphenpending at the end
1185          of the file if any context lines are being output. */
1186    
1187          if (after_context > 0 || before_context > 0)
1188            endhyphenpending = TRUE;
1189    
1190          if (printname != NULL) fprintf(stdout, "%s:", printname);
1191          if (number) fprintf(stdout, "%d:", linenumber);
1192    
1193          /* In multiline mode, we want to print to the end of the line in which
1194          the end of the matched string is found, so we adjust linelength and the
1195          line number appropriately, but only when there actually was a match
1196          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1197          the match will always be before the first newline sequence. */
1198    
1199          if (multiline)
1200            {
1201            int ellength;
1202            char *endmatch = ptr;
1203            if (!invert)
1204              {
1205              endmatch += offsets[1];
1206              t = ptr;
1207              while (t < endmatch)
1208                {
1209                t = end_of_line(t, endptr, &ellength);
1210                if (t <= endmatch) linenumber++; else break;
1211                }
1212              }
1213            endmatch = end_of_line(endmatch, endptr, &ellength);
1214            linelength = endmatch - ptr - ellength;
1215            }
1216    
1217          /*** NOTE: Use only fwrite() to output the data line, so that binary
1218          zeroes are treated as just another data character. */
1219    
1220          /* This extra option, for Jeffrey Friedl's debugging requirements,
1221          replaces the matched string, or a specific captured string if it exists,
1222          with X. When this happens, colouring is ignored. */
1223    
1224    #ifdef JFRIEDL_DEBUG
1225          if (S_arg >= 0 && S_arg < mrc)
1226            {
1227            int first = S_arg * 2;
1228            int last  = first + 1;
1229            fwrite(ptr, 1, offsets[first], stdout);
1230            fprintf(stdout, "X");
1231            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1232            }
1233          else
1234    #endif
1235    
1236          /* We have to split the line(s) up if colouring, and search for further
1237          matches. */
1238    
1239          if (do_colour)
1240            {
1241            int last_offset = 0;
1242            fwrite(ptr, 1, offsets[0], stdout);
1243            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1244            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1245            fprintf(stdout, "%c[00m", 0x1b);
1246            for (;;)
1247              {
1248              last_offset += offsets[1];
1249              matchptr += offsets[1];
1250              length -= offsets[1];
1251              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1252              fwrite(matchptr, 1, offsets[0], stdout);
1253              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1254              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1255              fprintf(stdout, "%c[00m", 0x1b);
1256              }
1257            fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,
1258              stdout);
1259            }
1260    
1261          /* Not colouring; no need to search for further matches */
1262    
1263          else fwrite(ptr, 1, linelength + endlinelength, stdout);
1264          }
1265    
1266        /* End of doing what has to be done for a match */
1267    
1268        rc = 0;    /* Had some success */
1269    
1270        /* Remember where the last match happened for after_context. We remember
1271        where we are about to restart, and that line's number. */
1272    
1273        lastmatchrestart = ptr + linelength + endlinelength;
1274        lastmatchnumber = linenumber + 1;
1275        }
1276    
1277      /* For a match in multiline inverted mode (which of course did not cause
1278      anything to be printed), we have to move on to the end of the match before
1279      proceeding. */
1280    
1281      if (multiline && invert && match)
1282        {
1283        int ellength;
1284        char *endmatch = ptr + offsets[1];
1285        t = ptr;
1286        while (t < endmatch)
1287          {
1288          t = end_of_line(t, endptr, &ellength);
1289          if (t <= endmatch) linenumber++; else break;
1290          }
1291        endmatch = end_of_line(endmatch, endptr, &ellength);
1292        linelength = endmatch - ptr - ellength;
1293        }
1294    
1295      /* Advance to after the newline and increment the line number. The file
1296      offset to the current line is maintained in filepos. */
1297    
1298      ptr += linelength + endlinelength;
1299      filepos += linelength + endlinelength;
1300      linenumber++;
1301    
1302      /* If we haven't yet reached the end of the file (the buffer is full), and
1303      the current point is in the top 1/3 of the buffer, slide the buffer down by
1304      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1305      about to be lost, print them. */
1306    
1307      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1308        {
1309        if (after_context > 0 &&
1310            lastmatchnumber > 0 &&
1311            lastmatchrestart < buffer + MBUFTHIRD)
1312          {
1313          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1314          lastmatchnumber = 0;
1315          }
1316    
1317        /* Now do the shuffle */
1318    
1319        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1320        ptr -= MBUFTHIRD;
1321    
1322    #ifdef SUPPORT_LIBZ
1323        if (frtype == FR_LIBZ)
1324          bufflength = 2*MBUFTHIRD +
1325            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1326        else
1327    #endif
1328    
1329    #ifdef SUPPORT_LIBBZ2
1330        if (frtype == FR_LIBBZ2)
1331          bufflength = 2*MBUFTHIRD +
1332            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1333        else
1334    #endif
1335    
1336        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1337    
1338        endptr = buffer + bufflength;
1339    
1340        /* Adjust any last match point */
1341    
1342        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1343        }
1344      }     /* Loop through the whole file */
1345    
1346    /* End of file; print final "after" lines if wanted; do_after_lines sets
1347    hyphenpending if it prints something. */
1348    
1349    if (!only_matching && !count_only)
1350      {
1351      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1352      hyphenpending |= endhyphenpending;
1353      }
1354    
1355    /* Print the file name if we are looking for those without matches and there
1356    were none. If we found a match, we won't have got this far. */
1357    
1358    if (filenames == FN_NOMATCH_ONLY)
1359      {
1360      fprintf(stdout, "%s\n", printname);
1361      return 0;
1362      }
1363    
1364    /* Print the match count if wanted */
1365    
1366    if (count_only)
1367      {
1368      if (printname != NULL) fprintf(stdout, "%s:", printname);
1369      fprintf(stdout, "%d\n", count);
1370      }
1371    
1372    return rc;
1373    }
1374    
1375    
1376    
1377    /*************************************************
1378    *     Grep a file or recurse into a directory    *
1379    *************************************************/
1380    
1381    /* Given a path name, if it's a directory, scan all the files if we are
1382    recursing; if it's a file, grep it.
1383    
1384    Arguments:
1385      pathname          the path to investigate
1386      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1387      only_one_at_top   TRUE if the path is the only one at toplevel
1388    
1389    Returns:   0 if there was at least one match
1390               1 if there were no matches
1391               2 if there was some kind of error
1392    
1393    However, file opening failures are suppressed if "silent" is set.
1394    */
1395    
1396    static int
1397    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1398    {
1399    int rc = 1;
1400    int sep;
1401    int frtype;
1402    int pathlen;
1403    void *handle;
1404    FILE *in = NULL;           /* Ensure initialized */
1405    
1406    #ifdef SUPPORT_LIBZ
1407    gzFile ingz = NULL;
1408    #endif
1409    
1410    #ifdef SUPPORT_LIBBZ2
1411    BZFILE *inbz2 = NULL;
1412    #endif
1413    
1414    /* If the file name is "-" we scan stdin */
1415    
1416    if (strcmp(pathname, "-") == 0)
1417      {
1418      return pcregrep(stdin, FR_PLAIN,
1419        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1420          stdin_name : NULL);
1421      }
1422    
1423    /* If the file is a directory, skip it if skipping, or, if we are recursing, scan
1424    each file and directory within it, subject to any include or exclude patterns
1425    that were set. The scanning code is localized so it can be made
1426    system-specific. */
1427    
1428    if ((sep = isdirectory(pathname)) != 0)
1429      {
1430      if (dee_action == dee_SKIP) return 1;
1431      if (dee_action == dee_RECURSE)
1432        {
1433        char buffer[1024];
1434        char *nextfile;
1435        directory_type *dir = opendirectory(pathname);
1436    
1437        if (dir == NULL)
1438          {
1439          if (!silent)
1440            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1441              strerror(errno));
1442          return 2;
1443          }
1444    
1445        while ((nextfile = readdirectory(dir)) != NULL)
1446          {
1447          int frc, nflen;
1448          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1449          nflen = strlen(nextfile);
1450    
1451          if (isdirectory(buffer))
1452            {
1453            if (exclude_dir_compiled != NULL &&
1454                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1455              continue;
1456    
1457            if (include_dir_compiled != NULL &&
1458                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1459              continue;
1460            }
1461          else
1462            {
1463            if (exclude_compiled != NULL &&
1464                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1465              continue;
1466    
1467            if (include_compiled != NULL &&
1468                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1469              continue;
1470            }
1471    
1472          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1473          if (frc > 1) rc = frc;
1474           else if (frc == 0 && rc == 1) rc = 0;
1475          }
1476    
1477        closedirectory(dir);
1478        return rc;
1479        }
1480      }
1481    
1482    /* If the file is not a directory and not a regular file, skip it if that's
1483    been requested. */
1484    
1485    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1486    
1487    /* Control reaches here if we have a regular file, or if we have a directory
1488    and recursion or skipping was not requested, or if we have anything else and
1489    skipping was not requested. The scan proceeds. If this is the first and only
1490    argument at top level, we don't show the file name, unless we are only showing
1491    the file name, or the filename was forced (-H). */
1492    
1493    pathlen = strlen(pathname);
1494    
1495    /* Open using zlib if it is supported and the file name ends with .gz. */
1496    
1497    #ifdef SUPPORT_LIBZ
1498    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1499      {
1500      ingz = gzopen(pathname, "rb");
1501      if (ingz == NULL)
1502        {
1503        if (!silent)
1504          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1505            strerror(errno));
1506        return 2;
1507        }
1508      handle = (void *)ingz;
1509      frtype = FR_LIBZ;
1510      }
1511    else
1512    #endif
1513    
1514    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1515    
1516    #ifdef SUPPORT_LIBBZ2
1517    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1518      {
1519      inbz2 = BZ2_bzopen(pathname, "rb");
1520      handle = (void *)inbz2;
1521      frtype = FR_LIBBZ2;
1522      }
1523    else
1524    #endif
1525    
1526    /* Otherwise use plain fopen(). The label is so that we can come back here if
1527    an attempt to read a .bz2 file indicates that it really is a plain file. */
1528    
1529    #ifdef SUPPORT_LIBBZ2
1530    PLAIN_FILE:
1531    #endif
1532      {
1533      in = fopen(pathname, "r");
1534      handle = (void *)in;
1535      frtype = FR_PLAIN;
1536      }
1537    
1538    /* All the opening methods return NULL, leaving the reason in errno, when they fail. */
1539    
1540    if (handle == NULL)
1541      {
1542      if (!silent)
1543        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1544          strerror(errno));
1545      return 2;
1546      }
1547    
1548    /* Now grep the file */
1549    
1550    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1551      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1552    
1553    /* Close in an appropriate manner. */
1554    
1555    #ifdef SUPPORT_LIBZ
1556    if (frtype == FR_LIBZ)
1557      gzclose(ingz);
1558    else
1559    #endif
1560    
1561    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1562    read failed. If the error indicates that the file isn't in fact bzipped, try
1563    again as a normal file. */
1564    
1565    #ifdef SUPPORT_LIBBZ2
1566    if (frtype == FR_LIBBZ2)
1567      {
1568      if (rc == 2)
1569        {
1570        int errnum;
1571        const char *err = BZ2_bzerror(inbz2, &errnum);
1572        if (errnum == BZ_DATA_ERROR_MAGIC)
1573          {
1574          BZ2_bzclose(inbz2);
1575          goto PLAIN_FILE;
1576          }
1577        else if (!silent)
1578          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1579            pathname, err);
1580        }
1581      BZ2_bzclose(inbz2);
1582      }
1583    else
1584    #endif
1585    
1586    /* Normal file close */
1587    
1588    fclose(in);
1589    
1590    /* Pass back the yield from pcregrep(). */
1591    
1592    return rc;
1593    }
1594    
1595    
1596    
1597    
1598    /*************************************************
1599    *                Usage function                  *
1600    *************************************************/
1601    
1602    static int
1603    usage(int rc)
1604    {
1605    option_item *op;
1606    fprintf(stderr, "Usage: pcregrep [-");
1607    for (op = optionlist; op->one_char != 0; op++)
1608      {
1609      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1610      }
1611    fprintf(stderr, "] [long options] [pattern] [files]\n");
1612    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1613      "options.\n");
1614    return rc;
1615    }
1616    
1617    
1618    
1619    
1620    /*************************************************
1621    *                Help function                   *
1622    *************************************************/
1623    
1624    static void
1625    help(void)
1626    {
1627    option_item *op;
1628    
1629    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1630    printf("Search for PATTERN in each FILE or standard input.\n");
1631    printf("PATTERN must be present if neither -e nor -f is used.\n");
1632    printf("\"-\" can be used as a file name to mean STDIN.\n");
1633    
1634    #ifdef SUPPORT_LIBZ
1635    printf("Files whose names end in .gz are read using zlib.\n");
1636    #endif
1637    
1638    #ifdef SUPPORT_LIBBZ2
1639    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1640    #endif
1641    
1642    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1643    printf("Other files and the standard input are read as plain files.\n\n");
1644    #else
1645    printf("All files are read as plain files, without any interpretation.\n\n");
1646    #endif
1647    
1648    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1649    printf("Options:\n");
1650    
1651    for (op = optionlist; op->one_char != 0; op++)
1652      {
1653      int n;
1654      char s[4];
1655      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1656      n = 30 - printf("  %s --%s", s, op->long_name);
1657      if (n < 1) n = 1;
1658      printf("%.*s%s\n", n, "                    ", op->help_text);
1659      }
1660    
1661    printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1662    printf("trailing white space is removed and blank lines are ignored.\n");
1663    printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1664    
1665    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1666    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1667    }
1668    
1669    
1670    
1671    
1672    /*************************************************
1673    *    Handle a single-letter, no data option      *
1674    *************************************************/
1675    
1676    static int
1677    handle_option(int letter, int options)
1678    {
1679    switch(letter)
1680      {
1681      case N_FOFFSETS: file_offsets = TRUE; break;
1682      case N_HELP: help(); exit(0);
1683      case N_LOFFSETS: line_offsets = number = TRUE; break;
1684      case 'c': count_only = TRUE; break;
1685      case 'F': process_options |= PO_FIXED_STRINGS; break;
1686      case 'H': filenames = FN_FORCE; break;
1687      case 'h': filenames = FN_NONE; break;
1688      case 'i': options |= PCRE_CASELESS; break;
1689      case 'l': filenames = FN_ONLY; break;
1690      case 'L': filenames = FN_NOMATCH_ONLY; break;
1691      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1692      case 'n': number = TRUE; break;
1693      case 'o': only_matching = TRUE; break;
1694      case 'q': quiet = TRUE; break;
1695      case 'r': dee_action = dee_RECURSE; break;
1696      case 's': silent = TRUE; break;
1697      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1698      case 'v': invert = TRUE; break;
1699      case 'w': process_options |= PO_WORD_MATCH; break;
1700      case 'x': process_options |= PO_LINE_MATCH; break;
1701    
1702      case 'V':
1703      fprintf(stderr, "pcregrep version %s\n", pcre_version());
1704      exit(0);
1705      break;
1706    
1707      default:
1708      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1709      exit(usage(2));
1710      }
1711    
1712    return options;
1713    }
1714    
1715    
1716    
1717    
1718    /*************************************************
1719    *          Construct printed ordinal             *
1720    *************************************************/
1721    
/* This turns a number into its printed English ordinal: "1st", "2nd", "3rd",
"4th", and so on. Numbers whose last two digits are 11, 12, or 13 take "th"
("11th", "112th"), not the suffix that their final digit alone would suggest.

Argument:   n    the number
Returns:    a pointer to a static buffer holding the text; the buffer is
            overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[24];   /* Sign, up to 20 digits, 2-letter suffix, zero */
const char *ending = "th";
int last_two = n % 100;

/* The "teens" are the exception to the last-digit rule. For negative numbers
the remainders are not positive, so they fall through to "th" as well. */

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

/* sprintf() yields the number of characters written, which is where the
suffix has to go. */

strcpy(buffer + sprintf(buffer, "%d", n), ending);
return buffer;
}
1740    
1741    
1742    
1743    /*************************************************
1744    *          Compile a single pattern              *
1745    *************************************************/
1746    
1747    /* When the -F option has been used, this is called for each substring.
1748    Otherwise it's called for each supplied pattern.
1749    
1750    Arguments:
1751      pattern        the pattern string
1752      options        the PCRE options
1753      filename       the file name, or NULL for a command-line pattern
1754      count          0 if this is the only command line pattern, or
1755                     number of the command line pattern, or
1756                     linenumber for a pattern from a file
1757    
1758    Returns:         TRUE on success, FALSE after an error
1759    */
1760    
1761    static BOOL
1762    compile_single_pattern(char *pattern, int options, char *filename, int count)
1763    {
1764    char buffer[MBUFTHIRD + 16];
1765    const char *error;
1766    int errptr;
1767    
1768    if (pattern_count >= MAX_PATTERN_COUNT)
1769      {
1770      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1771        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1772      return FALSE;
1773      }
1774    
1775    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1776      suffix[process_options]);
1777    pattern_list[pattern_count] =
1778      pcre_compile(buffer, options, &error, &errptr, pcretables);
1779    if (pattern_list[pattern_count] != NULL)
1780      {
1781      pattern_count++;
1782      return TRUE;
1783      }
1784    
1785    /* Handle compile errors */
1786    
1787    errptr -= (int)strlen(prefix[process_options]);
1788    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1789    
1790    if (filename == NULL)
1791      {
1792      if (count == 0)
1793        fprintf(stderr, "pcregrep: Error in command-line regex "
1794          "at offset %d: %s\n", errptr, error);
1795      else
1796        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1797          "at offset %d: %s\n", ordin(count), errptr, error);
1798      }
1799    else
1800      {
1801      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1802        "at offset %d: %s\n", count, filename, errptr, error);
1803      }
1804    
1805    return FALSE;
1806    }
1807    
1808    
1809    
1810    /*************************************************
1811    *           Compile one supplied pattern         *
1812    *************************************************/
1813    
1814    /* When the -F option has been used, each string may be a list of strings,
1815    separated by line breaks. They will be matched literally.
1816    
1817    Arguments:
1818      pattern        the pattern string
1819      options        the PCRE options
1820      filename       the file name, or NULL for a command-line pattern
1821      count          0 if this is the only command line pattern, or
1822                     number of the command line pattern, or
1823                     linenumber for a pattern from a file
1824    
1825    Returns:         TRUE on success, FALSE after an error
1826    */
1827    
1828    static BOOL
1829    compile_pattern(char *pattern, int options, char *filename, int count)
1830    {
1831    if ((process_options & PO_FIXED_STRINGS) != 0)
1832      {
1833      char *eop = pattern + strlen(pattern);
1834      char buffer[MBUFTHIRD];
1835      for(;;)
1836        {
1837        int ellength;
1838        char *p = end_of_line(pattern, eop, &ellength);
1839        if (ellength == 0)
1840          return compile_single_pattern(pattern, options, filename, count);
1841        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1842        pattern = p;
1843        if (!compile_single_pattern(buffer, options, filename, count))
1844          return FALSE;
1845        }
1846      }
1847    else return compile_single_pattern(pattern, options, filename, count);
1848    }
1849    
1850    
1851    
1852    /*************************************************
1853    *                Main program                    *
1854    *************************************************/
1855    
1856    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1857    
1858    int
1859    main(int argc, char **argv)
1860    {
1861    int i, j;
1862    int rc = 1;
1863    int pcre_options = 0;
1864    int cmd_pattern_count = 0;
1865    int hint_count = 0;
1866    int errptr;
1867    BOOL only_one_at_top;
1868    char *patterns[MAX_PATTERN_COUNT];
1869    const char *locale_from = "--locale";
1870    const char *error;
1871    
1872    /* Set the default line ending value from the default in the PCRE library;
1873    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1874    Note that the return values from pcre_config(), though derived from the ASCII
1875    codes, are the same in EBCDIC environments, so we must use the actual values
1876    rather than escapes such as '\r'. */
1877    
1878    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1879    switch(i)
1880      {
1881      default:               newline = (char *)"lf"; break;
1882      case 13:               newline = (char *)"cr"; break;
1883      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
1884      case -1:               newline = (char *)"any"; break;
1885      case -2:               newline = (char *)"anycrlf"; break;
1886      }
1887    
1888    /* Process the options */
1889    
1890    for (i = 1; i < argc; i++)
1891      {
1892      option_item *op = NULL;
1893      char *option_data = (char *)"";    /* default to keep compiler happy */
1894      BOOL longop;
1895      BOOL longopwasequals = FALSE;
1896    
1897      if (argv[i][0] != '-') break;
1898    
1899      /* If we hit an argument that is just "-", it may be a reference to STDIN,
1900      but only if we have previously had -e or -f to define the patterns. */
1901    
1902      if (argv[i][1] == 0)
1903        {
1904        if (pattern_filename != NULL || pattern_count > 0) break;
1905          else exit(usage(2));
1906        }
1907    
1908      /* Handle a long name option, or -- to terminate the options */
1909    
1910      if (argv[i][1] == '-')
1911        {
1912        char *arg = argv[i] + 2;
1913        char *argequals = strchr(arg, '=');
1914    
1915        if (*arg == 0)    /* -- terminates options */
1916          {
1917          i++;
1918          break;                /* out of the options-handling loop */
1919          }
1920    
1921        longop = TRUE;
1922    
1923        /* Some long options have data that follows after =, for example file=name.
1924        Some options have variations in the long name spelling: specifically, we
1925        allow "regexp" because GNU grep allows it, though I personally go along
1926        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1927        These options are entered in the table as "regex(p)". No option is in both
1928        these categories, fortunately. */
1929    
1930      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
1931        {        {
1932        if (strcmp(argv[i]+2, op->long_name) == 0)        char *opbra = strchr(op->long_name, '(');
1933          char *equals = strchr(op->long_name, '=');
1934          if (opbra == NULL)     /* Not a (p) case */
1935          {          {
1936          options = handle_option(op->one_char, options);          if (equals == NULL)  /* Not thing=data case */
1937          break;            {
1938              if (strcmp(arg, op->long_name) == 0) break;
1939              }
1940            else                 /* Special case xxx=data */
1941              {
1942              int oplen = equals - op->long_name;
1943              int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1944              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1945                {
1946                option_data = arg + arglen;
1947                if (*option_data == '=')
1948                  {
1949                  option_data++;
1950                  longopwasequals = TRUE;
1951                  }
1952                break;
1953                }
1954              }
1955            }
1956          else                   /* Special case xxxx(p) */
1957            {
1958            char buff1[24];
1959            char buff2[24];
1960            int baselen = opbra - op->long_name;
1961            sprintf(buff1, "%.*s", baselen, op->long_name);
1962            sprintf(buff2, "%s%.*s", buff1,
1963              (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1964            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1965              break;
1966          }          }
1967        }        }
1968    
1969      if (op->one_char == 0)      if (op->one_char == 0)
1970        {        {
1971        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
# Line 519  for (i = 1; i < argc; i++) Line 1973  for (i = 1; i < argc; i++)
1973        }        }
1974      }      }
1975    
1976    /* One-char options */  
1977      /* Jeffrey Friedl's debugging harness uses these additional options which
1978      are not in the right form for putting in the option table because they use
1979      only one hyphen, yet are more than one character long. By putting them
1980      separately here, they will not get displayed as part of the help() output,
1981      but I don't think Jeffrey will care about that. */
1982    
1983    #ifdef JFRIEDL_DEBUG
1984      else if (strcmp(argv[i], "-pre") == 0) {
1985              jfriedl_prefix = argv[++i];
1986              continue;
1987      } else if (strcmp(argv[i], "-post") == 0) {
1988              jfriedl_postfix = argv[++i];
1989              continue;
1990      } else if (strcmp(argv[i], "-XT") == 0) {
1991              sscanf(argv[++i], "%d", &jfriedl_XT);
1992              continue;
1993      } else if (strcmp(argv[i], "-XR") == 0) {
1994              sscanf(argv[++i], "%d", &jfriedl_XR);
1995              continue;
1996      }
1997    #endif
1998    
1999    
2000      /* One-char options; many that have no data may be in a single argument; we
2001      continue till we hit the last one or one that needs data. */
2002    
2003    else    else
2004      {      {
2005      char *s = argv[i] + 1;      char *s = argv[i] + 1;
2006        longop = FALSE;
2007      while (*s != 0)      while (*s != 0)
2008        {        {
2009        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
2010            { if (*s == op->one_char) break; }
2011          if (op->one_char == 0)
2012          {          {
2013          pattern_filename = s + 1;          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2014          if (pattern_filename[0] == 0)            *s, argv[i]);
2015            {          exit(usage(2));
2016            if (i >= argc - 1)          }
2017              {        if (op->type != OP_NODATA || s[1] == 0)
2018              fprintf(stderr, "pcregrep: File name missing after -f\n");          {
2019              exit(usage(2));          option_data = s+1;
             }  
           pattern_filename = argv[++i];  
           }  
2020          break;          break;
2021          }          }
2022        else options = handle_option(*s++, options);        pcre_options = handle_option(*s++, pcre_options);
2023        }        }
2024      }      }
   }  
2025    
2026  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));    /* At this point we should have op pointing to a matched option. If the type
2027  hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));    is NO_DATA, it means that there is no data, and the option might set
2028      something in the PCRE options. */
2029    
2030  if (pattern_list == NULL || hints_list == NULL)    if (op->type == OP_NODATA)
2031    {      {
2032    fprintf(stderr, "pcregrep: malloc failed\n");      pcre_options = handle_option(op->one_char, pcre_options);
2033    return 2;      continue;
2034    }      }
2035    
2036  /* Compile the regular expression(s). */    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2037      either has a value or defaults to something. It cannot have data in a
2038      separate item. At the moment, the only such options are "colo(u)r" and
2039      Jeffrey Friedl's special -S debugging option. */
2040    
2041  if (pattern_filename != NULL)    if (*option_data == 0 &&
2042    {        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
   FILE *f = fopen(pattern_filename, "r");  
   char buffer[BUFSIZ];  
   if (f == NULL)  
2043      {      {
2044      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      switch (op->one_char)
2045        strerror(errno));        {
2046      return 2;        case N_COLOUR:
2047          colour_option = (char *)"auto";
2048          break;
2049    #ifdef JFRIEDL_DEBUG
2050          case 'S':
2051          S_arg = 0;
2052          break;
2053    #endif
2054          }
2055        continue;
2056      }      }
2057    while (fgets(buffer, sizeof(buffer), f) != NULL)  
2058      /* Otherwise, find the data string for the option. */
2059    
2060      if (*option_data == 0)
2061      {      {
2062      char *s = buffer + (int)strlen(buffer);      if (i >= argc - 1 || longopwasequals)
     if (pattern_count >= MAX_PATTERN_COUNT)  
2063        {        {
2064        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2065          exit(usage(2));
2066          }
2067        option_data = argv[++i];
2068        }
2069    
2070      /* If the option type is OP_PATLIST, it's the -e option, which can be called
2071      multiple times to create a list of patterns. */
2072    
2073      if (op->type == OP_PATLIST)
2074        {
2075        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2076          {
2077          fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2078          MAX_PATTERN_COUNT);          MAX_PATTERN_COUNT);
2079        return 2;        return 2;
2080        }        }
2081      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      patterns[cmd_pattern_count++] = option_data;
2082      if (s == buffer) continue;      }
2083      *s = 0;  
2084      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,    /* Otherwise, deal with single string or numeric data values. */
2085        &errptr, NULL);  
2086      if (pattern_list[pattern_count++] == NULL)    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2087        {
2088        *((char **)op->dataptr) = option_data;
2089        }
2090      else
2091        {
2092        char *endptr;
2093        int n = strtoul(option_data, &endptr, 10);
2094        if (*endptr != 0)
2095        {        {
2096        fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",        if (longop)
2097          pattern_count, errptr, error);          {
2098        return 2;          char *equals = strchr(op->long_name, '=');
2099            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2100              equals - op->long_name;
2101            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2102              option_data, nlen, op->long_name);
2103            }
2104          else
2105            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2106              option_data, op->one_char);
2107          exit(usage(2));
2108        }        }
2109        *((int *)op->dataptr) = n;
2110        }
2111      }
2112    
2113    /* Options have been decoded. If -C was used, its value is used as a default
2114    for -A and -B. */
2115    
2116    if (both_context > 0)
2117      {
2118      if (after_context == 0) after_context = both_context;
2119      if (before_context == 0) before_context = both_context;
2120      }
2121    
2122    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2123    However, the latter two set the only_matching flag. */
2124    
2125    if ((only_matching && (file_offsets || line_offsets)) ||
2126        (file_offsets && line_offsets))
2127      {
2128      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2129        "and/or --line-offsets\n");
2130      exit(usage(2));
2131      }
2132    
2133    if (file_offsets || line_offsets) only_matching = TRUE;
2134    
2135    /* If a locale has not been provided as an option, see if the LC_CTYPE or
2136    LC_ALL environment variable is set, and if so, use it. */
2137    
2138    if (locale == NULL)
2139      {
2140      locale = getenv("LC_ALL");
2141      locale_from = "LCC_ALL";
2142      }
2143    
2144    if (locale == NULL)
2145      {
2146      locale = getenv("LC_CTYPE");
2147      locale_from = "LC_CTYPE";
2148      }
2149    
2150    /* If a locale has been provided, set it, and generate the tables the PCRE
2151    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2152    
2153    if (locale != NULL)
2154      {
2155      if (setlocale(LC_CTYPE, locale) == NULL)
2156        {
2157        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2158          locale, locale_from);
2159        return 2;
2160        }
2161      pcretables = pcre_maketables();
2162      }
2163    
2164    /* Sort out colouring */
2165    
2166    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2167      {
2168      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2169      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2170      else
2171        {
2172        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2173          colour_option);
2174        return 2;
2175        }
2176      if (do_colour)
2177        {
2178        char *cs = getenv("PCREGREP_COLOUR");
2179        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2180        if (cs != NULL) colour_string = cs;
2181      }      }
   fclose(f);  
2182    }    }
2183    
2184  /* If no file name, a single regex must be given inline */  /* Interpret the newline type; the default settings are Unix-like. */
2185    
2186    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2187      {
2188      pcre_options |= PCRE_NEWLINE_CR;
2189      endlinetype = EL_CR;
2190      }
2191    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2192      {
2193      pcre_options |= PCRE_NEWLINE_LF;
2194      endlinetype = EL_LF;
2195      }
2196    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2197      {
2198      pcre_options |= PCRE_NEWLINE_CRLF;
2199      endlinetype = EL_CRLF;
2200      }
2201    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2202      {
2203      pcre_options |= PCRE_NEWLINE_ANY;
2204      endlinetype = EL_ANY;
2205      }
2206    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2207      {
2208      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2209      endlinetype = EL_ANYCRLF;
2210      }
2211  else  else
2212    {    {
2213    if (i >= argc) return usage(2);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2214    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);    return 2;
2215    if (pattern_list[0] == NULL)    }
2216    
2217    /* Interpret the text values for -d and -D */
2218    
2219    if (dee_option != NULL)
2220      {
2221      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2222      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2223      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2224      else
2225      {      {
2226      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
       error);  
2227      return 2;      return 2;
2228      }      }
   pattern_count++;  
2229    }    }
2230    
2231  /* Study the regular expressions, as we will be running them may times */  if (DEE_option != NULL)
2232      {
2233      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2234      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2235      else
2236        {
2237        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2238        return 2;
2239        }
2240      }
2241    
2242    /* Check the values for Jeffrey Friedl's debugging options. */
2243    
2244    #ifdef JFRIEDL_DEBUG
2245    if (S_arg > 9)
2246      {
2247      fprintf(stderr, "pcregrep: bad value for -S option\n");
2248      return 2;
2249      }
2250    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2251      {
2252      if (jfriedl_XT == 0) jfriedl_XT = 1;
2253      if (jfriedl_XR == 0) jfriedl_XR = 1;
2254      }
2255    #endif
2256    
2257    /* Get memory to store the pattern and hints lists. */
2258    
2259    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2260    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2261    
2262    if (pattern_list == NULL || hints_list == NULL)
2263      {
2264      fprintf(stderr, "pcregrep: malloc failed\n");
2265      goto EXIT2;
2266      }
2267    
2268    /* If no patterns were provided by -e, and there is no file provided by -f,
2269    the first argument is the one and only pattern, and it must exist. */
2270    
2271    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2272      {
2273      if (i >= argc) return usage(2);
2274      patterns[cmd_pattern_count++] = argv[i++];
2275      }
2276    
2277    /* Compile the patterns that were provided on the command line, either by
2278    multiple uses of -e or as a single unkeyed pattern. */
2279    
2280    for (j = 0; j < cmd_pattern_count; j++)
2281      {
2282      if (!compile_pattern(patterns[j], pcre_options, NULL,
2283           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2284        goto EXIT2;
2285      }
2286    
2287    /* Compile the regular expressions that are provided in a file. */
2288    
2289    if (pattern_filename != NULL)
2290      {
2291      int linenumber = 0;
2292      FILE *f;
2293      char *filename;
2294      char buffer[MBUFTHIRD];
2295    
2296      if (strcmp(pattern_filename, "-") == 0)
2297        {
2298        f = stdin;
2299        filename = stdin_name;
2300        }
2301      else
2302        {
2303        f = fopen(pattern_filename, "r");
2304        if (f == NULL)
2305          {
2306          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2307            strerror(errno));
2308          goto EXIT2;
2309          }
2310        filename = pattern_filename;
2311        }
2312    
2313      while (fgets(buffer, MBUFTHIRD, f) != NULL)
2314        {
2315        char *s = buffer + (int)strlen(buffer);
2316        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2317        *s = 0;
2318        linenumber++;
2319        if (buffer[0] == 0) continue;   /* Skip blank lines */
2320        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2321          goto EXIT2;
2322        }
2323    
2324      if (f != stdin) fclose(f);
2325      }
2326    
2327    /* Study the regular expressions, as we will be running them many times */
2328    
2329  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
2330    {    {
# Line 615  for (j = 0; j < pattern_count; j++) Line 2334  for (j = 0; j < pattern_count; j++)
2334      char s[16];      char s[16];
2335      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2336      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2337      return 2;      goto EXIT2;
2338        }
2339      hint_count++;
2340      }
2341    
2342    /* If there are include or exclude patterns, compile them. */
2343    
2344    if (exclude_pattern != NULL)
2345      {
2346      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2347        pcretables);
2348      if (exclude_compiled == NULL)
2349        {
2350        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2351          errptr, error);
2352        goto EXIT2;
2353        }
2354      }
2355    
2356    if (include_pattern != NULL)
2357      {
2358      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2359        pcretables);
2360      if (include_compiled == NULL)
2361        {
2362        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2363          errptr, error);
2364        goto EXIT2;
2365        }
2366      }
2367    
2368    if (exclude_dir_pattern != NULL)
2369      {
2370      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2371        pcretables);
2372      if (exclude_dir_compiled == NULL)
2373        {
2374        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2375          errptr, error);
2376        goto EXIT2;
2377        }
2378      }
2379    
2380    if (include_dir_pattern != NULL)
2381      {
2382      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2383        pcretables);
2384      if (include_dir_compiled == NULL)
2385        {
2386        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2387          errptr, error);
2388        goto EXIT2;
2389      }      }
2390    }    }
2391    
2392  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
2393    
2394  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc)
2395      {
2396      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2397      goto EXIT;
2398      }
2399    
2400  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2401  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
2402  the file name if the argument is not a directory. */  the file name if the argument is not a directory and filenames are not
2403    otherwise forced. */
2404    
2405  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
 if (filenames_only) filenames = TRUE;  
2406    
2407  for (; i < argc; i++)  for (; i < argc; i++)
2408    {    {
2409    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2410    if (frc == 0 && rc == 1) rc = 0;      only_one_at_top);
2411      if (frc > 1) rc = frc;
2412        else if (frc == 0 && rc == 1) rc = 0;
2413    }    }
2414    
2415    EXIT:
2416    if (pattern_list != NULL)
2417      {
2418      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2419      free(pattern_list);
2420      }
2421    if (hints_list != NULL)
2422      {
2423      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2424      free(hints_list);
2425      }
2426  return rc;  return rc;
2427    
2428    EXIT2:
2429    rc = 2;
2430    goto EXIT;
2431  }  }
2432    
2433  /* End */  /* End of pcregrep */

Legend:
Removed from v.63  
changed lines
  Added in v.391

  ViewVC Help
Powered by ViewVC 1.1.5