/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 75 by nigel, Sat Feb 24 21:40:37 2007 UTC revision 378 by ph10, Sun Mar 1 14:13:34 2009 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2004 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55    #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 50  POSSIBILITY OF SUCH DAMAGE. Line 70  POSSIBILITY OF SUCH DAMAGE.
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "3.0 14-Jan-2003"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76    #if BUFSIZ > 8192
77    #define MBUFTHIRD BUFSIZ
78    #else
79    #define MBUFTHIRD 8192
80    #endif
81    
82    /* Values for the "filenames" variable, which specifies options for file name
83    output. The order is important; it is assumed that a file name is wanted for
84    all values greater than FN_DEFAULT. */
85    
86    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92    /* Actions for the -d and -D options */
93    
94    enum { dee_READ, dee_SKIP, dee_RECURSE };
95    enum { DEE_READ, DEE_SKIP };
96    
97    /* Actions for special processing options (flag bits) */
98    
99    #define PO_WORD_MATCH     0x0001
100    #define PO_LINE_MATCH     0x0002
101    #define PO_FIXED_STRINGS  0x0004
102    
103    /* Line ending types */
104    
105    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    
108    
109  /*************************************************  /*************************************************
110  *               Global variables                 *  *               Global variables                 *
111  *************************************************/  *************************************************/
112    
113    /* Jeffrey Friedl has some debugging requirements that are not part of the
114    regular code. */
115    
116    #ifdef JFRIEDL_DEBUG
117    static int S_arg = -1;
118    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
119    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
120    static const char *jfriedl_prefix = "";
121    static const char *jfriedl_postfix = "";
122    #endif
123    
124    static int  endlinetype;
125    
126    static char *colour_string = (char *)"1;31";
127    static char *colour_option = NULL;
128    static char *dee_option = NULL;
129    static char *DEE_option = NULL;
130    static char *newline = NULL;
131  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
132    static char *stdin_name = (char *)"(standard input)";
133    static char *locale = NULL;
134    
135    static const unsigned char *pcretables = NULL;
136    
137  static int  pattern_count = 0;  static int  pattern_count = 0;
138  static pcre **pattern_list;  static pcre **pattern_list = NULL;
139  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
140    
141    static char *include_pattern = NULL;
142    static char *exclude_pattern = NULL;
143    static char *include_dir_pattern = NULL;
144    static char *exclude_dir_pattern = NULL;
145    
146    static pcre *include_compiled = NULL;
147    static pcre *exclude_compiled = NULL;
148    static pcre *include_dir_compiled = NULL;
149    static pcre *exclude_dir_compiled = NULL;
150    
151    static int after_context = 0;
152    static int before_context = 0;
153    static int both_context = 0;
154    static int dee_action = dee_READ;
155    static int DEE_action = DEE_READ;
156    static int error_count = 0;
157    static int filenames = FN_DEFAULT;
158    static int process_options = 0;
159    
160  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
161  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
162  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
163    static BOOL hyphenpending = FALSE;
164  static BOOL invert = FALSE;  static BOOL invert = FALSE;
165    static BOOL line_offsets = FALSE;
166    static BOOL multiline = FALSE;
167  static BOOL number = FALSE;  static BOOL number = FALSE;
168  static BOOL recurse = FALSE;  static BOOL only_matching = FALSE;
169    static BOOL quiet = FALSE;
170  static BOOL silent = FALSE;  static BOOL silent = FALSE;
171  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
172    
173  /* Structure for options and list of them */  /* Structure for options and list of them */
174    
175    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
176           OP_PATLIST };
177    
178  typedef struct option_item {  typedef struct option_item {
179      int type;
180    int one_char;    int one_char;
181      void *dataptr;
182    const char *long_name;    const char *long_name;
183    const char *help_text;    const char *help_text;
184  } option_item;  } option_item;
185    
186    /* Options without a single-letter equivalent get a negative value. This can be
187    used to identify them. */
188    
189    #define N_COLOUR       (-1)
190    #define N_EXCLUDE      (-2)
191    #define N_EXCLUDE_DIR  (-3)
192    #define N_HELP         (-4)
193    #define N_INCLUDE      (-5)
194    #define N_INCLUDE_DIR  (-6)
195    #define N_LABEL        (-7)
196    #define N_LOCALE       (-8)
197    #define N_NULL         (-9)
198    #define N_LOFFSETS     (-10)
199    #define N_FOFFSETS     (-11)
200    
201  static option_item optionlist[] = {  static option_item optionlist[] = {
202    { -1,  "help",         "display this help and exit" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
203    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
204    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
205    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
206    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
207    { 'n', "line-number",  "print line number with output lines" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
208    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
209    { 's', "no-messages",  "suppress error messages" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
210    { 'u', "utf-8",        "use UTF-8 mode" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
211    { 'V', "version",      "print version information and exit" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
212    { 'v', "invert-match", "select non-matching lines" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
213    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
214    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
215    { 0,    NULL,           NULL }    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
216      { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
217      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
218      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
219      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
220      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
221      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
222      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
223      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
224      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
225      { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
226      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
227      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
228      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
229      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
230      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
231      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
232      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
233      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
234    #ifdef JFRIEDL_DEBUG
235      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
236    #endif
237      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
238      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
239      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
240      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
241      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
242      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
243      { OP_NODATA,    0,        NULL,               NULL,            NULL }
244  };  };
245    
246    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
247    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
248    that the combination of -w and -x has the same effect as -x on its own, so we
249    can treat them as the same. */
250    
251    static const char *prefix[] = {
252      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
253    
254    static const char *suffix[] = {
255      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
256    
257    /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
258    
259    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
260    
261    const char utf8_table4[] = {
262      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
263      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
264      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
265      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
266    
267    
268    
269  /*************************************************  /*************************************************
270  *       Functions for directory scanning         *  *            OS-specific functions               *
271  *************************************************/  *************************************************/
272    
273  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
274  although at present the only ones are for Unix, Win32, and for "no directory  although at present the only ones are for Unix, Win32, and for "no support". */
 recursion support". */  
275    
276    
277  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
278    
279  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
280  #include <sys/types.h>  #include <sys/types.h>
281  #include <sys/stat.h>  #include <sys/stat.h>
282  #include <dirent.h>  #include <dirent.h>
# Line 141  for (;;) Line 308  for (;;)
308    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
309      return dent->d_name;      return dent->d_name;
310    }    }
311  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
312  }  }
313    
314  static void  static void
# Line 151  closedir(dir); Line 318  closedir(dir);
318  }  }
319    
320    
321    /************* Test for regular file in Unix **********/
322    
323    static int
324    isregfile(char *filename)
325    {
326    struct stat statbuf;
327    if (stat(filename, &statbuf) < 0)
328      return 1;        /* In the expectation that opening as a file will fail */
329    return (statbuf.st_mode & S_IFMT) == S_IFREG;
330    }
331    
332    
333    /************* Test stdout for being a terminal in Unix **********/
334    
335    static BOOL
336    is_stdout_tty(void)
337    {
338    return isatty(fileno(stdout));
339    }
340    
341    
342  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
343    
344  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
345  Lionel Fourquaux. */  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
346    when it did not exist. David Byron added a patch that moved the #include of
347    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
348    */
349    
350  #elif HAVE_WIN32API  #elif HAVE_WINDOWS_H
351    
352  #ifndef STRICT  #ifndef STRICT
353  # define STRICT  # define STRICT
# Line 165  Lionel Fourquaux. */ Line 355  Lionel Fourquaux. */
355  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
356  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
357  #endif  #endif
358    
359  #include <windows.h>  #include <windows.h>
360    
361    #ifndef INVALID_FILE_ATTRIBUTES
362    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
363    #endif
364    
365  typedef struct directory_type  typedef struct directory_type
366  {  {
367  HANDLE handle;  HANDLE handle;
# Line 244  free(dir); Line 439  free(dir);
439  }  }
440    
441    
442    /************* Test for regular file in Win32 **********/
443    
444    /* I don't know how to do this, or if it can be done; assume all paths are
445    regular if they are not directories. */
446    
447    int isregfile(char *filename)
448    {
449    return !isdirectory(filename);
450    }
451    
452    
453    /************* Test stdout for being a terminal in Win32 **********/
454    
455    /* I don't know how to do this; assume never */
456    
457    static BOOL
458    is_stdout_tty(void)
459    {
460    return FALSE;
461    }
462    
463    
464  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
465    
466  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
# Line 252  free(dir); Line 469  free(dir);
469    
470  typedef void directory_type;  typedef void directory_type;
471    
472  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
473  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
474  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
475  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
476    
477    
478    /************* Test for regular file when we can't do it **********/
479    
480    /* Assume all files are regular. */
481    
482    int isregfile(char *filename) { return 1; }
483    
484    
485    /************* Test stdout for being a terminal when we can't do it **********/
486    
487    static BOOL
488    is_stdout_tty(void)
489    {
490    return FALSE;
491    }
492    
493    
494  #endif  #endif
495    
496    
497    
498  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
499  /*************************************************  /*************************************************
500  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
501  *************************************************/  *************************************************/
# Line 284  return sys_errlist[n]; Line 518  return sys_errlist[n];
518    
519    
520  /*************************************************  /*************************************************
521  *              Grep an individual file           *  *             Find end of line                   *
522  *************************************************/  *************************************************/
523    
524  static int  /* The length of the endline sequence that is found is set via lenptr. This may
525  pcregrep(FILE *in, char *name)  be zero at the very end of the file if there is no line-ending sequence there.
526  {  
527  int rc = 1;  Arguments:
528  int linenumber = 0;    p         current position in line
529  int count = 0;    endptr    end of available data
530  int offsets[99];    lenptr    where to put the length of the eol sequence
531  char buffer[BUFSIZ];  
532    Returns:    pointer after the last byte of the line, including the newline byte(s)
533    */
534    
535  while (fgets(buffer, sizeof(buffer), in) != NULL)  static char *
536    end_of_line(char *p, char *endptr, int *lenptr)
537    {
538    switch(endlinetype)
539    {    {
540    BOOL match = FALSE;    default:      /* Just in case */
541    int i;    case EL_LF:
542    int length = (int)strlen(buffer);    while (p < endptr && *p != '\n') p++;
543    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    if (p < endptr)
544    linenumber++;      {
545        *lenptr = 1;
546        return p + 1;
547        }
548      *lenptr = 0;
549      return endptr;
550    
551    for (i = 0; !match && i < pattern_count; i++)    case EL_CR:
552      while (p < endptr && *p != '\r') p++;
553      if (p < endptr)
554      {      {
555      match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,      *lenptr = 1;
556        offsets, 99) >= 0;      return p + 1;
     if (match && whole_lines && offsets[1] != length) match = FALSE;  
557      }      }
558      *lenptr = 0;
559      return endptr;
560    
561    if (match != invert)    case EL_CRLF:
562      for (;;)
563      {      {
564      if (count_only) count++;      while (p < endptr && *p != '\r') p++;
565        if (++p >= endptr)
566          {
567          *lenptr = 0;
568          return endptr;
569          }
570        if (*p == '\n')
571          {
572          *lenptr = 2;
573          return p + 1;
574          }
575        }
576      break;
577    
578      case EL_ANYCRLF:
579      while (p < endptr)
580        {
581        int extra = 0;
582        register int c = *((unsigned char *)p);
583    
584      else if (filenames_only)      if (utf8 && c >= 0xc0)
585        {        {
586        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        int gcii, gcss;
587        return 0;        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
588          gcss = 6*extra;
589          c = (c & utf8_table3[extra]) << gcss;
590          for (gcii = 1; gcii <= extra; gcii++)
591            {
592            gcss -= 6;
593            c |= (p[gcii] & 0x3f) << gcss;
594            }
595        }        }
596    
597      else if (silent) return 0;      p += 1 + extra;
598    
599      else      switch (c)
600        {        {
601        if (name != NULL) fprintf(stdout, "%s:", name);        case 0x0a:    /* LF */
602        if (number) fprintf(stdout, "%d:", linenumber);        *lenptr = 1;
603        fprintf(stdout, "%s\n", buffer);        return p;
604    
605          case 0x0d:    /* CR */
606          if (p < endptr && *p == 0x0a)
607            {
608            *lenptr = 2;
609            p++;
610            }
611          else *lenptr = 1;
612          return p;
613    
614          default:
615          break;
616        }        }
617        }   /* End of loop for ANYCRLF case */
618    
619      rc = 0;    *lenptr = 0;  /* Must have hit the end */
620      }    return endptr;
   }  
621    
622  if (count_only)    case EL_ANY:
623    {    while (p < endptr)
624    if (name != NULL) fprintf(stdout, "%s:", name);      {
625    fprintf(stdout, "%d\n", count);      int extra = 0;
626    }      register int c = *((unsigned char *)p);
627    
628  return rc;      if (utf8 && c >= 0xc0)
629  }        {
630          int gcii, gcss;
631          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
632          gcss = 6*extra;
633          c = (c & utf8_table3[extra]) << gcss;
634          for (gcii = 1; gcii <= extra; gcii++)
635            {
636            gcss -= 6;
637            c |= (p[gcii] & 0x3f) << gcss;
638            }
639          }
640    
641        p += 1 + extra;
642    
643        switch (c)
644          {
645          case 0x0a:    /* LF */
646          case 0x0b:    /* VT */
647          case 0x0c:    /* FF */
648          *lenptr = 1;
649          return p;
650    
651          case 0x0d:    /* CR */
652          if (p < endptr && *p == 0x0a)
653            {
654            *lenptr = 2;
655            p++;
656            }
657          else *lenptr = 1;
658          return p;
659    
660          case 0x85:    /* NEL */
661          *lenptr = utf8? 2 : 1;
662          return p;
663    
664          case 0x2028:  /* LS */
665          case 0x2029:  /* PS */
666          *lenptr = 3;
667          return p;
668    
669          default:
670          break;
671          }
672        }   /* End of loop for ANY case */
673    
674      *lenptr = 0;  /* Must have hit the end */
675      return endptr;
676      }     /* End of overall switch */
677    }
678    
679    
680    
681  /*************************************************  /*************************************************
682  *     Grep a file or recurse into a directory    *  *         Find start of previous line            *
683  *************************************************/  *************************************************/
684    
685  static int  /* This is called when looking back for before lines to print.
 grep_or_recurse(char *filename, BOOL dir_recurse, BOOL show_filenames,  
   BOOL only_one_at_top)  
 {  
 int rc = 1;  
 int sep;  
 FILE *in;  
686    
687  /* If the file is a directory and we are recursing, scan each file within it.  Arguments:
688  The scanning code is localized so it can be made system-specific. */    p         start of the subsequent line
689      startptr  start of available data
690    
691  if ((sep = isdirectory(filename)) != 0 && dir_recurse)  Returns:    pointer to the start of the previous line
692    {  */
   char buffer[1024];  
   char *nextfile;  
   directory_type *dir = opendirectory(filename);  
693    
694    if (dir == NULL)  static char *
695      {  previous_line(char *p, char *startptr)
696      fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,  {
697        strerror(errno));  switch(endlinetype)
698      return 2;    {
699      }    default:      /* Just in case */
700      case EL_LF:
701      p--;
702      while (p > startptr && p[-1] != '\n') p--;
703      return p;
704    
705      case EL_CR:
706      p--;
707      while (p > startptr && p[-1] != '\n') p--;
708      return p;
709    
710    while ((nextfile = readdirectory(dir)) != NULL)    case EL_CRLF:
711      for (;;)
712      {      {
713      int frc;      p -= 2;
714      sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);      while (p > startptr && p[-1] != '\n') p--;
715      frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);      if (p <= startptr + 1 || p[-2] == '\r') return p;
     if (frc == 0 && rc == 1) rc = 0;  
716      }      }
717      return p;   /* But control should never get here */
718    
719    closedirectory(dir);    case EL_ANY:
720    return rc;    case EL_ANYCRLF:
721    }    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
722      if (utf8) while ((*p & 0xc0) == 0x80) p--;
723    
724  /* If the file is not a directory, or we are not recursing, scan it. If this is    while (p > startptr)
725  the first and only argument at top level, we don't show the file name (unless      {
726  we are only showing the file name). Otherwise, control is via the      register int c;
727  show_filenames variable. */      char *pp = p - 1;
728    
729  in = fopen(filename, "r");      if (utf8)
730  if (in == NULL)        {
731    {        int extra = 0;
732    fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));        while ((*pp & 0xc0) == 0x80) pp--;
733    return 2;        c = *((unsigned char *)pp);
734    }        if (c >= 0xc0)
735            {
736            int gcii, gcss;
737            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
738            gcss = 6*extra;
739            c = (c & utf8_table3[extra]) << gcss;
740            for (gcii = 1; gcii <= extra; gcii++)
741              {
742              gcss -= 6;
743              c |= (pp[gcii] & 0x3f) << gcss;
744              }
745            }
746          }
747        else c = *((unsigned char *)pp);
748    
749  rc = pcregrep(in, (filenames_only || (show_filenames && !only_one_at_top))?      if (endlinetype == EL_ANYCRLF) switch (c)
750    filename : NULL);        {
751  fclose(in);        case 0x0a:    /* LF */
752  return rc;        case 0x0d:    /* CR */
753  }        return p;
754    
755          default:
756          break;
757          }
758    
759        else switch (c)
760          {
761          case 0x0a:    /* LF */
762          case 0x0b:    /* VT */
763          case 0x0c:    /* FF */
764          case 0x0d:    /* CR */
765          case 0x85:    /* NEL */
766          case 0x2028:  /* LS */
767          case 0x2029:  /* PS */
768          return p;
769    
770          default:
771          break;
772          }
773    
774  /*************************************************      p = pp;  /* Back one character */
775  *                Usage function                  *      }        /* End of loop for ANY case */
 *************************************************/  
776    
777  static int    return startptr;  /* Hit start of data */
778  usage(int rc)    }     /* End of overall switch */
 {  
 fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] [pattern] [file1 file2 ...]\n");  
 fprintf(stderr, "Type `pcregrep --help' for more information.\n");  
 return rc;  
779  }  }
780    
781    
782    
783    
784    
785  /*************************************************  /*************************************************
786  *                Help function                   *  *       Print the previous "after" lines         *
787  *************************************************/  *************************************************/
788    
789  static void  /* This is called if we are about to lose said lines because of buffer filling,
790  help(void)  and at the end of the file. The data in the line is written using fwrite() so
791  {  that a binary zero does not terminate it.
792  option_item *op;  
793    Arguments:
794  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");    lastmatchnumber   the number of the last matching line, plus one
795  printf("Search for PATTERN in each FILE or standard input.\n");    lastmatchrestart  where we restarted after the last match
796  printf("PATTERN must be present if -f is not used.\n");    endptr            end of available data
797  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");    printname         filename for printing
798    
799  printf("Options:\n");  Returns:            nothing
800    */
801    
802  for (op = optionlist; op->one_char != 0; op++)  static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
803      char *endptr, char *printname)
804    {
805    if (after_context > 0 && lastmatchnumber > 0)
806    {    {
807    int n;    int count = 0;
808    char s[4];    while (lastmatchrestart < endptr && count++ < after_context)
809    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");      {
810    printf("  %s --%s%n", s, op->long_name, &n);      int ellength;
811    n = 30 - n;      char *pp = lastmatchrestart;
812    if (n < 1) n = 1;      if (printname != NULL) fprintf(stdout, "%s-", printname);
813    printf("%.*s%s\n", n, "                    ", op->help_text);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
814        pp = end_of_line(pp, endptr, &ellength);
815        fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
816        lastmatchrestart = pp;
817        }
818      hyphenpending = TRUE;
819    }    }
   
 printf("\n  -f<filename>  or  --file=<filename>\n");  
 printf("    Read patterns from <filename> instead of using a command line option.\n");  
 printf("    Trailing white space is removed; blanks lines are ignored.\n");  
 printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
   
 printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  
 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  
820  }  }
821    
822    
823    
   
824  /*************************************************  /*************************************************
825  *                Handle an option                *  *   Apply patterns to subject till one matches   *
826  *************************************************/  *************************************************/
827    
828  static int  /* This function is called to run through all patterns, looking for a match. It
829  handle_option(int letter, int options)  is used multiple times for the same subject when colouring is enabled, in order
830    to find all possible matches.
831    
832    Arguments:
833      matchptr    the start of the subject
834      length      the length of the subject to match
835      offsets     the offsets vector to fill in
836      mrc         address of where to put the result of pcre_exec()
837    
838    Returns:      TRUE if there was a match
839                  FALSE if there was no match
840                  invert if there was a non-fatal error
841    */
842    
843    static BOOL
844    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
845  {  {
846  switch(letter)  int i;
847    for (i = 0; i < pattern_count; i++)
848    {    {
849    case -1:  help(); exit(0);    *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
850    case 'c': count_only = TRUE; break;      offsets, OFFSET_SIZE);
851    case 'h': filenames = FALSE; break;    if (*mrc >= 0) return TRUE;
852    case 'i': options |= PCRE_CASELESS; break;    if (*mrc == PCRE_ERROR_NOMATCH) continue;
853    case 'l': filenames_only = TRUE;    fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
854    case 'n': number = TRUE; break;    if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
855    case 'r': recurse = TRUE; break;    fprintf(stderr, "this text:\n");
856    case 's': silent = TRUE; break;    fwrite(matchptr, 1, length, stderr);  /* In case binary zero included */
857    case 'u': options |= PCRE_UTF8; break;    fprintf(stderr, "\n");
858    case 'v': invert = TRUE; break;    if (error_count == 0 &&
859    case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;        (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
860        {
861    case 'V':      fprintf(stderr, "pcregrep: error %d means that a resource limit "
862    fprintf(stderr, "pcregrep version %s using ", VERSION);        "was exceeded\n", *mrc);
863    fprintf(stderr, "PCRE version %s\n", pcre_version());      fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
864    exit(0);      }
865    break;    if (error_count++ > 20)
866        {
867    default:      fprintf(stderr, "pcregrep: too many errors - abandoned\n");
868    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);      exit(2);
869    exit(usage(2));      }
870      return invert;    /* No more matching; don't show the line again */
871    }    }
872    
873  return options;  return FALSE;  /* No match, no errors */
874  }  }
875    
876    
877    
   
878  /*************************************************  /*************************************************
879  *                Main program                    *  *            Grep an individual file             *
880  *************************************************/  *************************************************/
881    
882  int  /* This is called from grep_or_recurse() below. It uses a buffer that is three
883  main(int argc, char **argv)  times the value of MBUFTHIRD. The matching point is never allowed to stray into
884    the top third of the buffer, thus keeping more of the file available for
885    context printing or for multiline scanning. For large files, the pointer will
886    be in the middle third most of the time, so the bottom third is available for
887    "before" context printing.
888    
889    Arguments:
890      handle       the fopened FILE stream for a normal file
891                   the gzFile pointer when reading is via libz
892                   the BZFILE pointer when reading is via libbz2
893      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
894      printname    the file name if it is to be printed for each match
895                   or NULL if the file name is not to be printed
896                   it cannot be NULL if filenames[_nomatch]_only is set
897    
898    Returns:       0 if there was at least one match
899                   1 otherwise (no matches)
900                   2 if there is a read error on a .bz2 file
901    */
902    
903    static int
904    pcregrep(void *handle, int frtype, char *printname)
905  {  {
 int i, j;  
906  int rc = 1;  int rc = 1;
907  int options = 0;  int linenumber = 1;
908  int errptr;  int lastmatchnumber = 0;
909  const char *error;  int count = 0;
910  BOOL only_one_at_top;  int filepos = 0;
911    int offsets[OFFSET_SIZE];
912    char *lastmatchrestart = NULL;
913    char buffer[3*MBUFTHIRD];
914    char *ptr = buffer;
915    char *endptr;
916    size_t bufflength;
917    BOOL endhyphenpending = FALSE;
918    FILE *in = NULL;                    /* Ensure initialized */
919    
920  /* Process the options */  #ifdef SUPPORT_LIBZ
921    gzFile ingz = NULL;
922    #endif
923    
924  for (i = 1; i < argc; i++)  #ifdef SUPPORT_LIBBZ2
925    BZFILE *inbz2 = NULL;
926    #endif
927    
928    
929    /* Do the first read into the start of the buffer and set up the pointer to end
930    of what we have. In the case of libz, a non-zipped .gz file will be read as a
931    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
932    fail. */
933    
934    #ifdef SUPPORT_LIBZ
935    if (frtype == FR_LIBZ)
936    {    {
937    if (argv[i][0] != '-') break;    ingz = (gzFile)handle;
938      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
939      }
940    else
941    #endif
942    
943    /* Missing options */  #ifdef SUPPORT_LIBBZ2
944    if (frtype == FR_LIBBZ2)
945      {
946      inbz2 = (BZFILE *)handle;
947      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
948      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
949      }                                    /* without the cast it is unsigned. */
950    else
951    #endif
952    
953    if (argv[i][1] == 0) exit(usage(2));    {
954      in = (FILE *)handle;
955      bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
956      }
957    
958    /* Long name options */  endptr = buffer + bufflength;
959    
960    if (argv[i][1] == '-')  /* Loop while the current pointer is not at the end of the file. For large
961      {  files, endptr will be at the end of the buffer when we are in the middle of the
962      option_item *op;  file, but ptr will never get there, because as soon as it gets over 2/3 of the
963    way, the buffer is shifted left and re-filled. */
964    
965      if (strncmp(argv[i]+2, "file=", 5) == 0)  while (ptr < endptr)
966        {    {
967        pattern_filename = argv[i] + 7;    int endlinelength;
968        continue;    int mrc = 0;
969        }    BOOL match;
970      char *matchptr = ptr;
971      char *t = ptr;
972      size_t length, linelength;
973    
974      /* At this point, ptr is at the start of a line. We need to find the length
975      of the subject string to pass to pcre_exec(). In multiline mode, it is the
976      length remainder of the data in the buffer. Otherwise, it is the length of
977      the next line, excluding the terminating newline. After matching, we always
978      advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
979      option is used for compiling, so that any match is constrained to be in the
980      first line. */
981    
982      t = end_of_line(t, endptr, &endlinelength);
983      linelength = t - ptr - endlinelength;
984      length = multiline? (size_t)(endptr - ptr) : linelength;
985    
986      for (op = optionlist; op->one_char != 0; op++)    /* Extra processing for Jeffrey Friedl's debugging. */
987    
988    #ifdef JFRIEDL_DEBUG
989      if (jfriedl_XT || jfriedl_XR)
990      {
991          #include <sys/time.h>
992          #include <time.h>
993          struct timeval start_time, end_time;
994          struct timezone dummy;
995          int i;
996    
997          if (jfriedl_XT)
998          {
999              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1000              const char *orig = ptr;
1001              ptr = malloc(newlen + 1);
1002              if (!ptr) {
1003                      printf("out of memory");
1004                      exit(2);
1005              }
1006              endptr = ptr;
1007              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1008              for (i = 0; i < jfriedl_XT; i++) {
1009                      strncpy(endptr, orig,  length);
1010                      endptr += length;
1011              }
1012              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1013              length = newlen;
1014          }
1015    
1016          if (gettimeofday(&start_time, &dummy) != 0)
1017                  perror("bad gettimeofday");
1018    
1019    
1020          for (i = 0; i < jfriedl_XR; i++)
1021              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, OFFSET_SIZE) >= 0);
1022    
1023          if (gettimeofday(&end_time, &dummy) != 0)
1024                  perror("bad gettimeofday");
1025    
1026          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1027                          -
1028                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1029    
1030          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1031          return 0;
1032      }
1033    #endif
1034    
1035      /* We come back here after a match when the -o option (only_matching) is set,
1036      in order to find any further matches in the same line. */
1037    
1038      ONLY_MATCHING_RESTART:
1039    
1040      /* Run through all the patterns until one matches or there is an error other
1041      than NOMATCH. This code is in a subroutine so that it can be re-used for
1042      finding subsequent matches when colouring matched lines. */
1043    
1044      match = match_patterns(matchptr, length, offsets, &mrc);
1045    
1046      /* If it's a match or a not-match (as required), do what's wanted. */
1047    
1048      if (match != invert)
1049        {
1050        BOOL hyphenprinted = FALSE;
1051    
1052        /* We've failed if we want a file that doesn't have any matches. */
1053    
1054        if (filenames == FN_NOMATCH_ONLY) return 1;
1055    
1056        /* Just count if just counting is wanted. */
1057    
1058        if (count_only) count++;
1059    
1060        /* If all we want is a file name, there is no need to scan any more lines
1061        in the file. */
1062    
1063        else if (filenames == FN_ONLY)
1064          {
1065          fprintf(stdout, "%s\n", printname);
1066          return 0;
1067          }
1068    
1069        /* Likewise, if all we want is a yes/no answer. */
1070    
1071        else if (quiet) return 0;
1072    
1073        /* The --only-matching option prints just the substring that matched, and
1074        the --file-offsets and --line-offsets options output offsets for the
1075        matching substring (they both force --only-matching). None of these options
1076        prints any context. Afterwards, adjust the start and length, and then jump
1077        back to look for further matches in the same line. If we are in invert
1078        mode, however, nothing is printed - this could be still useful because the
1079        return code is set. */
1080    
1081        else if (only_matching)
1082          {
1083          if (!invert)
1084            {
1085            if (printname != NULL) fprintf(stdout, "%s:", printname);
1086            if (number) fprintf(stdout, "%d:", linenumber);
1087            if (line_offsets)
1088              fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1089                offsets[1] - offsets[0]);
1090            else if (file_offsets)
1091              fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1092                offsets[1] - offsets[0]);
1093            else
1094              {
1095              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1096              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1097              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1098              }
1099            fprintf(stdout, "\n");
1100            matchptr += offsets[1];
1101            length -= offsets[1];
1102            match = FALSE;
1103            goto ONLY_MATCHING_RESTART;
1104            }
1105          }
1106    
1107        /* This is the default case when none of the above options is set. We print
1108        the matching line(s), possibly preceded and/or followed by other lines of
1109        context. */
1110    
1111        else
1112          {
1113          /* See if there is a requirement to print some "after" lines from a
1114          previous match. We never print any overlaps. */
1115    
1116          if (after_context > 0 && lastmatchnumber > 0)
1117            {
1118            int ellength;
1119            int linecount = 0;
1120            char *p = lastmatchrestart;
1121    
1122            while (p < ptr && linecount < after_context)
1123              {
1124              p = end_of_line(p, ptr, &ellength);
1125              linecount++;
1126              }
1127    
1128            /* It is important to advance lastmatchrestart during this printing so
1129            that it interacts correctly with any "before" printing below. Print
1130            each line's data using fwrite() in case there are binary zeroes. */
1131    
1132            while (lastmatchrestart < p)
1133              {
1134              char *pp = lastmatchrestart;
1135              if (printname != NULL) fprintf(stdout, "%s-", printname);
1136              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1137              pp = end_of_line(pp, endptr, &ellength);
1138              fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1139              lastmatchrestart = pp;
1140              }
1141            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1142            }
1143    
1144          /* If there were non-contiguous lines printed above, insert hyphens. */
1145    
1146          if (hyphenpending)
1147            {
1148            fprintf(stdout, "--\n");
1149            hyphenpending = FALSE;
1150            hyphenprinted = TRUE;
1151            }
1152    
1153          /* See if there is a requirement to print some "before" lines for this
1154          match. Again, don't print overlaps. */
1155    
1156          if (before_context > 0)
1157            {
1158            int linecount = 0;
1159            char *p = ptr;
1160    
1161            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1162                   linecount < before_context)
1163              {
1164              linecount++;
1165              p = previous_line(p, buffer);
1166              }
1167    
1168            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1169              fprintf(stdout, "--\n");
1170    
1171            while (p < ptr)
1172              {
1173              int ellength;
1174              char *pp = p;
1175              if (printname != NULL) fprintf(stdout, "%s-", printname);
1176              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1177              pp = end_of_line(pp, endptr, &ellength);
1178              fwrite(p, 1, pp - p, stdout);
1179              p = pp;
1180              }
1181            }
1182    
1183          /* Now print the matching line(s); ensure we set hyphenpending at the end
1184          of the file if any context lines are being output. */
1185    
1186          if (after_context > 0 || before_context > 0)
1187            endhyphenpending = TRUE;
1188    
1189          if (printname != NULL) fprintf(stdout, "%s:", printname);
1190          if (number) fprintf(stdout, "%d:", linenumber);
1191    
1192          /* In multiline mode, we want to print to the end of the line in which
1193          the end of the matched string is found, so we adjust linelength and the
1194          line number appropriately, but only when there actually was a match
1195          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1196          the match will always be before the first newline sequence. */
1197    
1198          if (multiline)
1199            {
1200            int ellength;
1201            char *endmatch = ptr;
1202            if (!invert)
1203              {
1204              endmatch += offsets[1];
1205              t = ptr;
1206              while (t < endmatch)
1207                {
1208                t = end_of_line(t, endptr, &ellength);
1209                if (t <= endmatch) linenumber++; else break;
1210                }
1211              }
1212            endmatch = end_of_line(endmatch, endptr, &ellength);
1213            linelength = endmatch - ptr - ellength;
1214            }
1215    
1216          /*** NOTE: Use only fwrite() to output the data line, so that binary
1217          zeroes are treated as just another data character. */
1218    
1219          /* This extra option, for Jeffrey Friedl's debugging requirements,
1220          replaces the matched string, or a specific captured string if it exists,
1221          with X. When this happens, colouring is ignored. */
1222    
1223    #ifdef JFRIEDL_DEBUG
1224          if (S_arg >= 0 && S_arg < mrc)
1225            {
1226            int first = S_arg * 2;
1227            int last  = first + 1;
1228            fwrite(ptr, 1, offsets[first], stdout);
1229            fprintf(stdout, "X");
1230            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1231            }
1232          else
1233    #endif
1234    
1235          /* We have to split the line(s) up if colouring, and search for further
1236          matches. */
1237    
1238          if (do_colour)
1239            {
1240            int last_offset = 0;
1241            fwrite(ptr, 1, offsets[0], stdout);
1242            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1243            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1244            fprintf(stdout, "%c[00m", 0x1b);
1245            for (;;)
1246              {
1247              last_offset += offsets[1];
1248              matchptr += offsets[1];
1249              length -= offsets[1];
1250              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1251              fwrite(matchptr, 1, offsets[0], stdout);
1252              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1253              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1254              fprintf(stdout, "%c[00m", 0x1b);
1255              }
1256            fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,
1257              stdout);
1258            }
1259    
1260          /* Not colouring; no need to search for further matches */
1261    
1262          else fwrite(ptr, 1, linelength + endlinelength, stdout);
1263          }
1264    
1265        /* End of doing what has to be done for a match */
1266    
1267        rc = 0;    /* Had some success */
1268    
1269        /* Remember where the last match happened for after_context. We remember
1270        where we are about to restart, and that line's number. */
1271    
1272        lastmatchrestart = ptr + linelength + endlinelength;
1273        lastmatchnumber = linenumber + 1;
1274        }
1275    
1276      /* For a match in multiline inverted mode (which of course did not cause
1277      anything to be printed), we have to move on to the end of the match before
1278      proceeding. */
1279    
1280      if (multiline && invert && match)
1281        {
1282        int ellength;
1283        char *endmatch = ptr + offsets[1];
1284        t = ptr;
1285        while (t < endmatch)
1286          {
1287          t = end_of_line(t, endptr, &ellength);
1288          if (t <= endmatch) linenumber++; else break;
1289          }
1290        endmatch = end_of_line(endmatch, endptr, &ellength);
1291        linelength = endmatch - ptr - ellength;
1292        }
1293    
1294      /* Advance to after the newline and increment the line number. The file
1295      offset to the current line is maintained in filepos. */
1296    
1297      ptr += linelength + endlinelength;
1298      filepos += linelength + endlinelength;
1299      linenumber++;
1300    
1301      /* If we haven't yet reached the end of the file (the buffer is full), and
1302      the current point is in the top 1/3 of the buffer, slide the buffer down by
1303      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1304      about to be lost, print them. */
1305    
1306      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1307        {
1308        if (after_context > 0 &&
1309            lastmatchnumber > 0 &&
1310            lastmatchrestart < buffer + MBUFTHIRD)
1311          {
1312          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1313          lastmatchnumber = 0;
1314          }
1315    
1316        /* Now do the shuffle */
1317    
1318        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1319        ptr -= MBUFTHIRD;
1320    
1321    #ifdef SUPPORT_LIBZ
1322        if (frtype == FR_LIBZ)
1323          bufflength = 2*MBUFTHIRD +
1324            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1325        else
1326    #endif
1327    
1328    #ifdef SUPPORT_LIBBZ2
1329        if (frtype == FR_LIBBZ2)
1330          bufflength = 2*MBUFTHIRD +
1331            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1332        else
1333    #endif
1334    
1335        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1336    
1337        endptr = buffer + bufflength;
1338    
1339        /* Adjust any last match point */
1340    
1341        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1342        }
1343      }     /* Loop through the whole file */
1344    
1345    /* End of file; print final "after" lines if wanted; do_after_lines sets
1346    hyphenpending if it prints something. */
1347    
1348    if (!only_matching && !count_only)
1349      {
1350      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1351      hyphenpending |= endhyphenpending;
1352      }
1353    
1354    /* Print the file name if we are looking for those without matches and there
1355    were none. If we found a match, we won't have got this far. */
1356    
1357    if (filenames == FN_NOMATCH_ONLY)
1358      {
1359      fprintf(stdout, "%s\n", printname);
1360      return 0;
1361      }
1362    
1363    /* Print the match count if wanted */
1364    
1365    if (count_only)
1366      {
1367      if (printname != NULL) fprintf(stdout, "%s:", printname);
1368      fprintf(stdout, "%d\n", count);
1369      }
1370    
1371    return rc;
1372    }
1373    
1374    
1375    
1376    /*************************************************
1377    *     Grep a file or recurse into a directory    *
1378    *************************************************/
1379    
1380    /* Given a path name, if it's a directory, scan all the files if we are
1381    recursing; if it's a file, grep it.
1382    
1383    Arguments:
1384      pathname          the path to investigate
1385      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1386      only_one_at_top   TRUE if the path is the only one at toplevel
1387    
1388    Returns:   0 if there was at least one match
1389               1 if there were no matches
1390               2 if there was some kind of error
1391    
1392    However, file opening failures are suppressed if "silent" is set.
1393    */
1394    
1395    static int
1396    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1397    {
1398    int rc = 1;
1399    int sep;
1400    int frtype;
1401    int pathlen;
1402    void *handle;
1403    FILE *in = NULL;           /* Ensure initialized */
1404    
1405    #ifdef SUPPORT_LIBZ
1406    gzFile ingz = NULL;
1407    #endif
1408    
1409    #ifdef SUPPORT_LIBBZ2
1410    BZFILE *inbz2 = NULL;
1411    #endif
1412    
1413    /* If the file name is "-" we scan stdin */
1414    
1415    if (strcmp(pathname, "-") == 0)
1416      {
1417      return pcregrep(stdin, FR_PLAIN,
1418        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1419          stdin_name : NULL);
1420      }
1421    
1422    /* If the file is a directory, skip if skipping or if we are recursing, scan
1423    each file and directory within it, subject to any include or exclude patterns
1424    that were set. The scanning code is localized so it can be made
1425    system-specific. */
1426    
1427    if ((sep = isdirectory(pathname)) != 0)
1428      {
1429      if (dee_action == dee_SKIP) return 1;
1430      if (dee_action == dee_RECURSE)
1431        {
1432        char buffer[1024];
1433        char *nextfile;
1434        directory_type *dir = opendirectory(pathname);
1435    
1436        if (dir == NULL)
1437          {
1438          if (!silent)
1439            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1440              strerror(errno));
1441          return 2;
1442          }
1443    
1444        while ((nextfile = readdirectory(dir)) != NULL)
1445          {
1446          int frc, nflen;
1447          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1448          nflen = strlen(nextfile);
1449    
1450          if (isdirectory(buffer))
1451            {
1452            if (exclude_dir_compiled != NULL &&
1453                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1454              continue;
1455    
1456            if (include_dir_compiled != NULL &&
1457                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1458              continue;
1459            }
1460          else
1461            {
1462            if (exclude_compiled != NULL &&
1463                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1464              continue;
1465    
1466            if (include_compiled != NULL &&
1467                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1468              continue;
1469            }
1470    
1471          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1472          if (frc > 1) rc = frc;
1473           else if (frc == 0 && rc == 1) rc = 0;
1474          }
1475    
1476        closedirectory(dir);
1477        return rc;
1478        }
1479      }
1480    
1481    /* If the file is not a directory and not a regular file, skip it if that's
1482    been requested. */
1483    
1484    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1485    
1486    /* Control reaches here if we have a regular file, or if we have a directory
1487    and recursion or skipping was not requested, or if we have anything else and
1488    skipping was not requested. The scan proceeds. If this is the first and only
1489    argument at top level, we don't show the file name, unless we are only showing
1490    the file name, or the filename was forced (-H). */
1491    
1492    pathlen = strlen(pathname);
1493    
1494    /* Open using zlib if it is supported and the file name ends with .gz. */
1495    
1496    #ifdef SUPPORT_LIBZ
1497    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1498      {
1499      ingz = gzopen(pathname, "rb");
1500      if (ingz == NULL)
1501        {
1502        if (!silent)
1503          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1504            strerror(errno));
1505        return 2;
1506        }
1507      handle = (void *)ingz;
1508      frtype = FR_LIBZ;
1509      }
1510    else
1511    #endif
1512    
1513    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1514    
1515    #ifdef SUPPORT_LIBBZ2
1516    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1517      {
1518      inbz2 = BZ2_bzopen(pathname, "rb");
1519      handle = (void *)inbz2;
1520      frtype = FR_LIBBZ2;
1521      }
1522    else
1523    #endif
1524    
1525    /* Otherwise use plain fopen(). The label is so that we can come back here if
1526    an attempt to read a .bz2 file indicates that it really is a plain file. */
1527    
1528    #ifdef SUPPORT_LIBBZ2
1529    PLAIN_FILE:
1530    #endif
1531      {
1532      in = fopen(pathname, "r");
1533      handle = (void *)in;
1534      frtype = FR_PLAIN;
1535      }
1536    
1537    /* All the opening methods return errno when they fail. */
1538    
1539    if (handle == NULL)
1540      {
1541      if (!silent)
1542        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1543          strerror(errno));
1544      return 2;
1545      }
1546    
1547    /* Now grep the file */
1548    
1549    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1550      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1551    
1552    /* Close in an appropriate manner. */
1553    
1554    #ifdef SUPPORT_LIBZ
1555    if (frtype == FR_LIBZ)
1556      gzclose(ingz);
1557    else
1558    #endif
1559    
1560    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1561    read failed. If the error indicates that the file isn't in fact bzipped, try
1562    again as a normal file. */
1563    
1564    #ifdef SUPPORT_LIBBZ2
1565    if (frtype == FR_LIBBZ2)
1566      {
1567      if (rc == 2)
1568        {
1569        int errnum;
1570        const char *err = BZ2_bzerror(inbz2, &errnum);
1571        if (errnum == BZ_DATA_ERROR_MAGIC)
1572          {
1573          BZ2_bzclose(inbz2);
1574          goto PLAIN_FILE;
1575          }
1576        else if (!silent)
1577          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1578            pathname, err);
1579        }
1580      BZ2_bzclose(inbz2);
1581      }
1582    else
1583    #endif
1584    
1585    /* Normal file close */
1586    
1587    fclose(in);
1588    
1589    /* Pass back the yield from pcregrep(). */
1590    
1591    return rc;
1592    }
1593    
1594    
1595    
1596    
1597    /*************************************************
1598    *                Usage function                  *
1599    *************************************************/
1600    
1601    static int
1602    usage(int rc)
1603    {
1604    option_item *op;
1605    fprintf(stderr, "Usage: pcregrep [-");
1606    for (op = optionlist; op->one_char != 0; op++)
1607      {
1608      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1609      }
1610    fprintf(stderr, "] [long options] [pattern] [files]\n");
1611    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1612      "options.\n");
1613    return rc;
1614    }
1615    
1616    
1617    
1618    
1619    /*************************************************
1620    *                Help function                   *
1621    *************************************************/
1622    
1623    static void
1624    help(void)
1625    {
1626    option_item *op;
1627    
1628    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1629    printf("Search for PATTERN in each FILE or standard input.\n");
1630    printf("PATTERN must be present if neither -e nor -f is used.\n");
1631    printf("\"-\" can be used as a file name to mean STDIN.\n");
1632    
1633    #ifdef SUPPORT_LIBZ
1634    printf("Files whose names end in .gz are read using zlib.\n");
1635    #endif
1636    
1637    #ifdef SUPPORT_LIBBZ2
1638    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1639    #endif
1640    
1641    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1642    printf("Other files and the standard input are read as plain files.\n\n");
1643    #else
1644    printf("All files are read as plain files, without any interpretation.\n\n");
1645    #endif
1646    
1647    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1648    printf("Options:\n");
1649    
1650    for (op = optionlist; op->one_char != 0; op++)
1651      {
1652      int n;
1653      char s[4];
1654      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1655      n = 30 - printf("  %s --%s", s, op->long_name);
1656      if (n < 1) n = 1;
1657      printf("%.*s%s\n", n, "                    ", op->help_text);
1658      }
1659    
1660    printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1661    printf("trailing white space is removed and blank lines are ignored.\n");
1662    printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1663    
1664    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1665    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1666    }
1667    
1668    
1669    
1670    
1671    /*************************************************
1672    *    Handle a single-letter, no data option      *
1673    *************************************************/
1674    
1675    static int
1676    handle_option(int letter, int options)
1677    {
1678    switch(letter)
1679      {
1680      case N_FOFFSETS: file_offsets = TRUE; break;
1681      case N_HELP: help(); exit(0);
1682      case N_LOFFSETS: line_offsets = number = TRUE; break;
1683      case 'c': count_only = TRUE; break;
1684      case 'F': process_options |= PO_FIXED_STRINGS; break;
1685      case 'H': filenames = FN_FORCE; break;
1686      case 'h': filenames = FN_NONE; break;
1687      case 'i': options |= PCRE_CASELESS; break;
1688      case 'l': filenames = FN_ONLY; break;
1689      case 'L': filenames = FN_NOMATCH_ONLY; break;
1690      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1691      case 'n': number = TRUE; break;
1692      case 'o': only_matching = TRUE; break;
1693      case 'q': quiet = TRUE; break;
1694      case 'r': dee_action = dee_RECURSE; break;
1695      case 's': silent = TRUE; break;
1696      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1697      case 'v': invert = TRUE; break;
1698      case 'w': process_options |= PO_WORD_MATCH; break;
1699      case 'x': process_options |= PO_LINE_MATCH; break;
1700    
1701      case 'V':
1702      fprintf(stderr, "pcregrep version %s\n", pcre_version());
1703      exit(0);
1704      break;
1705    
1706      default:
1707      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1708      exit(usage(2));
1709      }
1710    
1711    return options;
1712    }
1713    
1714    
1715    
1716    
1717    /*************************************************
1718    *          Construct printed ordinal             *
1719    *************************************************/
1720    
1721    /* This turns a number into "1st", "3rd", etc. */
1722    
1723    static char *
1724    ordin(int n)
1725    {
1726    static char buffer[8];
1727    char *p = buffer;
1728    sprintf(p, "%d", n);
1729    while (*p != 0) p++;
1730    switch (n%10)
1731      {
1732      case 1: strcpy(p, "st"); break;
1733      case 2: strcpy(p, "nd"); break;
1734      case 3: strcpy(p, "rd"); break;
1735      default: strcpy(p, "th"); break;
1736      }
1737    return buffer;
1738    }
1739    
1740    
1741    
1742    /*************************************************
1743    *          Compile a single pattern              *
1744    *************************************************/
1745    
1746    /* When the -F option has been used, this is called for each substring.
1747    Otherwise it's called for each supplied pattern.
1748    
1749    Arguments:
1750      pattern        the pattern string
1751      options        the PCRE options
1752      filename       the file name, or NULL for a command-line pattern
1753      count          0 if this is the only command line pattern, or
1754                     number of the command line pattern, or
1755                     linenumber for a pattern from a file
1756    
1757    Returns:         TRUE on success, FALSE after an error
1758    */
1759    
1760    static BOOL
1761    compile_single_pattern(char *pattern, int options, char *filename, int count)
1762    {
1763    char buffer[MBUFTHIRD + 16];
1764    const char *error;
1765    int errptr;
1766    
1767    if (pattern_count >= MAX_PATTERN_COUNT)
1768      {
1769      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1770        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1771      return FALSE;
1772      }
1773    
1774    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1775      suffix[process_options]);
1776    pattern_list[pattern_count] =
1777      pcre_compile(buffer, options, &error, &errptr, pcretables);
1778    if (pattern_list[pattern_count] != NULL)
1779      {
1780      pattern_count++;
1781      return TRUE;
1782      }
1783    
1784    /* Handle compile errors */
1785    
1786    errptr -= (int)strlen(prefix[process_options]);
1787    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1788    
1789    if (filename == NULL)
1790      {
1791      if (count == 0)
1792        fprintf(stderr, "pcregrep: Error in command-line regex "
1793          "at offset %d: %s\n", errptr, error);
1794      else
1795        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1796          "at offset %d: %s\n", ordin(count), errptr, error);
1797      }
1798    else
1799      {
1800      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1801        "at offset %d: %s\n", count, filename, errptr, error);
1802      }
1803    
1804    return FALSE;
1805    }
1806    
1807    
1808    
1809    /*************************************************
1810    *           Compile one supplied pattern         *
1811    *************************************************/
1812    
1813    /* When the -F option has been used, each string may be a list of strings,
1814    separated by line breaks. They will be matched literally.
1815    
1816    Arguments:
1817      pattern        the pattern string
1818      options        the PCRE options
1819      filename       the file name, or NULL for a command-line pattern
1820      count          0 if this is the only command line pattern, or
1821                     number of the command line pattern, or
1822                     linenumber for a pattern from a file
1823    
1824    Returns:         TRUE on success, FALSE after an error
1825    */
1826    
1827    static BOOL
1828    compile_pattern(char *pattern, int options, char *filename, int count)
1829    {
1830    if ((process_options & PO_FIXED_STRINGS) != 0)
1831      {
1832      char *eop = pattern + strlen(pattern);
1833      char buffer[MBUFTHIRD];
1834      for(;;)
1835        {
1836        int ellength;
1837        char *p = end_of_line(pattern, eop, &ellength);
1838        if (ellength == 0)
1839          return compile_single_pattern(pattern, options, filename, count);
1840        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1841        pattern = p;
1842        if (!compile_single_pattern(buffer, options, filename, count))
1843          return FALSE;
1844        }
1845      }
1846    else return compile_single_pattern(pattern, options, filename, count);
1847    }
1848    
1849    
1850    
1851    /*************************************************
1852    *                Main program                    *
1853    *************************************************/
1854    
1855    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1856    
1857    int
1858    main(int argc, char **argv)
1859    {
1860    int i, j;
1861    int rc = 1;
1862    int pcre_options = 0;
1863    int cmd_pattern_count = 0;
1864    int hint_count = 0;
1865    int errptr;
1866    BOOL only_one_at_top;
1867    char *patterns[MAX_PATTERN_COUNT];
1868    const char *locale_from = "--locale";
1869    const char *error;
1870    
1871    /* Set the default line ending value from the default in the PCRE library;
1872    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1873    */
1874    
1875    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1876    switch(i)
1877      {
1878      default:                 newline = (char *)"lf"; break;
1879      case '\r':               newline = (char *)"cr"; break;
1880      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1881      case -1:                 newline = (char *)"any"; break;
1882      case -2:                 newline = (char *)"anycrlf"; break;
1883      }
1884    
1885    /* Process the options */
1886    
1887    for (i = 1; i < argc; i++)
1888      {
1889      option_item *op = NULL;
1890      char *option_data = (char *)"";    /* default to keep compiler happy */
1891      BOOL longop;
1892      BOOL longopwasequals = FALSE;
1893    
1894      if (argv[i][0] != '-') break;
1895    
1896      /* If we hit an argument that is just "-", it may be a reference to STDIN,
1897      but only if we have previously had -e or -f to define the patterns. */
1898    
1899      if (argv[i][1] == 0)
1900        {
1901        if (pattern_filename != NULL || pattern_count > 0) break;
1902          else exit(usage(2));
1903        }
1904    
1905      /* Handle a long name option, or -- to terminate the options */
1906    
1907      if (argv[i][1] == '-')
1908        {
1909        char *arg = argv[i] + 2;
1910        char *argequals = strchr(arg, '=');
1911    
1912        if (*arg == 0)    /* -- terminates options */
1913          {
1914          i++;
1915          break;                /* out of the options-handling loop */
1916          }
1917    
1918        longop = TRUE;
1919    
1920        /* Some long options have data that follows after =, for example file=name.
1921        Some options have variations in the long name spelling: specifically, we
1922        allow "regexp" because GNU grep allows it, though I personally go along
1923        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1924        These options are entered in the table as "regex(p)". No option is in both
1925        these categories, fortunately. */
1926    
1927        for (op = optionlist; op->one_char != 0; op++)
1928        {        {
1929        if (strcmp(argv[i]+2, op->long_name) == 0)        char *opbra = strchr(op->long_name, '(');
1930          char *equals = strchr(op->long_name, '=');
1931          if (opbra == NULL)     /* Not a (p) case */
1932          {          {
1933          options = handle_option(op->one_char, options);          if (equals == NULL)  /* Not thing=data case */
1934          break;            {
1935              if (strcmp(arg, op->long_name) == 0) break;
1936              }
1937            else                 /* Special case xxx=data */
1938              {
1939              int oplen = equals - op->long_name;
1940              int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1941              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1942                {
1943                option_data = arg + arglen;
1944                if (*option_data == '=')
1945                  {
1946                  option_data++;
1947                  longopwasequals = TRUE;
1948                  }
1949                break;
1950                }
1951              }
1952            }
1953          else                   /* Special case xxxx(p) */
1954            {
1955            char buff1[24];
1956            char buff2[24];
1957            int baselen = opbra - op->long_name;
1958            sprintf(buff1, "%.*s", baselen, op->long_name);
1959            sprintf(buff2, "%s%.*s", buff1,
1960              (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1961            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1962              break;
1963          }          }
1964        }        }
1965    
1966      if (op->one_char == 0)      if (op->one_char == 0)
1967        {        {
1968        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
# Line 550  for (i = 1; i < argc; i++) Line 1970  for (i = 1; i < argc; i++)
1970        }        }
1971      }      }
1972    
1973    /* One-char options */  
1974      /* Jeffrey Friedl's debugging harness uses these additional options which
1975      are not in the right form for putting in the option table because they use
1976      only one hyphen, yet are more than one character long. By putting them
1977      separately here, they will not get displayed as part of the help() output,
1978      but I don't think Jeffrey will care about that. */
1979    
1980    #ifdef JFRIEDL_DEBUG
1981      else if (strcmp(argv[i], "-pre") == 0) {
1982              jfriedl_prefix = argv[++i];
1983              continue;
1984      } else if (strcmp(argv[i], "-post") == 0) {
1985              jfriedl_postfix = argv[++i];
1986              continue;
1987      } else if (strcmp(argv[i], "-XT") == 0) {
1988              sscanf(argv[++i], "%d", &jfriedl_XT);
1989              continue;
1990      } else if (strcmp(argv[i], "-XR") == 0) {
1991              sscanf(argv[++i], "%d", &jfriedl_XR);
1992              continue;
1993      }
1994    #endif
1995    
1996    
1997      /* One-char options; many that have no data may be in a single argument; we
1998      continue till we hit the last one or one that needs data. */
1999    
2000    else    else
2001      {      {
2002      char *s = argv[i] + 1;      char *s = argv[i] + 1;
2003        longop = FALSE;
2004      while (*s != 0)      while (*s != 0)
2005        {        {
2006        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
2007            { if (*s == op->one_char) break; }
2008          if (op->one_char == 0)
2009          {          {
2010          pattern_filename = s + 1;          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2011          if (pattern_filename[0] == 0)            *s, argv[i]);
2012            {          exit(usage(2));
2013            if (i >= argc - 1)          }
2014              {        if (op->type != OP_NODATA || s[1] == 0)
2015              fprintf(stderr, "pcregrep: File name missing after -f\n");          {
2016              exit(usage(2));          option_data = s+1;
             }  
           pattern_filename = argv[++i];  
           }  
2017          break;          break;
2018          }          }
2019        else options = handle_option(*s++, options);        pcre_options = handle_option(*s++, pcre_options);
2020        }        }
2021      }      }
   }  
2022    
2023  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));    /* At this point we should have op pointing to a matched option. If the type
2024  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));    is NO_DATA, it means that there is no data, and the option might set
2025      something in the PCRE options. */
2026    
2027  if (pattern_list == NULL || hints_list == NULL)    if (op->type == OP_NODATA)
2028    {      {
2029    fprintf(stderr, "pcregrep: malloc failed\n");      pcre_options = handle_option(op->one_char, pcre_options);
2030    return 2;      continue;
2031    }      }
2032    
2033  /* Compile the regular expression(s). */    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2034      either has a value or defaults to something. It cannot have data in a
2035      separate item. At the moment, the only such options are "colo(u)r" and
2036      Jeffrey Friedl's special -S debugging option. */
2037    
2038  if (pattern_filename != NULL)    if (*option_data == 0 &&
2039    {        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
   FILE *f = fopen(pattern_filename, "r");  
   char buffer[BUFSIZ];  
   if (f == NULL)  
2040      {      {
2041      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      switch (op->one_char)
2042        strerror(errno));        {
2043      return 2;        case N_COLOUR:
2044          colour_option = (char *)"auto";
2045          break;
2046    #ifdef JFRIEDL_DEBUG
2047          case 'S':
2048          S_arg = 0;
2049          break;
2050    #endif
2051          }
2052        continue;
2053      }      }
2054    while (fgets(buffer, sizeof(buffer), f) != NULL)  
2055      /* Otherwise, find the data string for the option. */
2056    
2057      if (*option_data == 0)
2058      {      {
2059      char *s = buffer + (int)strlen(buffer);      if (i >= argc - 1 || longopwasequals)
2060      if (pattern_count >= MAX_PATTERN_COUNT)        {
2061          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2062          exit(usage(2));
2063          }
2064        option_data = argv[++i];
2065        }
2066    
2067      /* If the option type is OP_PATLIST, it's the -e option, which can be called
2068      multiple times to create a list of patterns. */
2069    
2070      if (op->type == OP_PATLIST)
2071        {
2072        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2073        {        {
2074        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2075          MAX_PATTERN_COUNT);          MAX_PATTERN_COUNT);
2076        return 2;        return 2;
2077        }        }
2078      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      patterns[cmd_pattern_count++] = option_data;
2079      if (s == buffer) continue;      }
2080      *s = 0;  
2081      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,    /* Otherwise, deal with single string or numeric data values. */
2082        &errptr, NULL);  
2083      if (pattern_list[pattern_count++] == NULL)    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2084        {
2085        *((char **)op->dataptr) = option_data;
2086        }
2087      else
2088        {
2089        char *endptr;
2090        int n = strtoul(option_data, &endptr, 10);
2091        if (*endptr != 0)
2092        {        {
2093        fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",        if (longop)
2094          pattern_count, errptr, error);          {
2095        return 2;          char *equals = strchr(op->long_name, '=');
2096            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2097              equals - op->long_name;
2098            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2099              option_data, nlen, op->long_name);
2100            }
2101          else
2102            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2103              option_data, op->one_char);
2104          exit(usage(2));
2105        }        }
2106        *((int *)op->dataptr) = n;
2107        }
2108      }
2109    
2110    /* Options have been decoded. If -C was used, its value is used as a default
2111    for -A and -B. */
2112    
2113    if (both_context > 0)
2114      {
2115      if (after_context == 0) after_context = both_context;
2116      if (before_context == 0) before_context = both_context;
2117      }
2118    
2119    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2120    However, the latter two set the only_matching flag. */
2121    
2122    if ((only_matching && (file_offsets || line_offsets)) ||
2123        (file_offsets && line_offsets))
2124      {
2125      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2126        "and/or --line-offsets\n");
2127      exit(usage(2));
2128      }
2129    
2130    if (file_offsets || line_offsets) only_matching = TRUE;
2131    
2132    /* If a locale has not been provided as an option, see if the LC_CTYPE or
2133    LC_ALL environment variable is set, and if so, use it. */
2134    
2135    if (locale == NULL)
2136      {
2137      locale = getenv("LC_ALL");
2138      locale_from = "LC_ALL";   /* variable name for the "obtained from" message */
2139      }
2140    
2141    if (locale == NULL)
2142      {
2143      locale = getenv("LC_CTYPE");
2144      locale_from = "LC_CTYPE";
2145      }
2146    
2147    /* If a locale has been provided, set it, and generate the tables the PCRE
2148    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2149    
2150    if (locale != NULL)
2151      {
2152      if (setlocale(LC_CTYPE, locale) == NULL)
2153        {
2154        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2155          locale, locale_from);
2156        return 2;
2157        }
2158      pcretables = pcre_maketables();
2159      }
2160    
2161    /* Sort out colouring */
2162    
2163    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2164      {
2165      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2166      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2167      else
2168        {
2169        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2170          colour_option);
2171        return 2;
2172        }
2173      if (do_colour)
2174        {
2175        char *cs = getenv("PCREGREP_COLOUR");
2176        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2177        if (cs != NULL) colour_string = cs;
2178      }      }
   fclose(f);  
2179    }    }
2180    
2181  /* If no file name, a single regex must be given inline */  /* Interpret the newline type; the default settings are Unix-like. */
2182    
2183    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2184      {
2185      pcre_options |= PCRE_NEWLINE_CR;
2186      endlinetype = EL_CR;
2187      }
2188    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2189      {
2190      pcre_options |= PCRE_NEWLINE_LF;
2191      endlinetype = EL_LF;
2192      }
2193    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2194      {
2195      pcre_options |= PCRE_NEWLINE_CRLF;
2196      endlinetype = EL_CRLF;
2197      }
2198    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2199      {
2200      pcre_options |= PCRE_NEWLINE_ANY;
2201      endlinetype = EL_ANY;
2202      }
2203    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2204      {
2205      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2206      endlinetype = EL_ANYCRLF;
2207      }
2208  else  else
2209    {    {
2210    if (i >= argc) return usage(2);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2211    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);    return 2;
2212    if (pattern_list[0] == NULL)    }
2213    
2214    /* Interpret the text values for -d and -D */
2215    
2216    if (dee_option != NULL)
2217      {
2218      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2219      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2220      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2221      else
2222      {      {
2223      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
       error);  
2224      return 2;      return 2;
2225      }      }
   pattern_count++;  
2226    }    }
2227    
2228  /* Study the regular expressions, as we will be running them may times */  if (DEE_option != NULL)
2229      {
2230      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2231      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2232      else
2233        {
2234        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2235        return 2;
2236        }
2237      }
2238    
2239    /* Check the values for Jeffrey Friedl's debugging options. */
2240    
2241    #ifdef JFRIEDL_DEBUG
2242    if (S_arg > 9)
2243      {
2244      fprintf(stderr, "pcregrep: bad value for -S option\n");
2245      return 2;
2246      }
2247    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2248      {
2249      if (jfriedl_XT == 0) jfriedl_XT = 1;
2250      if (jfriedl_XR == 0) jfriedl_XR = 1;
2251      }
2252    #endif
2253    
2254    /* Get memory to store the pattern and hints lists. */
2255    
2256    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2257    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2258    
2259    if (pattern_list == NULL || hints_list == NULL)
2260      {
2261      fprintf(stderr, "pcregrep: malloc failed\n");
2262      goto EXIT2;
2263      }
2264    
2265    /* If no patterns were provided by -e, and there is no file provided by -f,
2266    the first argument is the one and only pattern, and it must exist. */
2267    
2268    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2269      {
2270      if (i >= argc) return usage(2);
2271      patterns[cmd_pattern_count++] = argv[i++];
2272      }
2273    
2274    /* Compile the patterns that were provided on the command line, either by
2275    multiple uses of -e or as a single unkeyed pattern. */
2276    
2277    for (j = 0; j < cmd_pattern_count; j++)
2278      {
2279      if (!compile_pattern(patterns[j], pcre_options, NULL,
2280           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2281        goto EXIT2;
2282      }
2283    
2284    /* Compile the regular expressions that are provided in a file. */
2285    
2286    if (pattern_filename != NULL)
2287      {
2288      int linenumber = 0;
2289      FILE *f;
2290      char *filename;
2291      char buffer[MBUFTHIRD];
2292    
2293      if (strcmp(pattern_filename, "-") == 0)
2294        {
2295        f = stdin;
2296        filename = stdin_name;
2297        }
2298      else
2299        {
2300        f = fopen(pattern_filename, "r");
2301        if (f == NULL)
2302          {
2303          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2304            strerror(errno));
2305          goto EXIT2;
2306          }
2307        filename = pattern_filename;
2308        }
2309    
2310      while (fgets(buffer, MBUFTHIRD, f) != NULL)
2311        {
2312        char *s = buffer + (int)strlen(buffer);
2313        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2314        *s = 0;
2315        linenumber++;
2316        if (buffer[0] == 0) continue;   /* Skip blank lines */
2317        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2318          goto EXIT2;
2319        }
2320    
2321      if (f != stdin) fclose(f);
2322      }
2323    
2324    /* Study the regular expressions, as we will be running them many times */
2325    
2326  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
2327    {    {
# Line 646  for (j = 0; j < pattern_count; j++) Line 2331  for (j = 0; j < pattern_count; j++)
2331      char s[16];      char s[16];
2332      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2333      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2334      return 2;      goto EXIT2;
2335        }
2336      hint_count++;
2337      }
2338    
2339    /* If there are include or exclude patterns, compile them. */
2340    
2341    if (exclude_pattern != NULL)
2342      {
2343      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2344        pcretables);
2345      if (exclude_compiled == NULL)
2346        {
2347        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2348          errptr, error);
2349        goto EXIT2;
2350        }
2351      }
2352    
2353    if (include_pattern != NULL)
2354      {
2355      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2356        pcretables);
2357      if (include_compiled == NULL)
2358        {
2359        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2360          errptr, error);
2361        goto EXIT2;
2362        }
2363      }
2364    
2365    if (exclude_dir_pattern != NULL)
2366      {
2367      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2368        pcretables);
2369      if (exclude_dir_compiled == NULL)
2370        {
2371        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2372          errptr, error);
2373        goto EXIT2;
2374        }
2375      }
2376    
2377    if (include_dir_pattern != NULL)
2378      {
2379      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2380        pcretables);
2381      if (include_dir_compiled == NULL)
2382        {
2383        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2384          errptr, error);
2385        goto EXIT2;
2386      }      }
2387    }    }
2388    
2389  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
2390    
2391  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc)
2392      {
2393      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2394      goto EXIT;
2395      }
2396    
2397  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2398  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
2399  the file name if the argument is not a directory. */  the file name if the argument is not a directory and filenames are not
2400    otherwise forced. */
2401    
2402  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
 if (filenames_only) filenames = TRUE;  
2403    
2404  for (; i < argc; i++)  for (; i < argc; i++)
2405    {    {
2406    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2407    if (frc == 0 && rc == 1) rc = 0;      only_one_at_top);
2408      if (frc > 1) rc = frc;
2409        else if (frc == 0 && rc == 1) rc = 0;
2410    }    }
2411    
2412    EXIT:
2413    if (pattern_list != NULL)
2414      {
2415      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2416      free(pattern_list);
2417      }
2418    if (hints_list != NULL)
2419      {
2420      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2421      free(hints_list);
2422      }
2423  return rc;  return rc;
2424    
2425    EXIT2:
2426    rc = 2;
2427    goto EXIT;
2428  }  }
2429    
2430  /* End */  /* End of pcregrep */

Legend:
Removed from v.75  
changed lines
  Added in v.378

  ViewVC Help
Powered by ViewVC 1.1.5