/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 75 by nigel, Sat Feb 24 21:40:37 2007 UTC revision 243 by ph10, Thu Sep 13 09:28:14 2007 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2004 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55    #include <unistd.h>
56    #endif
57    
58  #include "pcre.h"  #include "pcre.h"
59    
60  #define FALSE 0  #define FALSE 0
# Line 50  POSSIBILITY OF SUCH DAMAGE. Line 62  POSSIBILITY OF SUCH DAMAGE.
62    
63  typedef int BOOL;  typedef int BOOL;
64    
 #define VERSION "3.0 14-Jan-2003"  
65  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
66    
67    #if BUFSIZ > 8192
68    #define MBUFTHIRD BUFSIZ
69    #else
70    #define MBUFTHIRD 8192
71    #endif
72    
73    /* Values for the "filenames" variable, which specifies options for file name
74    output. The order is important; it is assumed that a file name is wanted for
75    all values greater than FN_DEFAULT. */
76    
77    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
78    
79    /* Actions for the -d and -D options */
80    
81    enum { dee_READ, dee_SKIP, dee_RECURSE };
82    enum { DEE_READ, DEE_SKIP };
83    
84    /* Actions for special processing options (flag bits) */
85    
86    #define PO_WORD_MATCH     0x0001
87    #define PO_LINE_MATCH     0x0002
88    #define PO_FIXED_STRINGS  0x0004
89    
90    /* Line ending types */
91    
92    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
93    
94    
95    
96  /*************************************************  /*************************************************
97  *               Global variables                 *  *               Global variables                 *
98  *************************************************/  *************************************************/
99    
100    /* Jeffrey Friedl has some debugging requirements that are not part of the
101    regular code. */
102    
103    #ifdef JFRIEDL_DEBUG
104    static int S_arg = -1;
105    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
106    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
107    static const char *jfriedl_prefix = "";
108    static const char *jfriedl_postfix = "";
109    #endif
110    
111    static int  endlinetype;
112    
113    static char *colour_string = (char *)"1;31";
114    static char *colour_option = NULL;
115    static char *dee_option = NULL;
116    static char *DEE_option = NULL;
117    static char *newline = NULL;
118  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
119    static char *stdin_name = (char *)"(standard input)";
120    static char *locale = NULL;
121    
122    static const unsigned char *pcretables = NULL;
123    
124  static int  pattern_count = 0;  static int  pattern_count = 0;
125  static pcre **pattern_list;  static pcre **pattern_list = NULL;
126  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
127    
128    static char *include_pattern = NULL;
129    static char *exclude_pattern = NULL;
130    
131    static pcre *include_compiled = NULL;
132    static pcre *exclude_compiled = NULL;
133    
134    static int after_context = 0;
135    static int before_context = 0;
136    static int both_context = 0;
137    static int dee_action = dee_READ;
138    static int DEE_action = DEE_READ;
139    static int error_count = 0;
140    static int filenames = FN_DEFAULT;
141    static int process_options = 0;
142    
143  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
144  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
145  static BOOL filenames_only = FALSE;  static BOOL hyphenpending = FALSE;
146  static BOOL invert = FALSE;  static BOOL invert = FALSE;
147    static BOOL multiline = FALSE;
148  static BOOL number = FALSE;  static BOOL number = FALSE;
149  static BOOL recurse = FALSE;  static BOOL only_matching = FALSE;
150    static BOOL quiet = FALSE;
151  static BOOL silent = FALSE;  static BOOL silent = FALSE;
152  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
153    
154  /* Structure for options and list of them */  /* Structure for options and list of them */
155    
156    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
157           OP_PATLIST };
158    
159  typedef struct option_item {  typedef struct option_item {
160      int type;
161    int one_char;    int one_char;
162      void *dataptr;
163    const char *long_name;    const char *long_name;
164    const char *help_text;    const char *help_text;
165  } option_item;  } option_item;
166    
167    /* Options without a single-letter equivalent get a negative value. This can be
168    used to identify them. */
169    
170    #define N_COLOUR    (-1)
171    #define N_EXCLUDE   (-2)
172    #define N_HELP      (-3)
173    #define N_INCLUDE   (-4)
174    #define N_LABEL     (-5)
175    #define N_LOCALE    (-6)
176    #define N_NULL      (-7)
177    
178  static option_item optionlist[] = {  static option_item optionlist[] = {
179    { -1,  "help",         "display this help and exit" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
180    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
181    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
182    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
183    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
184    { 'n', "line-number",  "print line number with output lines" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
185    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
186    { 's', "no-messages",  "suppress error messages" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
187    { 'u', "utf-8",        "use UTF-8 mode" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
188    { 'V', "version",      "print version information and exit" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
189    { 'v', "invert-match", "select non-matching lines" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
190    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
191    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
192    { 0,    NULL,           NULL }    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
193      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
194      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
195      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
196      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
197      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
198      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
199      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
200      { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
201      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
202      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
203      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
204      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
205      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
206      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
207    #ifdef JFRIEDL_DEBUG
208      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
209    #endif
210      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
211      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
212      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
213      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
214      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
215      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
216      { OP_NODATA,    0,        NULL,               NULL,            NULL }
217  };  };
218    
219    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
220    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
221    that the combination of -w and -x has the same effect as -x on its own, so we
222    can treat them as the same. */
223    
224    static const char *prefix[] = {
225      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
226    
227    static const char *suffix[] = {
228      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
229    
230    /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
231    
232    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
233    
234    const char utf8_table4[] = {
235      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
236      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
237      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
238      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
239    
240    
241    
242  /*************************************************  /*************************************************
243  *       Functions for directory scanning         *  *            OS-specific functions               *
244  *************************************************/  *************************************************/
245    
246  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
247  although at present the only ones are for Unix, Win32, and for "no directory  although at present the only ones are for Unix, Win32, and for "no support". */
 recursion support". */  
248    
249    
250  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
251    
252  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
253  #include <sys/types.h>  #include <sys/types.h>
254  #include <sys/stat.h>  #include <sys/stat.h>
255  #include <dirent.h>  #include <dirent.h>
# Line 141  for (;;) Line 281  for (;;)
281    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
282      return dent->d_name;      return dent->d_name;
283    }    }
284  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
285  }  }
286    
287  static void  static void
# Line 151  closedir(dir); Line 291  closedir(dir);
291  }  }
292    
293    
294    /************* Test for regular file in Unix **********/
295    
296    static int
297    isregfile(char *filename)
298    {
299    struct stat statbuf;
300    if (stat(filename, &statbuf) < 0)
301      return 1;        /* In the expectation that opening as a file will fail */
302    return (statbuf.st_mode & S_IFMT) == S_IFREG;
303    }
304    
305    
306    /************* Test stdout for being a terminal in Unix **********/
307    
308    static BOOL
309    is_stdout_tty(void)
310    {
311    return isatty(fileno(stdout));
312    }
313    
314    
315  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
316    
317  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
318  Lionel Fourquaux. */  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
319    when it did not exist. */
320    
321    
322  #elif HAVE_WIN32API  #elif HAVE_WINDOWS_H
323    
324  #ifndef STRICT  #ifndef STRICT
325  # define STRICT  # define STRICT
# Line 165  Lionel Fourquaux. */ Line 327  Lionel Fourquaux. */
327  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
328  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
329  #endif  #endif
330    #ifndef INVALID_FILE_ATTRIBUTES
331    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
332    #endif
333    
334  #include <windows.h>  #include <windows.h>
335    
336  typedef struct directory_type  typedef struct directory_type
# Line 244  free(dir); Line 410  free(dir);
410  }  }
411    
412    
413    /************* Test for regular file in Win32 **********/
414    
415    /* I don't know how to do this, or if it can be done; assume all paths are
416    regular if they are not directories. */
417    
418    int isregfile(char *filename)
419    {
420    return !isdirectory(filename)
421    }
422    
423    
424    /************* Test stdout for being a terminal in Win32 **********/
425    
426    /* I don't know how to do this; assume never */
427    
428    static BOOL
429    is_stdout_tty(void)
430    {
431    FALSE;
432    }
433    
434    
435  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
436    
437  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
# Line 252  free(dir); Line 440  free(dir);
440    
441  typedef void directory_type;  typedef void directory_type;
442    
443  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
444  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
445  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
446  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
447    
448    
449    /************* Test for regular file when we can't do it **********/
450    
451    /* Assume all files are regular. */
452    
453    int isregfile(char *filename) { return 1; }
454    
455    
456    /************* Test stdout for being a terminal when we can't do it **********/
457    
458    static BOOL
459    is_stdout_tty(void)
460    {
461    return FALSE;
462    }
463    
464    
465  #endif  #endif
466    
467    
468    
469  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
470  /*************************************************  /*************************************************
471  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
472  *************************************************/  *************************************************/
# Line 284  return sys_errlist[n]; Line 489  return sys_errlist[n];
489    
490    
491  /*************************************************  /*************************************************
492  *              Grep an individual file           *  *             Find end of line                   *
493  *************************************************/  *************************************************/
494    
495  static int  /* The length of the endline sequence that is found is set via lenptr. This may
496  pcregrep(FILE *in, char *name)  be zero at the very end of the file if there is no line-ending sequence there.
 {  
 int rc = 1;  
 int linenumber = 0;  
 int count = 0;  
 int offsets[99];  
 char buffer[BUFSIZ];  
497    
498  while (fgets(buffer, sizeof(buffer), in) != NULL)  Arguments:
499      p         current position in line
500      endptr    end of available data
501      lenptr    where to put the length of the eol sequence
502    
503    Returns:    pointer to the byte after the end of the line, including any newline byte(s)
504    */
505    
506    static char *
507    end_of_line(char *p, char *endptr, int *lenptr)
508    {
509    switch(endlinetype)
510    {    {
511    BOOL match = FALSE;    default:      /* Just in case */
512    int i;    case EL_LF:
513    int length = (int)strlen(buffer);    while (p < endptr && *p != '\n') p++;
514    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    if (p < endptr)
515    linenumber++;      {
516        *lenptr = 1;
517        return p + 1;
518        }
519      *lenptr = 0;
520      return endptr;
521    
522    for (i = 0; !match && i < pattern_count; i++)    case EL_CR:
523      while (p < endptr && *p != '\r') p++;
524      if (p < endptr)
525      {      {
526      match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,      *lenptr = 1;
527        offsets, 99) >= 0;      return p + 1;
     if (match && whole_lines && offsets[1] != length) match = FALSE;  
528      }      }
529      *lenptr = 0;
530      return endptr;
531    
532    if (match != invert)    case EL_CRLF:
533      for (;;)
534      {      {
535      if (count_only) count++;      while (p < endptr && *p != '\r') p++;
536        if (++p >= endptr)
537          {
538          *lenptr = 0;
539          return endptr;
540          }
541        if (*p == '\n')
542          {
543          *lenptr = 2;
544          return p + 1;
545          }
546        }
547      break;
548    
549      case EL_ANYCRLF:
550      while (p < endptr)
551        {
552        int extra = 0;
553        register int c = *((unsigned char *)p);
554    
555      else if (filenames_only)      if (utf8 && c >= 0xc0)
556        {        {
557        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        int gcii, gcss;
558        return 0;        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
559          gcss = 6*extra;
560          c = (c & utf8_table3[extra]) << gcss;
561          for (gcii = 1; gcii <= extra; gcii++)
562            {
563            gcss -= 6;
564            c |= (p[gcii] & 0x3f) << gcss;
565            }
566        }        }
567    
568      else if (silent) return 0;      p += 1 + extra;
569    
570      else      switch (c)
571        {        {
572        if (name != NULL) fprintf(stdout, "%s:", name);        case 0x0a:    /* LF */
573        if (number) fprintf(stdout, "%d:", linenumber);        *lenptr = 1;
574        fprintf(stdout, "%s\n", buffer);        return p;
575    
576          case 0x0d:    /* CR */
577          if (p < endptr && *p == 0x0a)
578            {
579            *lenptr = 2;
580            p++;
581            }
582          else *lenptr = 1;
583          return p;
584    
585          default:
586          break;
587        }        }
588        }   /* End of loop for ANYCRLF case */
589    
590      rc = 0;    *lenptr = 0;  /* Must have hit the end */
591      }    return endptr;
   }  
592    
593  if (count_only)    case EL_ANY:
594    {    while (p < endptr)
595    if (name != NULL) fprintf(stdout, "%s:", name);      {
596    fprintf(stdout, "%d\n", count);      int extra = 0;
597    }      register int c = *((unsigned char *)p);
598    
599  return rc;      if (utf8 && c >= 0xc0)
600  }        {
601          int gcii, gcss;
602          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
603          gcss = 6*extra;
604          c = (c & utf8_table3[extra]) << gcss;
605          for (gcii = 1; gcii <= extra; gcii++)
606            {
607            gcss -= 6;
608            c |= (p[gcii] & 0x3f) << gcss;
609            }
610          }
611    
612        p += 1 + extra;
613    
614        switch (c)
615          {
616          case 0x0a:    /* LF */
617          case 0x0b:    /* VT */
618          case 0x0c:    /* FF */
619          *lenptr = 1;
620          return p;
621    
622          case 0x0d:    /* CR */
623          if (p < endptr && *p == 0x0a)
624            {
625            *lenptr = 2;
626            p++;
627            }
628          else *lenptr = 1;
629          return p;
630    
631          case 0x85:    /* NEL */
632          *lenptr = utf8? 2 : 1;
633          return p;
634    
635          case 0x2028:  /* LS */
636          case 0x2029:  /* PS */
637          *lenptr = 3;
638          return p;
639    
640          default:
641          break;
642          }
643        }   /* End of loop for ANY case */
644    
645      *lenptr = 0;  /* Must have hit the end */
646      return endptr;
647      }     /* End of overall switch */
648    }
649    
650    
651    
652  /*************************************************  /*************************************************
653  *     Grep a file or recurse into a directory    *  *         Find start of previous line            *
654  *************************************************/  *************************************************/
655    
656  static int  /* This is called when looking back for before lines to print.
 grep_or_recurse(char *filename, BOOL dir_recurse, BOOL show_filenames,  
   BOOL only_one_at_top)  
 {  
 int rc = 1;  
 int sep;  
 FILE *in;  
657    
658  /* If the file is a directory and we are recursing, scan each file within it.  Arguments:
659  The scanning code is localized so it can be made system-specific. */    p         start of the subsequent line
660      startptr  start of available data
661    
662  if ((sep = isdirectory(filename)) != 0 && dir_recurse)  Returns:    pointer to the start of the previous line
663    {  */
   char buffer[1024];  
   char *nextfile;  
   directory_type *dir = opendirectory(filename);  
664    
665    if (dir == NULL)  static char *
666      {  previous_line(char *p, char *startptr)
667      fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,  {
668        strerror(errno));  switch(endlinetype)
669      return 2;    {
670      }    default:      /* Just in case */
671      case EL_LF:
672      p--;
673      while (p > startptr && p[-1] != '\n') p--;
674      return p;
675    
676      case EL_CR:
677      p--;
678      while (p > startptr && p[-1] != '\n') p--;
679      return p;
680    
681    while ((nextfile = readdirectory(dir)) != NULL)    case EL_CRLF:
682      for (;;)
683      {      {
684      int frc;      p -= 2;
685      sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);      while (p > startptr && p[-1] != '\n') p--;
686      frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);      if (p <= startptr + 1 || p[-2] == '\r') return p;
     if (frc == 0 && rc == 1) rc = 0;  
687      }      }
688      return p;   /* But control should never get here */
689    
690    closedirectory(dir);    case EL_ANY:
691    return rc;    case EL_ANYCRLF:
692    }    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
693      if (utf8) while ((*p & 0xc0) == 0x80) p--;
694    
695  /* If the file is not a directory, or we are not recursing, scan it. If this is    while (p > startptr)
696  the first and only argument at top level, we don't show the file name (unless      {
697  we are only showing the file name). Otherwise, control is via the      register int c;
698  show_filenames variable. */      char *pp = p - 1;
699    
700  in = fopen(filename, "r");      if (utf8)
701  if (in == NULL)        {
702    {        int extra = 0;
703    fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));        while ((*pp & 0xc0) == 0x80) pp--;
704    return 2;        c = *((unsigned char *)pp);
705    }        if (c >= 0xc0)
706            {
707            int gcii, gcss;
708            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
709            gcss = 6*extra;
710            c = (c & utf8_table3[extra]) << gcss;
711            for (gcii = 1; gcii <= extra; gcii++)
712              {
713              gcss -= 6;
714              c |= (pp[gcii] & 0x3f) << gcss;
715              }
716            }
717          }
718        else c = *((unsigned char *)pp);
719    
720  rc = pcregrep(in, (filenames_only || (show_filenames && !only_one_at_top))?      if (endlinetype == EL_ANYCRLF) switch (c)
721    filename : NULL);        {
722  fclose(in);        case 0x0a:    /* LF */
723  return rc;        case 0x0d:    /* CR */
724  }        return p;
725    
726          default:
727          break;
728          }
729    
730        else switch (c)
731          {
732          case 0x0a:    /* LF */
733          case 0x0b:    /* VT */
734          case 0x0c:    /* FF */
735          case 0x0d:    /* CR */
736          case 0x85:    /* NEL */
737          case 0x2028:  /* LS */
738          case 0x2029:  /* PS */
739          return p;
740    
741          default:
742          break;
743          }
744    
745  /*************************************************      p = pp;  /* Back one character */
746  *                Usage function                  *      }        /* End of loop for ANY case */
 *************************************************/  
747    
748  static int    return startptr;  /* Hit start of data */
749  usage(int rc)    }     /* End of overall switch */
 {  
 fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] [pattern] [file1 file2 ...]\n");  
 fprintf(stderr, "Type `pcregrep --help' for more information.\n");  
 return rc;  
750  }  }
751    
752    
753    
754    
755    
756  /*************************************************  /*************************************************
757  *                Help function                   *  *       Print the previous "after" lines         *
758  *************************************************/  *************************************************/
759    
760  static void  /* This is called if we are about to lose said lines because of buffer filling,
761  help(void)  and at the end of the file. The data in the line is written using fwrite() so
762  {  that a binary zero does not terminate it.
763  option_item *op;  
764    Arguments:
765  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");    lastmatchnumber   the number of the last matching line, plus one
766  printf("Search for PATTERN in each FILE or standard input.\n");    lastmatchrestart  where we restarted after the last match
767  printf("PATTERN must be present if -f is not used.\n");    endptr            end of available data
768  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");    printname         filename for printing
769    
770  printf("Options:\n");  Returns:            nothing
771    */
772    
773  for (op = optionlist; op->one_char != 0; op++)  static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
774      char *endptr, char *printname)
775    {
776    if (after_context > 0 && lastmatchnumber > 0)
777    {    {
778    int n;    int count = 0;
779    char s[4];    while (lastmatchrestart < endptr && count++ < after_context)
780    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");      {
781    printf("  %s --%s%n", s, op->long_name, &n);      int ellength;
782    n = 30 - n;      char *pp = lastmatchrestart;
783    if (n < 1) n = 1;      if (printname != NULL) fprintf(stdout, "%s-", printname);
784    printf("%.*s%s\n", n, "                    ", op->help_text);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
785        pp = end_of_line(pp, endptr, &ellength);
786        fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
787        lastmatchrestart = pp;
788        }
789      hyphenpending = TRUE;
790    }    }
   
 printf("\n  -f<filename>  or  --file=<filename>\n");  
 printf("    Read patterns from <filename> instead of using a command line option.\n");  
 printf("    Trailing white space is removed; blanks lines are ignored.\n");  
 printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
   
 printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  
 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  
791  }  }
792    
793    
794    
   
795  /*************************************************  /*************************************************
796  *                Handle an option                *  *            Grep an individual file             *
797  *************************************************/  *************************************************/
798    
799    /* This is called from grep_or_recurse() below. It uses a buffer that is three
800    times the value of MBUFTHIRD. The matching point is never allowed to stray into
801    the top third of the buffer, thus keeping more of the file available for
802    context printing or for multiline scanning. For large files, the pointer will
803    be in the middle third most of the time, so the bottom third is available for
804    "before" context printing.
805    
806    Arguments:
807      in           the fopened FILE stream
808      printname    the file name if it is to be printed for each match
809                   or NULL if the file name is not to be printed
810                   it cannot be NULL if filenames[_nomatch]_only is set
811    
812    Returns:       0 if there was at least one match
813                   1 otherwise (no matches)
814    */
815    
816  static int  static int
817  handle_option(int letter, int options)  pcregrep(FILE *in, char *printname)
818  {  {
819  switch(letter)  int rc = 1;
820    {  int linenumber = 1;
821    case -1:  help(); exit(0);  int lastmatchnumber = 0;
822    case 'c': count_only = TRUE; break;  int count = 0;
823    case 'h': filenames = FALSE; break;  int offsets[99];
824    case 'i': options |= PCRE_CASELESS; break;  char *lastmatchrestart = NULL;
825    case 'l': filenames_only = TRUE;  char buffer[3*MBUFTHIRD];
826    case 'n': number = TRUE; break;  char *ptr = buffer;
827    case 'r': recurse = TRUE; break;  char *endptr;
828    case 's': silent = TRUE; break;  size_t bufflength;
829    case 'u': options |= PCRE_UTF8; break;  BOOL endhyphenpending = FALSE;
830    case 'v': invert = TRUE; break;  
831    case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;  /* Do the first read into the start of the buffer and set up the pointer to
832    end of what we have. */
833    
834    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
835    endptr = buffer + bufflength;
836    
837    /* Loop while the current pointer is not at the end of the file. For large
838    files, endptr will be at the end of the buffer when we are in the middle of the
839    file, but ptr will never get there, because as soon as it gets over 2/3 of the
840    way, the buffer is shifted left and re-filled. */
841    
842    case 'V':  while (ptr < endptr)
843    fprintf(stderr, "pcregrep version %s using ", VERSION);    {
844    fprintf(stderr, "PCRE version %s\n", pcre_version());    int i, endlinelength;
845    exit(0);    int mrc = 0;
846    break;    BOOL match = FALSE;
847      char *t = ptr;
848      size_t length, linelength;
849    
850    default:    /* At this point, ptr is at the start of a line. We need to find the length
851    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    of the subject string to pass to pcre_exec(). In multiline mode, it is the
852    exit(usage(2));    length remainder of the data in the buffer. Otherwise, it is the length of
853    }    the next line. After matching, we always advance by the length of the next
854      line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
855      that any match is constrained to be in the first line. */
856    
857      t = end_of_line(t, endptr, &endlinelength);
858      linelength = t - ptr - endlinelength;
859      length = multiline? (size_t)(endptr - ptr) : linelength;
860    
861  return options;    /* Extra processing for Jeffrey Friedl's debugging. */
 }  
862    
863    #ifdef JFRIEDL_DEBUG
864      if (jfriedl_XT || jfriedl_XR)
865      {
866          #include <sys/time.h>
867          #include <time.h>
868          struct timeval start_time, end_time;
869          struct timezone dummy;
870    
871          if (jfriedl_XT)
872          {
873              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
874              const char *orig = ptr;
875              ptr = malloc(newlen + 1);
876              if (!ptr) {
877                      printf("out of memory");
878                      exit(2);
879              }
880              endptr = ptr;
881              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
882              for (i = 0; i < jfriedl_XT; i++) {
883                      strncpy(endptr, orig,  length);
884                      endptr += length;
885              }
886              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
887              length = newlen;
888          }
889    
890          if (gettimeofday(&start_time, &dummy) != 0)
891                  perror("bad gettimeofday");
892    
 /*************************************************  
 *                Main program                    *  
 *************************************************/  
893    
894  int        for (i = 0; i < jfriedl_XR; i++)
895  main(int argc, char **argv)            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
 {  
 int i, j;  
 int rc = 1;  
 int options = 0;  
 int errptr;  
 const char *error;  
 BOOL only_one_at_top;  
896    
897  /* Process the options */        if (gettimeofday(&end_time, &dummy) != 0)
898                  perror("bad gettimeofday");
899    
900  for (i = 1; i < argc; i++)        double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
901    {                        -
902    if (argv[i][0] != '-') break;                        (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
903    
904    /* Missing options */        printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
905          return 0;
906      }
907    #endif
908    
   if (argv[i][1] == 0) exit(usage(2));  
909    
910    /* Long name options */    /* Run through all the patterns until one matches. Note that we don't include
911      the final newline in the subject string. */
912    
913    if (argv[i][1] == '-')    for (i = 0; i < pattern_count; i++)
914      {      {
915      option_item *op;      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
916          offsets, 99);
917      if (strncmp(argv[i]+2, "file=", 5) == 0)      if (mrc >= 0) { match = TRUE; break; }
918        {      if (mrc != PCRE_ERROR_NOMATCH)
       pattern_filename = argv[i] + 7;  
       continue;  
       }  
   
     for (op = optionlist; op->one_char != 0; op++)  
919        {        {
920        if (strcmp(argv[i]+2, op->long_name) == 0)        fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
921          if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
922          fprintf(stderr, "this line:\n");
923          fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */
924          fprintf(stderr, "\n");
925          if (error_count == 0 &&
926              (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
927          {          {
928          options = handle_option(op->one_char, options);          fprintf(stderr, "pcregrep: error %d means that a resource limit "
929          break;            "was exceeded\n", mrc);
930            fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
931            }
932          if (error_count++ > 20)
933            {
934            fprintf(stderr, "pcregrep: too many errors - abandoned\n");
935            exit(2);
936            }
937          match = invert;    /* No more matching; don't show the line again */
938          break;
939          }
940        }
941    
942      /* If it's a match or a not-match (as required), do what's wanted. */
943    
944      if (match != invert)
945        {
946        BOOL hyphenprinted = FALSE;
947    
948        /* We've failed if we want a file that doesn't have any matches. */
949    
950        if (filenames == FN_NOMATCH_ONLY) return 1;
951    
952        /* Just count if just counting is wanted. */
953    
954        if (count_only) count++;
955    
956        /* If all we want is a file name, there is no need to scan any more lines
957        in the file. */
958    
959        else if (filenames == FN_ONLY)
960          {
961          fprintf(stdout, "%s\n", printname);
962          return 0;
963          }
964    
965        /* Likewise, if all we want is a yes/no answer. */
966    
967        else if (quiet) return 0;
968    
969        /* The --only-matching option prints just the substring that matched, and
970        does not print any context. */
971    
972        else if (only_matching)
973          {
974          if (printname != NULL) fprintf(stdout, "%s:", printname);
975          if (number) fprintf(stdout, "%d:", linenumber);
976          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
977          fprintf(stdout, "\n");
978          }
979    
980        /* This is the default case when none of the above options is set. We print
981        the matching line(s), possibly preceded and/or followed by other lines of
982        context. */
983    
984        else
985          {
986          /* See if there is a requirement to print some "after" lines from a
987          previous match. We never print any overlaps. */
988    
989          if (after_context > 0 && lastmatchnumber > 0)
990            {
991            int ellength;
992            int linecount = 0;
993            char *p = lastmatchrestart;
994    
995            while (p < ptr && linecount < after_context)
996              {
997              p = end_of_line(p, ptr, &ellength);
998              linecount++;
999              }
1000    
1001            /* It is important to advance lastmatchrestart during this printing so
1002            that it interacts correctly with any "before" printing below. Print
1003            each line's data using fwrite() in case there are binary zeroes. */
1004    
1005            while (lastmatchrestart < p)
1006              {
1007              char *pp = lastmatchrestart;
1008              if (printname != NULL) fprintf(stdout, "%s-", printname);
1009              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1010              pp = end_of_line(pp, endptr, &ellength);
1011              fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1012              lastmatchrestart = pp;
1013              }
1014            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1015            }
1016    
1017          /* If there were non-contiguous lines printed above, insert hyphens. */
1018    
1019          if (hyphenpending)
1020            {
1021            fprintf(stdout, "--\n");
1022            hyphenpending = FALSE;
1023            hyphenprinted = TRUE;
1024            }
1025    
1026          /* See if there is a requirement to print some "before" lines for this
1027          match. Again, don't print overlaps. */
1028    
1029          if (before_context > 0)
1030            {
1031            int linecount = 0;
1032            char *p = ptr;
1033    
1034            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1035                   linecount < before_context)
1036              {
1037              linecount++;
1038              p = previous_line(p, buffer);
1039              }
1040    
1041            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1042              fprintf(stdout, "--\n");
1043    
1044            while (p < ptr)
1045              {
1046              int ellength;
1047              char *pp = p;
1048              if (printname != NULL) fprintf(stdout, "%s-", printname);
1049              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1050              pp = end_of_line(pp, endptr, &ellength);
1051              fwrite(p, 1, pp - p, stdout);
1052              p = pp;
1053              }
1054            }
1055    
1056          /* Now print the matching line(s); ensure we set hyphenpending at the end
1057          of the file if any context lines are being output. */
1058    
1059          if (after_context > 0 || before_context > 0)
1060            endhyphenpending = TRUE;
1061    
1062          if (printname != NULL) fprintf(stdout, "%s:", printname);
1063          if (number) fprintf(stdout, "%d:", linenumber);
1064    
1065          /* In multiline mode, we want to print to the end of the line in which
1066          the end of the matched string is found, so we adjust linelength and the
1067          line number appropriately, but only when there actually was a match
1068          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1069          the match will always be before the first newline sequence. */
1070    
1071          if (multiline)
1072            {
1073            int ellength;
1074            char *endmatch = ptr;
1075            if (!invert)
1076              {
1077              endmatch += offsets[1];
1078              t = ptr;
1079              while (t < endmatch)
1080                {
1081                t = end_of_line(t, endptr, &ellength);
1082                if (t <= endmatch) linenumber++; else break;
1083                }
1084              }
1085            endmatch = end_of_line(endmatch, endptr, &ellength);
1086            linelength = endmatch - ptr - ellength;
1087            }
1088    
1089          /*** NOTE: Use only fwrite() to output the data line, so that binary
1090          zeroes are treated as just another data character. */
1091    
1092          /* This extra option, for Jeffrey Friedl's debugging requirements,
1093          replaces the matched string, or a specific captured string if it exists,
1094          with X. When this happens, colouring is ignored. */
1095    
1096    #ifdef JFRIEDL_DEBUG
1097          if (S_arg >= 0 && S_arg < mrc)
1098            {
1099            int first = S_arg * 2;
1100            int last  = first + 1;
1101            fwrite(ptr, 1, offsets[first], stdout);
1102            fprintf(stdout, "X");
1103            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1104            }
1105          else
1106    #endif
1107    
1108          /* We have to split the line(s) up if colouring. */
1109    
1110          if (do_colour)
1111            {
1112            fwrite(ptr, 1, offsets[0], stdout);
1113            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1114            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1115            fprintf(stdout, "%c[00m", 0x1b);
1116            fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1117              stdout);
1118            }
1119          else fwrite(ptr, 1, linelength + endlinelength, stdout);
1120          }
1121    
1122        /* End of doing what has to be done for a match */
1123    
1124        rc = 0;    /* Had some success */
1125    
1126        /* Remember where the last match happened for after_context. We remember
1127        where we are about to restart, and that line's number. */
1128    
1129        lastmatchrestart = ptr + linelength + endlinelength;
1130        lastmatchnumber = linenumber + 1;
1131        }
1132    
1133      /* For a match in multiline inverted mode (which of course did not cause
1134      anything to be printed), we have to move on to the end of the match before
1135      proceeding. */
1136    
1137      if (multiline && invert && match)
1138        {
1139        int ellength;
1140        char *endmatch = ptr + offsets[1];
1141        t = ptr;
1142        while (t < endmatch)
1143          {
1144          t = end_of_line(t, endptr, &ellength);
1145          if (t <= endmatch) linenumber++; else break;
1146          }
1147        endmatch = end_of_line(endmatch, endptr, &ellength);
1148        linelength = endmatch - ptr - ellength;
1149        }
1150    
1151      /* Advance to after the newline and increment the line number. */
1152    
1153      ptr += linelength + endlinelength;
1154      linenumber++;
1155    
1156      /* If we haven't yet reached the end of the file (the buffer is full), and
1157      the current point is in the top 1/3 of the buffer, slide the buffer down by
1158      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1159      about to be lost, print them. */
1160    
1161      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1162        {
1163        if (after_context > 0 &&
1164            lastmatchnumber > 0 &&
1165            lastmatchrestart < buffer + MBUFTHIRD)
1166          {
1167          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1168          lastmatchnumber = 0;
1169          }
1170    
1171        /* Now do the shuffle */
1172    
1173        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1174        ptr -= MBUFTHIRD;
1175        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1176        endptr = buffer + bufflength;
1177    
1178        /* Adjust any last match point */
1179    
1180        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1181        }
1182      }     /* Loop through the whole file */
1183    
1184    /* End of file; print final "after" lines if wanted; do_after_lines sets
1185    hyphenpending if it prints something. */
1186    
1187    if (!only_matching && !count_only)
1188      {
1189      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1190      hyphenpending |= endhyphenpending;
1191      }
1192    
1193    /* Print the file name if we are looking for those without matches and there
1194    were none. If we found a match, we won't have got this far. */
1195    
1196    if (filenames == FN_NOMATCH_ONLY)
1197      {
1198      fprintf(stdout, "%s\n", printname);
1199      return 0;
1200      }
1201    
1202    /* Print the match count if wanted */
1203    
1204    if (count_only)
1205      {
1206      if (printname != NULL) fprintf(stdout, "%s:", printname);
1207      fprintf(stdout, "%d\n", count);
1208      }
1209    
1210    return rc;
1211    }
1212    
1213    
1214    
1215    /*************************************************
1216    *     Grep a file or recurse into a directory    *
1217    *************************************************/
1218    
1219    /* Given a path name, if it's a directory, scan all the files if we are
1220    recursing; if it's a file, grep it.
1221    
1222    Arguments:
1223      pathname          the path to investigate
1224      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1225      only_one_at_top   TRUE if the path is the only one at toplevel
1226    
1227    Returns:   0 if there was at least one match
1228               1 if there were no matches
1229               2 there was some kind of error
1230    
1231    However, file opening failures are suppressed if "silent" is set.
1232    */
1233    
1234    static int
1235    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1236    {
1237    int rc = 1;
1238    int sep;
1239    FILE *in;
1240    
1241    /* If the file name is "-" we scan stdin */
1242    
1243    if (strcmp(pathname, "-") == 0)
1244      {
1245      return pcregrep(stdin,
1246        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1247          stdin_name : NULL);
1248      }
1249    
1250    
1251    /* If the file is a directory, skip if skipping or if we are recursing, scan
1252    each file within it, subject to any include or exclude patterns that were set.
1253    The scanning code is localized so it can be made system-specific. */
1254    
1255    if ((sep = isdirectory(pathname)) != 0)
1256      {
1257      if (dee_action == dee_SKIP) return 1;
1258      if (dee_action == dee_RECURSE)
1259        {
1260        char buffer[1024];
1261        char *nextfile;
1262        directory_type *dir = opendirectory(pathname);
1263    
1264        if (dir == NULL)
1265          {
1266          if (!silent)
1267            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1268              strerror(errno));
1269          return 2;
1270          }
1271    
1272        while ((nextfile = readdirectory(dir)) != NULL)
1273          {
1274          int frc, blen;
1275          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1276          blen = strlen(buffer);
1277    
1278          if (exclude_compiled != NULL &&
1279              pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1280            continue;
1281    
1282          if (include_compiled != NULL &&
1283              pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1284            continue;
1285    
1286          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1287          if (frc > 1) rc = frc;
1288           else if (frc == 0 && rc == 1) rc = 0;
1289          }
1290    
1291        closedirectory(dir);
1292        return rc;
1293        }
1294      }
1295    
1296    /* If the file is not a directory and not a regular file, skip it if that's
1297    been requested. */
1298    
1299    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1300    
1301    /* Control reaches here if we have a regular file, or if we have a directory
1302    and recursion or skipping was not requested, or if we have anything else and
1303    skipping was not requested. The scan proceeds. If this is the first and only
1304    argument at top level, we don't show the file name, unless we are only showing
1305    the file name, or the filename was forced (-H). */
1306    
1307    in = fopen(pathname, "r");
1308    if (in == NULL)
1309      {
1310      if (!silent)
1311        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1312          strerror(errno));
1313      return 2;
1314      }
1315    
1316    rc = pcregrep(in, (filenames > FN_DEFAULT ||
1317      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1318    
1319    fclose(in);
1320    return rc;
1321    }
1322    
1323    
1324    
1325    
1326    /*************************************************
1327    *                Usage function                  *
1328    *************************************************/
1329    
1330    static int
1331    usage(int rc)
1332    {
1333    option_item *op;
1334    fprintf(stderr, "Usage: pcregrep [-");
1335    for (op = optionlist; op->one_char != 0; op++)
1336      {
1337      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1338      }
1339    fprintf(stderr, "] [long options] [pattern] [files]\n");
1340    fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1341    return rc;
1342    }
1343    
1344    
1345    
1346    
1347    /*************************************************
1348    *                Help function                   *
1349    *************************************************/
1350    
1351    static void
1352    help(void)
1353    {
1354    option_item *op;
1355    
1356    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1357    printf("Search for PATTERN in each FILE or standard input.\n");
1358    printf("PATTERN must be present if neither -e nor -f is used.\n");
1359    printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1360    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1361    
1362    printf("Options:\n");
1363    
1364    for (op = optionlist; op->one_char != 0; op++)
1365      {
1366      int n;
1367      char s[4];
1368      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1369      printf("  %s --%s%n", s, op->long_name, &n);
1370      n = 30 - n;
1371      if (n < 1) n = 1;
1372      printf("%.*s%s\n", n, "                    ", op->help_text);
1373      }
1374    
1375    printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1376    printf("trailing white space is removed and blank lines are ignored.\n");
1377    printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1378    
1379    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1380    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1381    }
1382    
1383    
1384    
1385    
1386    /*************************************************
1387    *    Handle a single-letter, no data option      *
1388    *************************************************/
1389    
1390    static int
1391    handle_option(int letter, int options)
1392    {
1393    switch(letter)
1394      {
1395      case N_HELP: help(); exit(0);
1396      case 'c': count_only = TRUE; break;
1397      case 'F': process_options |= PO_FIXED_STRINGS; break;
1398      case 'H': filenames = FN_FORCE; break;
1399      case 'h': filenames = FN_NONE; break;
1400      case 'i': options |= PCRE_CASELESS; break;
1401      case 'l': filenames = FN_ONLY; break;
1402      case 'L': filenames = FN_NOMATCH_ONLY; break;
1403      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1404      case 'n': number = TRUE; break;
1405      case 'o': only_matching = TRUE; break;
1406      case 'q': quiet = TRUE; break;
1407      case 'r': dee_action = dee_RECURSE; break;
1408      case 's': silent = TRUE; break;
1409      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1410      case 'v': invert = TRUE; break;
1411      case 'w': process_options |= PO_WORD_MATCH; break;
1412      case 'x': process_options |= PO_LINE_MATCH; break;
1413    
1414      case 'V':
1415      fprintf(stderr, "pcregrep version %s\n", pcre_version());
1416      exit(0);
1417      break;
1418    
1419      default:
1420      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1421      exit(usage(2));
1422      }
1423    
1424    return options;
1425    }
1426    
1427    
1428    
1429    
1430    /*************************************************
1431    *          Construct printed ordinal             *
1432    *************************************************/
1433    
/* This turns a number into "1st", "3rd", etc. A number whose last two digits
are 11, 12, or 13 takes "th" ("11th", "112th") instead of the suffix that its
final digit would otherwise select.

Argument:   n    the number
Returns:    a pointer to a static buffer holding the text; the buffer is
              overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[16];    /* enough for a 32-bit int, a sign, and a suffix */
const char *suffix = "th";
int lasttwo = n % 100;

if (lasttwo < 11 || lasttwo > 13) switch (n%10)
  {
  case 1: suffix = "st"; break;
  case 2: suffix = "nd"; break;
  case 3: suffix = "rd"; break;
  default: break;
  }

sprintf(buffer, "%d%s", n, suffix);
return buffer;
}
1452    
1453    
1454    
1455    /*************************************************
1456    *          Compile a single pattern              *
1457    *************************************************/
1458    
1459    /* When the -F option has been used, this is called for each substring.
1460    Otherwise it's called for each supplied pattern.
1461    
1462    Arguments:
1463      pattern        the pattern string
1464      options        the PCRE options
1465      filename       the file name, or NULL for a command-line pattern
1466      count          0 if this is the only command line pattern, or
1467                     number of the command line pattern, or
1468                     linenumber for a pattern from a file
1469    
1470    Returns:         TRUE on success, FALSE after an error
1471    */
1472    
1473    static BOOL
1474    compile_single_pattern(char *pattern, int options, char *filename, int count)
1475    {
1476    char buffer[MBUFTHIRD + 16];
1477    const char *error;
1478    int errptr;
1479    
1480    if (pattern_count >= MAX_PATTERN_COUNT)
1481      {
1482      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1483        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1484      return FALSE;
1485      }
1486    
1487    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1488      suffix[process_options]);
1489    pattern_list[pattern_count] =
1490      pcre_compile(buffer, options, &error, &errptr, pcretables);
1491    if (pattern_list[pattern_count] != NULL)
1492      {
1493      pattern_count++;
1494      return TRUE;
1495      }
1496    
1497    /* Handle compile errors */
1498    
1499    errptr -= (int)strlen(prefix[process_options]);
1500    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1501    
1502    if (filename == NULL)
1503      {
1504      if (count == 0)
1505        fprintf(stderr, "pcregrep: Error in command-line regex "
1506          "at offset %d: %s\n", errptr, error);
1507      else
1508        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1509          "at offset %d: %s\n", ordin(count), errptr, error);
1510      }
1511    else
1512      {
1513      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1514        "at offset %d: %s\n", count, filename, errptr, error);
1515      }
1516    
1517    return FALSE;
1518    }
1519    
1520    
1521    
1522    /*************************************************
1523    *           Compile one supplied pattern         *
1524    *************************************************/
1525    
1526    /* When the -F option has been used, each string may be a list of strings,
1527    separated by line breaks. They will be matched literally.
1528    
1529    Arguments:
1530      pattern        the pattern string
1531      options        the PCRE options
1532      filename       the file name, or NULL for a command-line pattern
1533      count          0 if this is the only command line pattern, or
1534                     number of the command line pattern, or
1535                     linenumber for a pattern from a file
1536    
1537    Returns:         TRUE on success, FALSE after an error
1538    */
1539    
1540    static BOOL
1541    compile_pattern(char *pattern, int options, char *filename, int count)
1542    {
1543    if ((process_options & PO_FIXED_STRINGS) != 0)
1544      {
1545      char *eop = pattern + strlen(pattern);
1546      char buffer[MBUFTHIRD];
1547      for(;;)
1548        {
1549        int ellength;
1550        char *p = end_of_line(pattern, eop, &ellength);
1551        if (ellength == 0)
1552          return compile_single_pattern(pattern, options, filename, count);
1553        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1554        pattern = p;
1555        if (!compile_single_pattern(buffer, options, filename, count))
1556          return FALSE;
1557        }
1558      }
1559    else return compile_single_pattern(pattern, options, filename, count);
1560    }
1561    
1562    
1563    
1564    /*************************************************
1565    *                Main program                    *
1566    *************************************************/
1567    
1568    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1569    
1570    int
1571    main(int argc, char **argv)
1572    {
1573    int i, j;
1574    int rc = 1;
1575    int pcre_options = 0;
1576    int cmd_pattern_count = 0;
1577    int hint_count = 0;
1578    int errptr;
1579    BOOL only_one_at_top;
1580    char *patterns[MAX_PATTERN_COUNT];
1581    const char *locale_from = "--locale";
1582    const char *error;
1583    
1584    /* Set the default line ending value from the default in the PCRE library;
1585    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1586    */
1587    
1588    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1589    switch(i)
1590      {
1591      default:                 newline = (char *)"lf"; break;
1592      case '\r':               newline = (char *)"cr"; break;
1593      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1594      case -1:                 newline = (char *)"any"; break;
1595      case -2:                 newline = (char *)"anycrlf"; break;
1596      }
1597    
1598    /* Process the options */
1599    
1600    for (i = 1; i < argc; i++)
1601      {
1602      option_item *op = NULL;
1603      char *option_data = (char *)"";    /* default to keep compiler happy */
1604      BOOL longop;
1605      BOOL longopwasequals = FALSE;
1606    
1607      if (argv[i][0] != '-') break;
1608    
1609      /* If we hit an argument that is just "-", it may be a reference to STDIN,
1610      but only if we have previously had -e or -f to define the patterns. */
1611    
1612      if (argv[i][1] == 0)
1613        {
1614        if (pattern_filename != NULL || pattern_count > 0) break;
1615          else exit(usage(2));
1616        }
1617    
1618      /* Handle a long name option, or -- to terminate the options */
1619    
1620      if (argv[i][1] == '-')
1621        {
1622        char *arg = argv[i] + 2;
1623        char *argequals = strchr(arg, '=');
1624    
1625        if (*arg == 0)    /* -- terminates options */
1626          {
1627          i++;
1628          break;                /* out of the options-handling loop */
1629          }
1630    
1631        longop = TRUE;
1632    
1633        /* Some long options have data that follows after =, for example file=name.
1634        Some options have variations in the long name spelling: specifically, we
1635        allow "regexp" because GNU grep allows it, though I personally go along
1636        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1637        These options are entered in the table as "regex(p)". No option is in both
1638        these categories, fortunately. */
1639    
1640        for (op = optionlist; op->one_char != 0; op++)
1641          {
1642          char *opbra = strchr(op->long_name, '(');
1643          char *equals = strchr(op->long_name, '=');
1644          if (opbra == NULL)     /* Not a (p) case */
1645            {
1646            if (equals == NULL)  /* Not thing=data case */
1647              {
1648              if (strcmp(arg, op->long_name) == 0) break;
1649              }
1650            else                 /* Special case xxx=data */
1651              {
1652              int oplen = equals - op->long_name;
1653              int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1654              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1655                {
1656                option_data = arg + arglen;
1657                if (*option_data == '=')
1658                  {
1659                  option_data++;
1660                  longopwasequals = TRUE;
1661                  }
1662                break;
1663                }
1664              }
1665            }
1666          else                   /* Special case xxxx(p) */
1667            {
1668            char buff1[24];
1669            char buff2[24];
1670            int baselen = opbra - op->long_name;
1671            sprintf(buff1, "%.*s", baselen, op->long_name);
1672            sprintf(buff2, "%s%.*s", buff1,
1673              (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1674            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1675              break;
1676          }          }
1677        }        }
1678    
1679      if (op->one_char == 0)      if (op->one_char == 0)
1680        {        {
1681        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
# Line 550  for (i = 1; i < argc; i++) Line 1683  for (i = 1; i < argc; i++)
1683        }        }
1684      }      }
1685    
1686    /* One-char options */  
1687      /* Jeffrey Friedl's debugging harness uses these additional options which
1688      are not in the right form for putting in the option table because they use
1689      only one hyphen, yet are more than one character long. By putting them
1690      separately here, they will not get displayed as part of the help() output,
1691      but I don't think Jeffrey will care about that. */
1692    
1693    #ifdef JFRIEDL_DEBUG
1694      else if (strcmp(argv[i], "-pre") == 0) {
1695              jfriedl_prefix = argv[++i];
1696              continue;
1697      } else if (strcmp(argv[i], "-post") == 0) {
1698              jfriedl_postfix = argv[++i];
1699              continue;
1700      } else if (strcmp(argv[i], "-XT") == 0) {
1701              sscanf(argv[++i], "%d", &jfriedl_XT);
1702              continue;
1703      } else if (strcmp(argv[i], "-XR") == 0) {
1704              sscanf(argv[++i], "%d", &jfriedl_XR);
1705              continue;
1706      }
1707    #endif
1708    
1709    
1710      /* One-char options; many that have no data may be in a single argument; we
1711      continue till we hit the last one or one that needs data. */
1712    
1713    else    else
1714      {      {
1715      char *s = argv[i] + 1;      char *s = argv[i] + 1;
1716        longop = FALSE;
1717      while (*s != 0)      while (*s != 0)
1718        {        {
1719        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
1720            { if (*s == op->one_char) break; }
1721          if (op->one_char == 0)
1722          {          {
1723          pattern_filename = s + 1;          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1724          if (pattern_filename[0] == 0)            *s, argv[i]);
1725            {          exit(usage(2));
1726            if (i >= argc - 1)          }
1727              {        if (op->type != OP_NODATA || s[1] == 0)
1728              fprintf(stderr, "pcregrep: File name missing after -f\n");          {
1729              exit(usage(2));          option_data = s+1;
             }  
           pattern_filename = argv[++i];  
           }  
1730          break;          break;
1731          }          }
1732        else options = handle_option(*s++, options);        pcre_options = handle_option(*s++, pcre_options);
1733        }        }
1734      }      }
   }  
1735    
1736  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));    /* At this point we should have op pointing to a matched option. If the type
1737  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));    is NO_DATA, it means that there is no data, and the option might set
1738      something in the PCRE options. */
1739    
1740  if (pattern_list == NULL || hints_list == NULL)    if (op->type == OP_NODATA)
1741    {      {
1742    fprintf(stderr, "pcregrep: malloc failed\n");      pcre_options = handle_option(op->one_char, pcre_options);
1743    return 2;      continue;
1744    }      }
1745    
1746  /* Compile the regular expression(s). */    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1747      either has a value or defaults to something. It cannot have data in a
1748      separate item. At the moment, the only such options are "colo(u)r" and
1749      Jeffrey Friedl's special -S debugging option. */
1750    
1751  if (pattern_filename != NULL)    if (*option_data == 0 &&
1752    {        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
   FILE *f = fopen(pattern_filename, "r");  
   char buffer[BUFSIZ];  
   if (f == NULL)  
1753      {      {
1754      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      switch (op->one_char)
1755        strerror(errno));        {
1756      return 2;        case N_COLOUR:
1757          colour_option = (char *)"auto";
1758          break;
1759    #ifdef JFRIEDL_DEBUG
1760          case 'S':
1761          S_arg = 0;
1762          break;
1763    #endif
1764          }
1765        continue;
1766      }      }
1767    while (fgets(buffer, sizeof(buffer), f) != NULL)  
1768      /* Otherwise, find the data string for the option. */
1769    
1770      if (*option_data == 0)
1771      {      {
1772      char *s = buffer + (int)strlen(buffer);      if (i >= argc - 1 || longopwasequals)
1773      if (pattern_count >= MAX_PATTERN_COUNT)        {
1774          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1775          exit(usage(2));
1776          }
1777        option_data = argv[++i];
1778        }
1779    
1780      /* If the option type is OP_PATLIST, it's the -e option, which can be called
1781      multiple times to create a list of patterns. */
1782    
1783      if (op->type == OP_PATLIST)
1784        {
1785        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1786        {        {
1787        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1788          MAX_PATTERN_COUNT);          MAX_PATTERN_COUNT);
1789        return 2;        return 2;
1790        }        }
1791      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      patterns[cmd_pattern_count++] = option_data;
1792      if (s == buffer) continue;      }
1793      *s = 0;  
1794      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,    /* Otherwise, deal with single string or numeric data values. */
1795        &errptr, NULL);  
1796      if (pattern_list[pattern_count++] == NULL)    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1797        {
1798        *((char **)op->dataptr) = option_data;
1799        }
1800      else
1801        {
1802        char *endptr;
1803        int n = strtoul(option_data, &endptr, 10);
1804        if (*endptr != 0)
1805        {        {
1806        fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",        if (longop)
1807          pattern_count, errptr, error);          {
1808        return 2;          char *equals = strchr(op->long_name, '=');
1809            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1810              equals - op->long_name;
1811            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1812              option_data, nlen, op->long_name);
1813            }
1814          else
1815            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1816              option_data, op->one_char);
1817          exit(usage(2));
1818        }        }
1819        *((int *)op->dataptr) = n;
1820        }
1821      }
1822    
1823    /* Options have been decoded. If -C was used, its value is used as a default
1824    for -A and -B. */
1825    
1826    if (both_context > 0)
1827      {
1828      if (after_context == 0) after_context = both_context;
1829      if (before_context == 0) before_context = both_context;
1830      }
1831    
1832    /* If a locale has not been provided as an option, see if the LC_CTYPE or
1833    LC_ALL environment variable is set, and if so, use it. */
1834    
1835    if (locale == NULL)
1836      {
1837      locale = getenv("LC_ALL");
1838      locale_from = "LCC_ALL";
1839      }
1840    
1841    if (locale == NULL)
1842      {
1843      locale = getenv("LC_CTYPE");
1844      locale_from = "LC_CTYPE";
1845      }
1846    
1847    /* If a locale has been provided, set it, and generate the tables that PCRE
1848    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1849    
1850    if (locale != NULL)
1851      {
1852      if (setlocale(LC_CTYPE, locale) == NULL)
1853        {
1854        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1855          locale, locale_from);
1856        return 2;
1857        }
1858      pcretables = pcre_maketables();
1859      }
1860    
1861    /* Sort out colouring */
1862    
1863    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1864      {
1865      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1866      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1867      else
1868        {
1869        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1870          colour_option);
1871        return 2;
1872        }
1873      if (do_colour)
1874        {
1875        char *cs = getenv("PCREGREP_COLOUR");
1876        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1877        if (cs != NULL) colour_string = cs;
1878      }      }
   fclose(f);  
1879    }    }
1880    
1881  /* If no file name, a single regex must be given inline */  /* Interpret the newline type; the default settings are Unix-like. */
1882    
1883    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1884      {
1885      pcre_options |= PCRE_NEWLINE_CR;
1886      endlinetype = EL_CR;
1887      }
1888    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1889      {
1890      pcre_options |= PCRE_NEWLINE_LF;
1891      endlinetype = EL_LF;
1892      }
1893    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1894      {
1895      pcre_options |= PCRE_NEWLINE_CRLF;
1896      endlinetype = EL_CRLF;
1897      }
1898    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1899      {
1900      pcre_options |= PCRE_NEWLINE_ANY;
1901      endlinetype = EL_ANY;
1902      }
1903    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1904      {
1905      pcre_options |= PCRE_NEWLINE_ANYCRLF;
1906      endlinetype = EL_ANYCRLF;
1907      }
1908  else  else
1909    {    {
1910    if (i >= argc) return usage(2);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1911    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);    return 2;
1912    if (pattern_list[0] == NULL)    }
1913    
1914    /* Interpret the text values for -d and -D */
1915    
1916    if (dee_option != NULL)
1917      {
1918      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1919      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1920      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1921      else
1922      {      {
1923      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
       error);  
1924      return 2;      return 2;
1925      }      }
   pattern_count++;  
1926    }    }
1927    
1928  /* Study the regular expressions, as we will be running them may times */  if (DEE_option != NULL)
1929      {
1930      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1931      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1932      else
1933        {
1934        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1935        return 2;
1936        }
1937      }
1938    
1939    /* Check the values for Jeffrey Friedl's debugging options. */
1940    
1941    #ifdef JFRIEDL_DEBUG
1942    if (S_arg > 9)
1943      {
1944      fprintf(stderr, "pcregrep: bad value for -S option\n");
1945      return 2;
1946      }
1947    if (jfriedl_XT != 0 || jfriedl_XR != 0)
1948      {
1949      if (jfriedl_XT == 0) jfriedl_XT = 1;
1950      if (jfriedl_XR == 0) jfriedl_XR = 1;
1951      }
1952    #endif
1953    
1954    /* Get memory to store the pattern and hints lists. */
1955    
1956    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1957    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1958    
1959    if (pattern_list == NULL || hints_list == NULL)
1960      {
1961      fprintf(stderr, "pcregrep: malloc failed\n");
1962      goto EXIT2;
1963      }
1964    
1965    /* If no patterns were provided by -e, and there is no file provided by -f,
1966    the first argument is the one and only pattern, and it must exist. */
1967    
1968    if (cmd_pattern_count == 0 && pattern_filename == NULL)
1969      {
1970      if (i >= argc) return usage(2);
1971      patterns[cmd_pattern_count++] = argv[i++];
1972      }
1973    
1974    /* Compile the patterns that were provided on the command line, either by
1975    multiple uses of -e or as a single unkeyed pattern. */
1976    
1977    for (j = 0; j < cmd_pattern_count; j++)
1978      {
1979      if (!compile_pattern(patterns[j], pcre_options, NULL,
1980           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1981        goto EXIT2;
1982      }
1983    
1984    /* Compile the regular expressions that are provided in a file. */
1985    
1986    if (pattern_filename != NULL)
1987      {
1988      int linenumber = 0;
1989      FILE *f;
1990      char *filename;
1991      char buffer[MBUFTHIRD];
1992    
1993      if (strcmp(pattern_filename, "-") == 0)
1994        {
1995        f = stdin;
1996        filename = stdin_name;
1997        }
1998      else
1999        {
2000        f = fopen(pattern_filename, "r");
2001        if (f == NULL)
2002          {
2003          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2004            strerror(errno));
2005          goto EXIT2;
2006          }
2007        filename = pattern_filename;
2008        }
2009    
2010      while (fgets(buffer, MBUFTHIRD, f) != NULL)
2011        {
2012        char *s = buffer + (int)strlen(buffer);
2013        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2014        *s = 0;
2015        linenumber++;
2016        if (buffer[0] == 0) continue;   /* Skip blank lines */
2017        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2018          goto EXIT2;
2019        }
2020    
2021      if (f != stdin) fclose(f);
2022      }
2023    
2024    /* Study the regular expressions, as we will be running them many times */
2025    
2026  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
2027    {    {
# Line 646  for (j = 0; j < pattern_count; j++) Line 2031  for (j = 0; j < pattern_count; j++)
2031      char s[16];      char s[16];
2032      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2033      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2034      return 2;      goto EXIT2;
2035        }
2036      hint_count++;
2037      }
2038    
2039    /* If there are include or exclude patterns, compile them. */
2040    
2041    if (exclude_pattern != NULL)
2042      {
2043      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2044        pcretables);
2045      if (exclude_compiled == NULL)
2046        {
2047        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2048          errptr, error);
2049        goto EXIT2;
2050        }
2051      }
2052    
2053    if (include_pattern != NULL)
2054      {
2055      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2056        pcretables);
2057      if (include_compiled == NULL)
2058        {
2059        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2060          errptr, error);
2061        goto EXIT2;
2062      }      }
2063    }    }
2064    
2065  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
2066    
2067  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc)
2068      {
2069      rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2070      goto EXIT;
2071      }
2072    
2073  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2074  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
2075  the file name if the argument is not a directory. */  the file name if the argument is not a directory and filenames are not
2076    otherwise forced. */
2077    
2078  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
 if (filenames_only) filenames = TRUE;  
2079    
2080  for (; i < argc; i++)  for (; i < argc; i++)
2081    {    {
2082    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2083    if (frc == 0 && rc == 1) rc = 0;      only_one_at_top);
2084      if (frc > 1) rc = frc;
2085        else if (frc == 0 && rc == 1) rc = 0;
2086    }    }
2087    
2088    EXIT:
2089    if (pattern_list != NULL)
2090      {
2091      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2092      free(pattern_list);
2093      }
2094    if (hints_list != NULL)
2095      {
2096      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2097      free(hints_list);
2098      }
2099  return rc;  return rc;
2100    
2101    EXIT2:
2102    rc = 2;
2103    goto EXIT;
2104  }  }
2105    
2106  /* End */  /* End of pcregrep */

Legend:
Removed from v.75  
changed lines
  Added in v.243

  ViewVC Help
Powered by ViewVC 1.1.5