/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2006 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  */  */
39    
40  #include <ctype.h>  #include <ctype.h>
41    #include <locale.h>
42  #include <stdio.h>  #include <stdio.h>
43  #include <string.h>  #include <string.h>
44  #include <stdlib.h>  #include <stdlib.h>
# Line 55  POSSIBILITY OF SUCH DAMAGE. Line 56  POSSIBILITY OF SUCH DAMAGE.
56    
57  typedef int BOOL;  typedef int BOOL;
58    
59  #define VERSION "4.1 05-Sep-2005"  #define VERSION "4.3 01-Jun-2006"
60  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
61    
62  #if BUFSIZ > 8192  #if BUFSIZ > 8192
# Line 65  typedef int BOOL; Line 66  typedef int BOOL;
66  #endif  #endif
67    
68    
69    /* Values for the "filenames" variable, which specifies options for file name
70    output. The order is important; it is assumed that a file name is wanted for
71    all values greater than FN_DEFAULT. */
72    
73    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
74    
75    /* Actions for the -d and -D options */
76    
77    enum { dee_READ, dee_SKIP, dee_RECURSE };
78    enum { DEE_READ, DEE_SKIP };
79    
80    /* Actions for special processing options (flag bits) */
81    
82    #define PO_WORD_MATCH     0x0001
83    #define PO_LINE_MATCH     0x0002
84    #define PO_FIXED_STRINGS  0x0004
85    
86    
87    
88  /*************************************************  /*************************************************
89  *               Global variables                 *  *               Global variables                 *
90  *************************************************/  *************************************************/
91    
92    /* Jeffrey Friedl has some debugging requirements that are not part of the
93    regular code. */
94    
95    #ifdef JFRIEDL_DEBUG
96    static int S_arg = -1;
97    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
98    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
99    static const char *jfriedl_prefix = "";
100    static const char *jfriedl_postfix = "";
101    #endif
102    
103    static int  endlinebyte = '\n';     /* Last byte of endline sequence */
104    static int  endlineextra = 0;       /* Extra bytes for endline sequence */
105    
106    static char *colour_string = (char *)"1;31";
107    static char *colour_option = NULL;
108    static char *dee_option = NULL;
109    static char *DEE_option = NULL;
110    static char *newline = NULL;
111  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
112  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
113    static char *locale = NULL;
114    
115    static const unsigned char *pcretables = NULL;
116    
117  static int  pattern_count = 0;  static int  pattern_count = 0;
118  static pcre **pattern_list;  static pcre **pattern_list;
119  static pcre_extra **hints_list;  static pcre_extra **hints_list;
# Line 85  static pcre *exclude_compiled = NULL; Line 127  static pcre *exclude_compiled = NULL;
127  static int after_context = 0;  static int after_context = 0;
128  static int before_context = 0;  static int before_context = 0;
129  static int both_context = 0;  static int both_context = 0;
130    static int dee_action = dee_READ;
131    static int DEE_action = DEE_READ;
132    static int error_count = 0;
133    static int filenames = FN_DEFAULT;
134    static int process_options = 0;
135    
136  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
137  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
 static BOOL filenames_only = FALSE;  
 static BOOL filenames_nomatch_only = FALSE;  
138  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
139  static BOOL invert = FALSE;  static BOOL invert = FALSE;
140  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
141  static BOOL number = FALSE;  static BOOL number = FALSE;
142    static BOOL only_matching = FALSE;
143  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
 static BOOL recurse = FALSE;  
144  static BOOL silent = FALSE;  static BOOL silent = FALSE;
 static BOOL whole_lines = FALSE;  
 static BOOL word_match = FALSE;  
145    
146  /* Structure for options and list of them */  /* Structure for options and list of them */
147    
148  enum { OP_NODATA, OP_STRING, OP_NUMBER };  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
149           OP_PATLIST };
150    
151  typedef struct option_item {  typedef struct option_item {
152    int type;    int type;
# Line 112  typedef struct option_item { Line 156  typedef struct option_item {
156    const char *help_text;    const char *help_text;
157  } option_item;  } option_item;
158    
159    /* Options without a single-letter equivalent get a negative value. This can be
160    used to identify them. */
161    
162    #define N_COLOUR    (-1)
163    #define N_EXCLUDE   (-2)
164    #define N_HELP      (-3)
165    #define N_INCLUDE   (-4)
166    #define N_LABEL     (-5)
167    #define N_LOCALE    (-6)
168    #define N_NULL      (-7)
169    
170  static option_item optionlist[] = {  static option_item optionlist[] = {
171    { OP_NODATA, -1,  NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
172    { OP_NODATA, -1,  NULL,              "help",          "display this help and exit" },    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
173    { OP_NUMBER, 'A', &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
174    { OP_NUMBER, 'B', &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
175    { OP_NUMBER, 'C', &both_context,     "context=number", "set number of context lines, before & after" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
176    { OP_NODATA, 'c', NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
177    { OP_STRING, 'f', &pattern_filename, "file=path",     "read patterns from file" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
178    { OP_NODATA, 'h', NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
179    { OP_NODATA, 'i', NULL,              "ignore-case",   "ignore case distinctions" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
180    { OP_NODATA, 'l', NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
181    { OP_NODATA, 'L', NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
182    { OP_STRING, -1,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
183    { OP_NODATA, 'M', NULL,              "multiline",     "run in multiline mode" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
184    { OP_NODATA, 'n', NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
185    { OP_NODATA, 'q', NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
186    { OP_NODATA, 'r', NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
187    { OP_STRING, -1,  &exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
188    { OP_STRING, -1,  &include_pattern,  "include=pattern","include matching files when recursing" },    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
189    { OP_NODATA, 's', NULL,              "no-messages",   "suppress error messages" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
190    { OP_NODATA, 'u', NULL,              "utf-8",         "use UTF-8 mode" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
191    { OP_NODATA, 'V', NULL,              "version",       "print version information and exit" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
192    { OP_NODATA, 'v', NULL,              "invert-match",  "select non-matching lines" },    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LR, CRLF)" },
193    { OP_NODATA, 'w', NULL,              "word-regex(p)", "force PATTERN to match only as a word"  },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
194    { OP_NODATA, 'x', NULL,              "line-regex(p)", "force PATTERN to match only whole lines" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
195    { OP_NODATA, 0,   NULL,               NULL,            NULL }    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
196      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
197      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
198      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
199    #ifdef JFRIEDL_DEBUG
200      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
201    #endif
202      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
203      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
204      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
205      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
206      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
207      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
208      { OP_NODATA,    0,        NULL,               NULL,            NULL }
209  };  };
210    
211    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
212    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
213    that the combination of -w and -x has the same effect as -x on its own, so we
214    can treat them as the same. */
215    
216    static const char *prefix[] = {
217      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
218    
219    static const char *suffix[] = {
220      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
221    
222    
223    
224  /*************************************************  /*************************************************
225  *       Functions for directory scanning         *  *            OS-specific functions               *
226  *************************************************/  *************************************************/
227    
228  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
229  although at present the only ones are for Unix, Win32, and for "no directory  although at present the only ones are for Unix, Win32, and for "no support". */
 recursion support". */  
230    
231    
232  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
# Line 194  closedir(dir); Line 273  closedir(dir);
273  }  }
274    
275    
276    /************* Test for regular file in Unix **********/
277    
278    static int
279    isregfile(char *filename)
280    {
281    struct stat statbuf;
282    if (stat(filename, &statbuf) < 0)
283      return 1;        /* In the expectation that opening as a file will fail */
284    return (statbuf.st_mode & S_IFMT) == S_IFREG;
285    }
286    
287    
288    /************* Test stdout for being a terminal in Unix **********/
289    
290    static BOOL
291    is_stdout_tty(void)
292    {
293    return isatty(fileno(stdout));
294    }
295    
296    
297  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
298    
299  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
# Line 292  free(dir); Line 392  free(dir);
392  }  }
393    
394    
395    /************* Test for regular file in Win32 **********/
396    
397    /* I don't know how to do this, or if it can be done; assume all paths are
398    regular if they are not directories. */
399    
400    int isregfile(char *filename)
401    {
402    return !isdirectory(filename)
403    }
404    
405    
406    /************* Test stdout for being a terminal in Win32 **********/
407    
408    /* I don't know how to do this; assume never */
409    
410    static BOOL
411    is_stdout_tty(void)
412    {
413    FALSE;
414    }
415    
416    
417  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
418    
419  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
# Line 300  free(dir); Line 422  free(dir);
422    
423  typedef void directory_type;  typedef void directory_type;
424    
425  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
426  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) {}
427  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) {}
428  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
429    
430    
431    /************* Test for regular file when we can't do it **********/
432    
433    /* Assume all files are regular. */
434    
435    int isregfile(char *filename) { return 1; }
436    
437    
438    /************* Test stdout for being a terminal when we can't do it **********/
439    
440    static BOOL
441    is_stdout_tty(void)
442    {
443    return FALSE;
444    }
445    
446    
447  #endif  #endif
448    
449    
# Line 336  return sys_errlist[n]; Line 475  return sys_errlist[n];
475  *************************************************/  *************************************************/
476    
477  /* This is called if we are about to lose said lines because of buffer filling,  /* This is called if we are about to lose said lines because of buffer filling,
478  and at the end of the file.  and at the end of the file. The data in the line is written using fwrite() so
479    that a binary zero does not terminate it.
480    
481  Arguments:  Arguments:
482    lastmatchnumber   the number of the last matching line, plus one    lastmatchnumber   the number of the last matching line, plus one
# Line 358  if (after_context > 0 && lastmatchnumber Line 498  if (after_context > 0 && lastmatchnumber
498      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
499      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
500      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
501      while (*pp != '\n') pp++;      while (*pp != endlinebyte) pp++;
502      fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);      fwrite(lastmatchrestart, 1, pp - lastmatchrestart + (1 + endlineextra),
503          stdout);
504      lastmatchrestart = pp + 1;      lastmatchrestart = pp + 1;
505      }      }
506    hyphenpending = TRUE;    hyphenpending = TRUE;
# Line 418  way, the buffer is shifted left and re-f Line 559  way, the buffer is shifted left and re-f
559  while (ptr < endptr)  while (ptr < endptr)
560    {    {
561    int i;    int i;
562      int mrc = 0;
563    BOOL match = FALSE;    BOOL match = FALSE;
564    char *t = ptr;    char *t = ptr;
565    size_t length, linelength;    size_t length, linelength;
# Line 430  while (ptr < endptr) Line 572  while (ptr < endptr)
572    that any match is constrained to be in the first line. */    that any match is constrained to be in the first line. */
573    
574    linelength = 0;    linelength = 0;
575    while (t < endptr && *t++ != '\n') linelength++;    while (t < endptr && *t++ != endlinebyte) linelength++;
576    length = multiline? endptr - ptr : linelength;    length = multiline? endptr - ptr : linelength;
577    
578    
579      /* Extra processing for Jeffrey Friedl's debugging. */
580    
581    #ifdef JFRIEDL_DEBUG
582      if (jfriedl_XT || jfriedl_XR)
583      {
584          #include <sys/time.h>
585          #include <time.h>
586          struct timeval start_time, end_time;
587          struct timezone dummy;
588    
589          if (jfriedl_XT)
590          {
591              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
592              const char *orig = ptr;
593              ptr = malloc(newlen + 1);
594              if (!ptr) {
595                      printf("out of memory");
596                      exit(2);
597              }
598              endptr = ptr;
599              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
600              for (i = 0; i < jfriedl_XT; i++) {
601                      strncpy(endptr, orig,  length);
602                      endptr += length;
603              }
604              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
605              length = newlen;
606          }
607    
608          if (gettimeofday(&start_time, &dummy) != 0)
609                  perror("bad gettimeofday");
610    
611    
612          for (i = 0; i < jfriedl_XR; i++)
613              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
614    
615          if (gettimeofday(&end_time, &dummy) != 0)
616                  perror("bad gettimeofday");
617    
618          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
619                          -
620                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
621    
622          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
623          return 0;
624      }
625    #endif
626    
627    
628    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches. Note that we don't include
629    the final newline in the subject string. */    the final newline in the subject string. */
630    
631    for (i = 0; !match && i < pattern_count; i++)    for (i = 0; i < pattern_count; i++)
632      {      {
633      match = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
634        offsets, 99) >= 0;        offsets, 99);
635        if (mrc >= 0) { match = TRUE; break; }
636        if (mrc != PCRE_ERROR_NOMATCH)
637          {
638          fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
639          if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
640          fprintf(stderr, "this line:\n");
641          fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */
642          fprintf(stderr, "\n");
643          if (error_count == 0 &&
644              (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
645            {
646            fprintf(stderr, "pcregrep: error %d means that a resource limit "
647              "was exceeded\n", mrc);
648            fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
649            }
650          if (error_count++ > 20)
651            {
652            fprintf(stderr, "pcregrep: too many errors - abandoned\n");
653            exit(2);
654            }
655          match = invert;    /* No more matching; don't show the line again */
656          break;
657          }
658      }      }
659    
660    /* If it's a match or a not-match (as required), print what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
661    
662    if (match != invert)    if (match != invert)
663      {      {
664      BOOL hyphenprinted = FALSE;      BOOL hyphenprinted = FALSE;
665    
666      if (filenames_nomatch_only) return 1;      /* We've failed if we want a file that doesn't have any matches. */
667    
668        if (filenames == FN_NOMATCH_ONLY) return 1;
669    
670        /* Just count if just counting is wanted. */
671    
672      if (count_only) count++;      if (count_only) count++;
673    
674      else if (filenames_only)      /* If all we want is a file name, there is no need to scan any more lines
675        in the file. */
676    
677        else if (filenames == FN_ONLY)
678        {        {
679        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
680        return 0;        return 0;
681        }        }
682    
683        /* Likewise, if all we want is a yes/no answer. */
684    
685      else if (quiet) return 0;      else if (quiet) return 0;
686    
687        /* The --only-matching option prints just the substring that matched, and
688      does not print any context. */
689    
690        else if (only_matching)
691          {
692          if (printname != NULL) fprintf(stdout, "%s:", printname);
693          if (number) fprintf(stdout, "%d:", linenumber);
694          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
695          fprintf(stdout, "\n");
696          }
697    
698        /* This is the default case when none of the above options is set. We print
699      the matching line(s), possibly preceded and/or followed by other lines of
700        context. */
701    
702      else      else
703        {        {
704        /* See if there is a requirement to print some "after" lines from a        /* See if there is a requirement to print some "after" lines from a
# Line 472  while (ptr < endptr) Line 711  while (ptr < endptr)
711    
712          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
713            {            {
714            while (*p != '\n') p++;            while (*p != endlinebyte) p++;
715            p++;            p++;
716            linecount++;            linecount++;
717            }            }
718    
719          /* It is important to advance lastmatchrestart during this printing so          /* It is important to advance lastmatchrestart during this printing so
720          that it interacts correctly with any "before" printing below. */          that it interacts correctly with any "before" printing below. Print
721            each line's data using fwrite() in case there are binary zeroes. */
722    
723          while (lastmatchrestart < p)          while (lastmatchrestart < p)
724            {            {
725            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
726            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
727            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
728            while (*pp != '\n') pp++;            while (*pp != endlinebyte) pp++;
729            fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);            fwrite(lastmatchrestart, 1, pp - lastmatchrestart +
730                (1 + endlineextra), stdout);
731            lastmatchrestart = pp + 1;            lastmatchrestart = pp + 1;
732            }            }
733          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
# Line 510  while (ptr < endptr) Line 751  while (ptr < endptr)
751          char *p = ptr;          char *p = ptr;
752    
753          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
754                 linecount++ < before_context)                 linecount < before_context)
755            {            {
756              linecount++;
757            p--;            p--;
758            while (p > buffer && p[-1] != '\n') p--;            while (p > buffer && p[-1] != endlinebyte) p--;
759            }            }
760    
761          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 524  while (ptr < endptr) Line 766  while (ptr < endptr)
766            char *pp = p;            char *pp = p;
767            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
768            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
769            while (*pp != '\n') pp++;            while (*pp != endlinebyte) pp++;
770            fprintf(stdout, "%.*s", pp - p + 1, p);            fwrite(p, 1, pp - p + (1 + endlineextra), stdout);
771            p = pp + 1;            p = pp + 1;
772            }            }
773          }          }
# Line 542  while (ptr < endptr) Line 784  while (ptr < endptr)
784        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
785        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
786        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately. Because the PCRE_FIRSTLINE option is set, the
787        start of the match will always be before the first \n character. */        start of the match will always be before the first newline sequence. */
788    
789        if (multiline)        if (multiline)
790          {          {
791          char *endmatch = ptr + offsets[1];          char *endmatch = ptr + offsets[1];
792          t = ptr;          t = ptr;
793          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          while (t < endmatch) { if (*t++ == endlinebyte) linenumber++; }
794          while (endmatch < endptr && *endmatch != '\n') endmatch++;          while (endmatch < endptr && *endmatch != endlinebyte) endmatch++;
795          linelength = endmatch - ptr;          linelength = endmatch - ptr;
796          }          }
797    
798        fprintf(stdout, "%.*s\n", linelength, ptr);        /*** NOTE: Use only fwrite() to output the data line, so that binary
799          zeroes are treated as just another data character. */
800    
801          /* This extra option, for Jeffrey Friedl's debugging requirements,
802          replaces the matched string, or a specific captured string if it exists,
803          with X. When this happens, colouring is ignored. */
804    
805    #ifdef JFRIEDL_DEBUG
806          if (S_arg >= 0 && S_arg < mrc)
807            {
808            int first = S_arg * 2;
809            int last  = first + 1;
810            fwrite(ptr, 1, offsets[first], stdout);
811            fprintf(stdout, "X");
812            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
813            }
814          else
815    #endif
816    
817          /* We have to split the line(s) up if colouring. */
818    
819          if (do_colour)
820            {
821            fwrite(ptr, 1, offsets[0], stdout);
822            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
823            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
824            fprintf(stdout, "%c[00m", 0x1b);
825            fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
826            }
827          else fwrite(ptr, 1, linelength, stdout);
828    
829          fprintf(stdout, "\n");
830        }        }
831    
832        /* End of doing what has to be done for a match */
833    
834      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
835    
836      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
# Line 601  while (ptr < endptr) Line 876  while (ptr < endptr)
876  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
877  hyphenpending if it prints something. */  hyphenpending if it prints something. */
878    
879  do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);  if (!only_matching && !count_only)
880  hyphenpending |= endhyphenpending;    {
881      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
882      hyphenpending |= endhyphenpending;
883      }
884    
885  /* Print the file name if we are looking for those without matches and there  /* Print the file name if we are looking for those without matches and there
886  were none. If we found a match, we won't have got this far. */  were none. If we found a match, we won't have got this far. */
887    
888  if (filenames_nomatch_only)  if (filenames == FN_NOMATCH_ONLY)
889    {    {
890    fprintf(stdout, "%s\n", printname);    fprintf(stdout, "%s\n", printname);
891    return 0;    return 0;
# Line 635  recursing; if it's a file, grep it. Line 913  recursing; if it's a file, grep it.
913    
914  Arguments:  Arguments:
915    pathname          the path to investigate    pathname          the path to investigate
916    dir_recurse       TRUE if recursing is wanted (-r)    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
   show_filenames    TRUE if file names are wanted for multiple files, except  
                       for the only file at top level when not filenames_only  
917    only_one_at_top   TRUE if the path is the only one at toplevel    only_one_at_top   TRUE if the path is the only one at toplevel
918    
919  Returns:   0 if there was at least one match  Returns:   0 if there was at least one match
# Line 648  However, file opening failures are suppr Line 924  However, file opening failures are suppr
924  */  */
925    
926  static int  static int
927  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL show_filenames,  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
   BOOL only_one_at_top)  
928  {  {
929  int rc = 1;  int rc = 1;
930  int sep;  int sep;
931  FILE *in;  FILE *in;
 char *printname;  
932    
933  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
934    
935  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
936    {    {
937    return pcregrep(stdin,    return pcregrep(stdin,
938      (filenames_only || filenames_nomatch_only ||      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
     (show_filenames && !only_one_at_top))?  
939        stdin_name : NULL);        stdin_name : NULL);
940    }    }
941    
 /* If the file is a directory and we are recursing, scan each file within it,  
 subject to any include or exclude patterns that were set. The scanning code is  
 localized so it can be made system-specific. */  
942    
943  if ((sep = isdirectory(pathname)) != 0 && dir_recurse)  /* If the file is a directory, skip if skipping or if we are recursing, scan
944    {  each file within it, subject to any include or exclude patterns that were set.
945    char buffer[1024];  The scanning code is localized so it can be made system-specific. */
   char *nextfile;  
   directory_type *dir = opendirectory(pathname);  
946    
947    if (dir == NULL)  if ((sep = isdirectory(pathname)) != 0)
948      {    {
949      if (!silent)    if (dee_action == dee_SKIP) return 1;
950        fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,    if (dee_action == dee_RECURSE)
         strerror(errno));  
     return 2;  
     }  
   
   while ((nextfile = readdirectory(dir)) != NULL)  
951      {      {
952      int frc, blen;      char buffer[1024];
953      sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);      char *nextfile;
954      blen = strlen(buffer);      directory_type *dir = opendirectory(pathname);
955    
956      if (exclude_compiled != NULL &&      if (dir == NULL)
957          pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)        {
958        continue;        if (!silent)
959            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
960              strerror(errno));
961          return 2;
962          }
963    
964      if (include_compiled != NULL &&      while ((nextfile = readdirectory(dir)) != NULL)
965          pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)        {
966        continue;        int frc, blen;
967          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
968          blen = strlen(buffer);
969    
970          if (exclude_compiled != NULL &&
971              pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
972            continue;
973    
974          if (include_compiled != NULL &&
975              pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
976            continue;
977    
978          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
979          if (frc > 1) rc = frc;
980           else if (frc == 0 && rc == 1) rc = 0;
981          }
982    
983      frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);      closedirectory(dir);
984      if (frc > 1) rc = frc;      return rc;
      else if (frc == 0 && rc == 1) rc = 0;  
985      }      }
   
   closedirectory(dir);  
   return rc;  
986    }    }
987    
988  /* If the file is not a directory, or we are not recursing, scan it. If this is  /* If the file is not a directory and not a regular file, skip it if that's
989  the first and only argument at top level, we don't show the file name (unless  been requested. */
990  we are only showing the file name). Otherwise, control is via the  
991  show_filenames variable. */  else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
992    
993    /* Control reaches here if we have a regular file, or if we have a directory
994    and recursion or skipping was not requested, or if we have anything else and
995    skipping was not requested. The scan proceeds. If this is the first and only
996    argument at top level, we don't show the file name, unless we are only showing
997    the file name, or the filename was forced (-H). */
998    
999  in = fopen(pathname, "r");  in = fopen(pathname, "r");
1000  if (in == NULL)  if (in == NULL)
# Line 721  if (in == NULL) Line 1005  if (in == NULL)
1005    return 2;    return 2;
1006    }    }
1007    
1008  printname =  (filenames_only || filenames_nomatch_only ||  rc = pcregrep(in, (filenames > FN_DEFAULT ||
1009    (show_filenames && !only_one_at_top))? pathname : NULL;    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
   
 rc = pcregrep(in, printname);  
1010    
1011  fclose(in);  fclose(in);
1012  return rc;  return rc;
# Line 740  return rc; Line 1022  return rc;
1022  static int  static int
1023  usage(int rc)  usage(int rc)
1024  {  {
1025  fprintf(stderr, "Usage: pcregrep [-LMVcfhilnqrsvwx] [long-options] [pattern] [file1 file2 ...]\n");  option_item *op;
1026    fprintf(stderr, "Usage: pcregrep [-");
1027    for (op = optionlist; op->one_char != 0; op++)
1028      {
1029      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1030      }
1031    fprintf(stderr, "] [long options] [pattern] [files]\n");
1032  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1033  return rc;  return rc;
1034  }  }
# Line 759  option_item *op; Line 1047  option_item *op;
1047    
1048  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1049  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1050  printf("PATTERN must be present if -f is not used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1051  printf("\"-\" can be used as a file name to mean STDIN.\n");  printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1052  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1053    
1054  printf("Options:\n");  printf("Options:\n");
# Line 796  handle_option(int letter, int options) Line 1084  handle_option(int letter, int options)
1084  {  {
1085  switch(letter)  switch(letter)
1086    {    {
1087    case -1:  help(); exit(0);    case N_HELP: help(); exit(0);
1088    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1089    case 'h': filenames = FALSE; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1090      case 'H': filenames = FN_FORCE; break;
1091      case 'h': filenames = FN_NONE; break;
1092    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1093    case 'l': filenames_only = TRUE; break;    case 'l': filenames = FN_ONLY; break;
1094    case 'L': filenames_nomatch_only = TRUE; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1095    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1096    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1097      case 'o': only_matching = TRUE; break;
1098    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1099    case 'r': recurse = TRUE; break;    case 'r': dee_action = dee_RECURSE; break;
1100    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1101    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; break;
1102    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1103    case 'w': word_match = TRUE; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1104    case 'x': whole_lines = TRUE; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1105    
1106    case 'V':    case 'V':
1107    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s using ", VERSION);
# Line 830  return options; Line 1121  return options;
1121    
1122    
1123  /*************************************************  /*************************************************
1124    *          Construct printed ordinal             *
1125    *************************************************/
1126    
/* This turns a number into its English ordinal: "1st", "2nd", "3rd", "4th",
and so on. Numbers whose last two digits are 11, 12 or 13 take "th" ("11th",
"112th") rather than the suffix their final digit alone would select. For a
negative number both remainders are negative, so the suffix is always "th".

The text is built in a static buffer which is overwritten by the next call, so
the caller must use or copy the result before calling ordin() again.

Argument:   n    the number to convert
Returns:    pointer to the static buffer holding the ordinal
*/

static char *
ordin(int n)
{
/* Three characters per byte is more than enough for the decimal digits of an
int (assuming 8-bit bytes); the extra four cover a minus sign, the two-letter
suffix, and the terminating zero. */

static char buffer[sizeof(int) * 3 + 4];
const char *suffix = "th";
int last_two = n % 100;

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, suffix);
return buffer;
}
1145    
1146    
1147    
1148    /*************************************************
1149    *          Compile a single pattern              *
1150    *************************************************/
1151    
1152    /* When the -F option has been used, this is called for each substring.
1153    Otherwise it's called for each supplied pattern.
1154    
1155    Arguments:
1156      pattern        the pattern string
1157      options        the PCRE options
1158      filename       the file name, or NULL for a command-line pattern
1159      count          0 if this is the only command line pattern, or
1160                     number of the command line pattern, or
1161                     linenumber for a pattern from a file
1162    
1163    Returns:         TRUE on success, FALSE after an error
1164    */
1165    
1166    static BOOL
1167    compile_single_pattern(char *pattern, int options, char *filename, int count)
1168    {
1169    char buffer[MBUFTHIRD + 16];
1170    const char *error;
1171    int errptr;
1172    
1173    if (pattern_count >= MAX_PATTERN_COUNT)
1174      {
1175      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1176        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1177      return FALSE;
1178      }
1179    
1180    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1181      suffix[process_options]);
1182    pattern_list[pattern_count] =
1183      pcre_compile(buffer, options, &error, &errptr, pcretables);
1184    if (pattern_list[pattern_count++] != NULL) return TRUE;
1185    
1186    /* Handle compile errors */
1187    
1188    errptr -= (int)strlen(prefix[process_options]);
1189    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1190    
1191    if (filename == NULL)
1192      {
1193      if (count == 0)
1194        fprintf(stderr, "pcregrep: Error in command-line regex "
1195          "at offset %d: %s\n", errptr, error);
1196      else
1197        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1198          "at offset %d: %s\n", ordin(count), errptr, error);
1199      }
1200    else
1201      {
1202      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1203        "at offset %d: %s\n", count, filename, errptr, error);
1204      }
1205    
1206    return FALSE;
1207    }
1208    
1209    
1210    
1211    /*************************************************
1212    *           Compile one supplied pattern         *
1213    *************************************************/
1214    
1215    /* When the -F option has been used, each string may be a list of strings,
1216    separated by line breaks. They will be matched literally.
1217    
1218    Arguments:
1219      pattern        the pattern string
1220      options        the PCRE options
1221      filename       the file name, or NULL for a command-line pattern
1222      count          0 if this is the only command line pattern, or
1223                     number of the command line pattern, or
1224                     linenumber for a pattern from a file
1225    
1226    Returns:         TRUE on success, FALSE after an error
1227    */
1228    
1229    static BOOL
1230    compile_pattern(char *pattern, int options, char *filename, int count)
1231    {
1232    if ((process_options & PO_FIXED_STRINGS) != 0)
1233      {
1234      char buffer[MBUFTHIRD];
1235      for(;;)
1236        {
1237        char *p = strchr(pattern, endlinebyte);
1238        if (p == NULL)
1239          return compile_single_pattern(pattern, options, filename, count);
1240        sprintf(buffer, "%.*s", p - pattern - endlineextra, pattern);
1241        pattern = p + 1;
1242        if (!compile_single_pattern(buffer, options, filename, count))
1243          return FALSE;
1244        }
1245      }
1246    else return compile_single_pattern(pattern, options, filename, count);
1247    }
1248    
1249    
1250    
1251    /*************************************************
1252  *                Main program                    *  *                Main program                    *
1253  *************************************************/  *************************************************/
1254    
# Line 840  main(int argc, char **argv) Line 1259  main(int argc, char **argv)
1259  {  {
1260  int i, j;  int i, j;
1261  int rc = 1;  int rc = 1;
1262  int options = 0;  int pcre_options = 0;
1263    int cmd_pattern_count = 0;
1264  int errptr;  int errptr;
 const char *error;  
1265  BOOL only_one_at_top;  BOOL only_one_at_top;
1266    char *patterns[MAX_PATTERN_COUNT];
1267    const char *locale_from = "--locale";
1268    const char *error;
1269    
1270    /* Set the default line ending value from the default in the PCRE library. */
1271    
1272    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1273    switch(i)
1274      {
1275      default:                 newline = (char *)"lf"; break;
1276      case '\r':               newline = (char *)"cr"; break;
1277      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1278      }
1279    
1280  /* Process the options */  /* Process the options */
1281    
# Line 857  for (i = 1; i < argc; i++) Line 1289  for (i = 1; i < argc; i++)
1289    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
1290    
1291    /* If we hit an argument that is just "-", it may be a reference to STDIN,    /* If we hit an argument that is just "-", it may be a reference to STDIN,
1292    but only if we have previously had -f to define the patterns. */    but only if we have previously had -e or -f to define the patterns. */
1293    
1294    if (argv[i][1] == 0)    if (argv[i][1] == 0)
1295      {      {
1296      if (pattern_filename != NULL) break;      if (pattern_filename != NULL || pattern_count > 0) break;
1297        else exit(usage(2));        else exit(usage(2));
1298      }      }
1299    
# Line 883  for (i = 1; i < argc; i++) Line 1315  for (i = 1; i < argc; i++)
1315      /* Some long options have data that follows after =, for example file=name.      /* Some long options have data that follows after =, for example file=name.
1316      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
1317      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
1318      with Jeff Friedl in preferring "regex" without the "p". These options are      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1319      entered in the table as "regex(p)". No option is in both these categories,      These options are entered in the table as "regex(p)". No option is in both
1320      fortunately. */      these categories, fortunately. */
1321    
1322      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
1323        {        {
# Line 933  for (i = 1; i < argc; i++) Line 1365  for (i = 1; i < argc; i++)
1365        }        }
1366      }      }
1367    
1368    
1369      /* Jeffrey Friedl's debugging harness uses these additional options which
1370      are not in the right form for putting in the option table because they use
1371      only one hyphen, yet are more than one character long. By putting them
1372      separately here, they will not get displayed as part of the help() output,
1373      but I don't think Jeffrey will care about that. */
1374    
1375    #ifdef JFRIEDL_DEBUG
1376      else if (strcmp(argv[i], "-pre") == 0) {
1377              jfriedl_prefix = argv[++i];
1378              continue;
1379      } else if (strcmp(argv[i], "-post") == 0) {
1380              jfriedl_postfix = argv[++i];
1381              continue;
1382      } else if (strcmp(argv[i], "-XT") == 0) {
1383              sscanf(argv[++i], "%d", &jfriedl_XT);
1384              continue;
1385      } else if (strcmp(argv[i], "-XR") == 0) {
1386              sscanf(argv[++i], "%d", &jfriedl_XR);
1387              continue;
1388      }
1389    #endif
1390    
1391    
1392    /* One-char options; many that have no data may be in a single argument; we    /* One-char options; many that have no data may be in a single argument; we
1393    continue till we hit the last one or one that needs data. */    continue till we hit the last one or one that needs data. */
1394    
# Line 955  for (i = 1; i < argc; i++) Line 1411  for (i = 1; i < argc; i++)
1411          option_data = s+1;          option_data = s+1;
1412          break;          break;
1413          }          }
1414        options = handle_option(*s++, options);        pcre_options = handle_option(*s++, pcre_options);
1415        }        }
1416      }      }
1417    
1418    /* At this point we should have op pointing to a matched option */    /* At this point we should have op pointing to a matched option. If the type
1419      is OP_NODATA, it means that there is no data, and the option might set
1420      something in the PCRE options. */
1421    
1422    if (op->type == OP_NODATA)    if (op->type == OP_NODATA)
     options = handle_option(op->one_char, options);  
   else  
1423      {      {
1424      if (*option_data == 0)      pcre_options = handle_option(op->one_char, pcre_options);
1425        continue;
1426        }
1427    
1428      /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1429      either has a value or defaults to something. It cannot have data in a
1430      separate item. At the moment, the only such options are "colo(u)r" and
1431      Jeffrey Friedl's special -S debugging option. */
1432    
1433      if (*option_data == 0 &&
1434          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1435        {
1436        switch (op->one_char)
1437        {        {
1438        if (i >= argc - 1 || longopwasequals)        case N_COLOUR:
1439          {        colour_option = (char *)"auto";
1440          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        break;
1441          exit(usage(2));  #ifdef JFRIEDL_DEBUG
1442          }        case 'S':
1443        option_data = argv[++i];        S_arg = 0;
1444          break;
1445    #endif
1446          }
1447        continue;
1448        }
1449    
1450      /* Otherwise, find the data string for the option. */
1451    
1452      if (*option_data == 0)
1453        {
1454        if (i >= argc - 1 || longopwasequals)
1455          {
1456          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1457          exit(usage(2));
1458        }        }
1459        option_data = argv[++i];
1460        }
1461    
1462      /* If the option type is OP_PATLIST, it's the -e option, which can be called
1463      multiple times to create a list of patterns. */
1464    
1465      if (op->type == OP_STRING) *((char **)op->dataptr) = option_data; else    if (op->type == OP_PATLIST)
1466        {
1467        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1468        {        {
1469        char *endptr;        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1470        int n = strtoul(option_data, &endptr, 10);          MAX_PATTERN_COUNT);
1471        if (*endptr != 0)        return 2;
1472          }
1473        patterns[cmd_pattern_count++] = option_data;
1474        }
1475    
1476      /* Otherwise, deal with single string or numeric data values. */
1477    
1478      else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1479        {
1480        *((char **)op->dataptr) = option_data;
1481        }
1482      else
1483        {
1484        char *endptr;
1485        int n = strtoul(option_data, &endptr, 10);
1486        if (*endptr != 0)
1487          {
1488          if (longop)
1489          {          {
1490          if (longop)          char *equals = strchr(op->long_name, '=');
1491            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%s\n",          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1492              option_data, op->long_name);            equals - op->long_name;
1493          else          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1494            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",            option_data, nlen, op->long_name);
             option_data, op->one_char);  
         exit(usage(2));  
1495          }          }
1496        *((int *)op->dataptr) = n;        else
1497            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1498              option_data, op->one_char);
1499          exit(usage(2));
1500        }        }
1501        *((int *)op->dataptr) = n;
1502      }      }
1503    }    }
1504    
# Line 1003  if (both_context > 0) Line 1511  if (both_context > 0)
1511    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
1512    }    }
1513    
1514  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  /* If a locale has not been provided as an option, see if the LC_CTYPE or
1515  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  LC_ALL environment variable is set, and if so, use it. */
1516    
1517  if (pattern_list == NULL || hints_list == NULL)  if (locale == NULL)
1518    {    {
1519    fprintf(stderr, "pcregrep: malloc failed\n");    locale = getenv("LC_ALL");
1520    return 2;    locale_from = "LCC_ALL";
1521    }    }
1522    
1523  /* Compile the regular expression(s). */  if (locale == NULL)
   
 if (pattern_filename != NULL)  
1524    {    {
1525    FILE *f = fopen(pattern_filename, "r");    locale = getenv("LC_CTYPE");
1526    char buffer[MBUFTHIRD + 16];    locale_from = "LC_CTYPE";
1527    char *rdstart;    }
   int adjust = 0;  
1528    
1529    if (f == NULL)  /* If a locale has been provided, set it, and generate the tables the PCRE
1530    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1531    
1532    if (locale != NULL)
1533      {
1534      if (setlocale(LC_CTYPE, locale) == NULL)
1535      {      {
1536      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1537        strerror(errno));        locale, locale_from);
1538      return 2;      return 2;
1539      }      }
1540      pcretables = pcre_maketables();
1541      }
1542    
1543    /* Sort out colouring */
1544    
1545    if (whole_lines)  if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1546      {
1547      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1548      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1549      else
1550      {      {
1551      strcpy(buffer, "^(?:");      fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1552      adjust = 4;        colour_option);
1553        return 2;
1554      }      }
1555    else if (word_match)    if (do_colour)
1556      {      {
1557      strcpy(buffer, "\\b");      char *cs = getenv("PCREGREP_COLOUR");
1558      adjust = 2;      if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1559        if (cs != NULL) colour_string = cs;
1560      }      }
1561      }
1562    
1563    /* Interpret the newline type; the default settings are Unix-like. */
1564    
1565    rdstart = buffer + adjust;  if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1566    while (fgets(rdstart, MBUFTHIRD, f) != NULL)    {
1567      pcre_options |= PCRE_NEWLINE_CR;
1568      endlinebyte = '\r';
1569      }
1570    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1571      {
1572      pcre_options |= PCRE_NEWLINE_LF;
1573      }
1574    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1575      {
1576      pcre_options |= PCRE_NEWLINE_CRLF;
1577      endlineextra = 1;
1578      }
1579    else
1580      {
1581      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1582      return 2;
1583      }
1584    
1585    /* Interpret the text values for -d and -D */
1586    
1587    if (dee_option != NULL)
1588      {
1589      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1590      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1591      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1592      else
1593      {      {
1594      char *s = rdstart + (int)strlen(rdstart);      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1595      if (pattern_count >= MAX_PATTERN_COUNT)      return 2;
       {  
       fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",  
         MAX_PATTERN_COUNT);  
       return 2;  
       }  
     while (s > rdstart && isspace((unsigned char)(s[-1]))) s--;  
     if (s == rdstart) continue;  
     if (whole_lines) strcpy(s, ")$");  
       else if (word_match)strcpy(s, "\\b");  
         else *s = 0;  
     pattern_list[pattern_count] = pcre_compile(buffer, options, &error,  
       &errptr, NULL);  
     if (pattern_list[pattern_count++] == NULL)  
       {  
       fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",  
         pattern_count, errptr - adjust, error);  
       return 2;  
       }  
1596      }      }
   fclose(f);  
1597    }    }
1598    
1599  /* If no file name, a single regex must be given inline. */  if (DEE_option != NULL)
1600      {
1601      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1602      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1603      else
1604        {
1605        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1606        return 2;
1607        }
1608      }
1609    
1610  else  /* Check the values for Jeffrey Friedl's debugging options. */
1611    
1612    #ifdef JFRIEDL_DEBUG
1613    if (S_arg > 9)
1614    {    {
1615    char buffer[MBUFTHIRD + 16];    fprintf(stderr, "pcregrep: bad value for -S option\n");
1616    char *pat;    return 2;
1617    int adjust = 0;    }
1618    if (jfriedl_XT != 0 || jfriedl_XR != 0)
1619      {
1620      if (jfriedl_XT == 0) jfriedl_XT = 1;
1621      if (jfriedl_XR == 0) jfriedl_XR = 1;
1622      }
1623    #endif
1624    
1625    /* Get memory to store the pattern and hints lists. */
1626    
1627    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1628    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1629    
1630    if (pattern_list == NULL || hints_list == NULL)
1631      {
1632      fprintf(stderr, "pcregrep: malloc failed\n");
1633      return 2;
1634      }
1635    
1636    /* If no patterns were provided by -e, and there is no file provided by -f,
1637    the first argument is the one and only pattern, and it must exist. */
1638    
1639    if (cmd_pattern_count == 0 && pattern_filename == NULL)
1640      {
1641    if (i >= argc) return usage(2);    if (i >= argc) return usage(2);
1642      patterns[cmd_pattern_count++] = argv[i++];
1643      }
1644    
1645    if (whole_lines)  /* Compile the patterns that were provided on the command line, either by
1646    multiple uses of -e or as a single unkeyed pattern. */
1647    
1648    for (j = 0; j < cmd_pattern_count; j++)
1649      {
1650      if (!compile_pattern(patterns[j], pcre_options, NULL,
1651           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1652        return 2;
1653      }
1654    
1655    /* Compile the regular expressions that are provided in a file. */
1656    
1657    if (pattern_filename != NULL)
1658      {
1659      int linenumber = 0;
1660      FILE *f;
1661      char *filename;
1662      char buffer[MBUFTHIRD];
1663    
1664      if (strcmp(pattern_filename, "-") == 0)
1665      {      {
1666      sprintf(buffer, "^(?:%.*s)$", MBUFTHIRD, argv[i++]);      f = stdin;
1667      pat = buffer;      filename = stdin_name;
     adjust = 4;  
1668      }      }
1669    else if (word_match)    else
1670      {      {
1671      sprintf(buffer, "\\b%.*s\\b", MBUFTHIRD, argv[i++]);      f = fopen(pattern_filename, "r");
1672      pat = buffer;      if (f == NULL)
1673      adjust = 2;        {
1674          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1675            strerror(errno));
1676          return 2;
1677          }
1678        filename = pattern_filename;
1679      }      }
   else pat = argv[i++];  
   
   pattern_list[0] = pcre_compile(pat, options, &error, &errptr, NULL);  
1680    
1681    if (pattern_list[0] == NULL)    while (fgets(buffer, MBUFTHIRD, f) != NULL)
1682      {      {
1683      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n",      char *s = buffer + (int)strlen(buffer);
1684        errptr - adjust, error);      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
1685      return 2;      *s = 0;
1686        linenumber++;
1687        if (buffer[0] == 0) continue;   /* Skip blank lines */
1688        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1689          return 2;
1690      }      }
1691    pattern_count++;  
1692      if (f != stdin) fclose(f);
1693    }    }
1694    
1695  /* Study the regular expressions, as we will be running them many times */  /* Study the regular expressions, as we will be running them many times */
# Line 1119  for (j = 0; j < pattern_count; j++) Line 1710  for (j = 0; j < pattern_count; j++)
1710    
1711  if (exclude_pattern != NULL)  if (exclude_pattern != NULL)
1712    {    {
1713    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, NULL);    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
1714        pcretables);
1715    if (exclude_compiled == NULL)    if (exclude_compiled == NULL)
1716      {      {
1717      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
# Line 1130  if (exclude_pattern != NULL) Line 1722  if (exclude_pattern != NULL)
1722    
1723  if (include_pattern != NULL)  if (include_pattern != NULL)
1724    {    {
1725    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, NULL);    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
1726        pcretables);
1727    if (include_compiled == NULL)    if (include_compiled == NULL)
1728      {      {
1729      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
# Line 1139  if (include_pattern != NULL) Line 1732  if (include_pattern != NULL)
1732      }      }
1733    }    }
1734    
1735  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
1736    
1737  if (i >= argc) return pcregrep(stdin,  if (i >= argc)
1738    (filenames_only || filenames_nomatch_only)? stdin_name : NULL);    return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
1739    
1740  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
1741  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
1742  the file name if the argument is not a directory and filenames_only is not set.  the file name if the argument is not a directory and filenames are not
1743  */  otherwise forced. */
1744    
1745  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
1746    
1747  for (; i < argc; i++)  for (; i < argc; i++)
1748    {    {
1749    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
1750        only_one_at_top);
1751    if (frc > 1) rc = frc;    if (frc > 1) rc = frc;
1752      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
1753    }    }

Legend:
Removed from v.85  
changed lines
  Added in v.91

  ViewVC Help
Powered by ViewVC 1.1.5