/[pcre]/code/tags/pcre-8.01/pcregrep.c
ViewVC logotype

Diff of /code/tags/pcre-8.01/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 93 by nigel, Sat Feb 24 21:41:42 2007 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2006 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  */  */
39    
40  #include <ctype.h>  #include <ctype.h>
41    #include <locale.h>
42  #include <stdio.h>  #include <stdio.h>
43  #include <string.h>  #include <string.h>
44  #include <stdlib.h>  #include <stdlib.h>
# Line 55  POSSIBILITY OF SUCH DAMAGE. Line 56  POSSIBILITY OF SUCH DAMAGE.
56    
57  typedef int BOOL;  typedef int BOOL;
58    
59  #define VERSION "4.0 07-Jun-2005"  #define VERSION "4.4 29-Nov-2006"
60  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
61    
62  #if BUFSIZ > 8192  #if BUFSIZ > 8192
# Line 64  typedef int BOOL; Line 65  typedef int BOOL;
65  #define MBUFTHIRD 8192  #define MBUFTHIRD 8192
66  #endif  #endif
67    
68    /* Values for the "filenames" variable, which specifies options for file name
69    output. The order is important; it is assumed that a file name is wanted for
70    all values greater than FN_DEFAULT. */
71    
72    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
73    
74    /* Actions for the -d and -D options */
75    
76    enum { dee_READ, dee_SKIP, dee_RECURSE };
77    enum { DEE_READ, DEE_SKIP };
78    
79    /* Actions for special processing options (flag bits) */
80    
81    #define PO_WORD_MATCH     0x0001
82    #define PO_LINE_MATCH     0x0002
83    #define PO_FIXED_STRINGS  0x0004
84    
85    /* Line ending types */
86    
87    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };
88    
89    
90    
91  /*************************************************  /*************************************************
92  *               Global variables                 *  *               Global variables                 *
93  *************************************************/  *************************************************/
94    
95    /* Jeffrey Friedl has some debugging requirements that are not part of the
96    regular code. */
97    
98    #ifdef JFRIEDL_DEBUG
99    static int S_arg = -1;
100    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
101    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
102    static const char *jfriedl_prefix = "";
103    static const char *jfriedl_postfix = "";
104    #endif
105    
106    static int  endlinetype;
107    
108    static char *colour_string = (char *)"1;31";
109    static char *colour_option = NULL;
110    static char *dee_option = NULL;
111    static char *DEE_option = NULL;
112    static char *newline = NULL;
113  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
114  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
115    static char *locale = NULL;
116    
117    static const unsigned char *pcretables = NULL;
118    
119  static int  pattern_count = 0;  static int  pattern_count = 0;
120  static pcre **pattern_list;  static pcre **pattern_list;
121  static pcre_extra **hints_list;  static pcre_extra **hints_list;
# Line 85  static pcre *exclude_compiled = NULL; Line 129  static pcre *exclude_compiled = NULL;
129  static int after_context = 0;  static int after_context = 0;
130  static int before_context = 0;  static int before_context = 0;
131  static int both_context = 0;  static int both_context = 0;
132    static int dee_action = dee_READ;
133    static int DEE_action = DEE_READ;
134    static int error_count = 0;
135    static int filenames = FN_DEFAULT;
136    static int process_options = 0;
137    
138  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
139  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
 static BOOL filenames_only = FALSE;  
 static BOOL filenames_nomatch_only = FALSE;  
140  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
141  static BOOL invert = FALSE;  static BOOL invert = FALSE;
142  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
143  static BOOL number = FALSE;  static BOOL number = FALSE;
144    static BOOL only_matching = FALSE;
145  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
 static BOOL recurse = FALSE;  
146  static BOOL silent = FALSE;  static BOOL silent = FALSE;
147  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
 static BOOL word_match = FALSE;  
148    
149  /* Structure for options and list of them */  /* Structure for options and list of them */
150    
151  enum { OP_NODATA, OP_STRING, OP_NUMBER };  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
152           OP_PATLIST };
153    
154  typedef struct option_item {  typedef struct option_item {
155    int type;    int type;
# Line 112  typedef struct option_item { Line 159  typedef struct option_item {
159    const char *help_text;    const char *help_text;
160  } option_item;  } option_item;
161    
162    /* Options without a single-letter equivalent get a negative value. This can be
163    used to identify them. */
164    
165    #define N_COLOUR    (-1)
166    #define N_EXCLUDE   (-2)
167    #define N_HELP      (-3)
168    #define N_INCLUDE   (-4)
169    #define N_LABEL     (-5)
170    #define N_LOCALE    (-6)
171    #define N_NULL      (-7)
172    
173  static option_item optionlist[] = {  static option_item optionlist[] = {
174    { OP_NODATA, -1,  NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
175    { OP_NODATA, -1,  NULL,              "help",          "display this help and exit" },    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
176    { OP_NUMBER, 'A', &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
177    { OP_NUMBER, 'B', &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
178    { OP_NUMBER, 'C', &both_context,     "context=number", "set number of context lines, before & after" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
179    { OP_NODATA, 'c', NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
180    { OP_STRING, 'f', &pattern_filename, "file=path",     "read patterns from file" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
181    { OP_NODATA, 'h', NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
182    { OP_NODATA, 'i', NULL,              "ignore-case",   "ignore case distinctions" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
183    { OP_NODATA, 'l', NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
184    { OP_NODATA, 'L', NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
185    { OP_STRING, -1,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
186    { OP_NODATA, 'M', NULL,              "multiline",     "run in multiline mode" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
187    { OP_NODATA, 'n', NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
188    { OP_NODATA, 'q', NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
189    { OP_NODATA, 'r', NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
190    { OP_STRING, -1,  &exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
191    { OP_STRING, -1,  &include_pattern,  "include=pattern","include matching files when recursing" },    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
192    { OP_NODATA, 's', NULL,              "no-messages",   "suppress error messages" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
193    { OP_NODATA, 'u', NULL,              "utf-8",         "use UTF-8 mode" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
194    { OP_NODATA, 'V', NULL,              "version",       "print version information and exit" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
195    { OP_NODATA, 'v', NULL,              "invert-match",  "select non-matching lines" },    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LR, CRLF)" },
196    { OP_NODATA, 'w', NULL,              "word-regex(p)", "force PATTERN to match only as a word"  },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
197    { OP_NODATA, 'x', NULL,              "line-regex(p)", "force PATTERN to match only whole lines" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
198    { OP_NODATA, 0,   NULL,               NULL,            NULL }    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
199      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
200      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
201      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
202    #ifdef JFRIEDL_DEBUG
203      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
204    #endif
205      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
206      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
207      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
208      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
209      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
210      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
211      { OP_NODATA,    0,        NULL,               NULL,            NULL }
212  };  };
213    
214    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
215    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
216    that the combination of -w and -x has the same effect as -x on its own, so we
217    can treat them as the same. */
218    
219    static const char *prefix[] = {
220      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
221    
222    static const char *suffix[] = {
223      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
224    
225    /* UTF-8 tables - used only when the newline setting is "any". */
226    
227    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
228    
229    const char utf8_table4[] = {
230      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
231      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
232      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
233      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
234    
235    
236    
237  /*************************************************  /*************************************************
238  *       Functions for directory scanning         *  *            OS-specific functions               *
239  *************************************************/  *************************************************/
240    
241  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
242  although at present the only ones are for Unix, Win32, and for "no directory  although at present the only ones are for Unix, Win32, and for "no support". */
 recursion support". */  
243    
244    
245  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
# Line 194  closedir(dir); Line 286  closedir(dir);
286  }  }
287    
288    
289    /************* Test for regular file in Unix **********/
290    
291    static int
292    isregfile(char *filename)
293    {
294    struct stat statbuf;
295    if (stat(filename, &statbuf) < 0)
296      return 1;        /* In the expectation that opening as a file will fail */
297    return (statbuf.st_mode & S_IFMT) == S_IFREG;
298    }
299    
300    
301    /************* Test stdout for being a terminal in Unix **********/
302    
303    static BOOL
304    is_stdout_tty(void)
305    {
306    return isatty(fileno(stdout));
307    }
308    
309    
310  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
311    
312  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
# Line 292  free(dir); Line 405  free(dir);
405  }  }
406    
407    
408    /************* Test for regular file in Win32 **********/
409    
410    /* I don't know how to do this, or if it can be done; assume all paths are
411    regular if they are not directories. */
412    
413    int isregfile(char *filename)
414    {
415    return !isdirectory(filename)
416    }
417    
418    
419    /************* Test stdout for being a terminal in Win32 **********/
420    
421    /* I don't know how to do this; assume never */
422    
423    static BOOL
424    is_stdout_tty(void)
425    {
426    FALSE;
427    }
428    
429    
430  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
431    
432  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
# Line 300  free(dir); Line 435  free(dir);
435    
436  typedef void directory_type;  typedef void directory_type;
437    
438  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
439  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) {}
440  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) {}
441  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
442    
443    
444    /************* Test for regular file when we can't do it **********/
445    
446    /* Assume all files are regular. */
447    
448    int isregfile(char *filename) { return 1; }
449    
450    
451    /************* Test stdout for being a terminal when we can't do it **********/
452    
453    static BOOL
454    is_stdout_tty(void)
455    {
456    return FALSE;
457    }
458    
459    
460  #endif  #endif
461    
462    
# Line 332  return sys_errlist[n]; Line 484  return sys_errlist[n];
484    
485    
486  /*************************************************  /*************************************************
487    *             Find end of line                   *
488    *************************************************/
489    
490    /* The length of the endline sequence that is found is set via lenptr. This may
491    be zero at the very end of the file if there is no line-ending sequence there.
492    
493    Arguments:
494      p         current position in line
495      endptr    end of available data
496      lenptr    where to put the length of the eol sequence
497    
498    Returns:    pointer after the last byte of the line, including the end-of-line sequence
499    */
500    
501    static char *
502    end_of_line(char *p, char *endptr, int *lenptr)
503    {
504    switch(endlinetype)
505      {
506      default:      /* Just in case */
507      case EL_LF:
508      while (p < endptr && *p != '\n') p++;
509      if (p < endptr)
510        {
511        *lenptr = 1;
512        return p + 1;
513        }
514      *lenptr = 0;
515      return endptr;
516    
517      case EL_CR:
518      while (p < endptr && *p != '\r') p++;
519      if (p < endptr)
520        {
521        *lenptr = 1;
522        return p + 1;
523        }
524      *lenptr = 0;
525      return endptr;
526    
527      case EL_CRLF:
528      for (;;)
529        {
530        while (p < endptr && *p != '\r') p++;
531        if (++p >= endptr)
532          {
533          *lenptr = 0;
534          return endptr;
535          }
536        if (*p == '\n')
537          {
538          *lenptr = 2;
539          return p + 1;
540          }
541        }
542      break;
543    
544      case EL_ANY:
545      while (p < endptr)
546        {
547        int extra = 0;
548        register int c = *((unsigned char *)p);
549    
550        if (utf8 && c >= 0xc0)
551          {
552          int gcii, gcss;
553          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
554          gcss = 6*extra;
555          c = (c & utf8_table3[extra]) << gcss;
556          for (gcii = 1; gcii <= extra; gcii++)
557            {
558            gcss -= 6;
559            c |= (p[gcii] & 0x3f) << gcss;
560            }
561          }
562    
563        p += 1 + extra;
564    
565        switch (c)
566          {
567          case 0x0a:    /* LF */
568          case 0x0b:    /* VT */
569          case 0x0c:    /* FF */
570          *lenptr = 1;
571          return p;
572    
573          case 0x0d:    /* CR */
574          if (p < endptr && *p == 0x0a)
575            {
576            *lenptr = 2;
577            p++;
578            }
579          else *lenptr = 1;
580          return p;
581    
582          case 0x85:    /* NEL */
583          *lenptr = utf8? 2 : 1;
584          return p;
585    
586          case 0x2028:  /* LS */
587          case 0x2029:  /* PS */
588          *lenptr = 3;
589          return p;
590    
591          default:
592          break;
593          }
594        }   /* End of loop for ANY case */
595    
596      *lenptr = 0;  /* Must have hit the end */
597      return endptr;
598      }     /* End of overall switch */
599    }
600    
601    
602    
603    /*************************************************
604    *         Find start of previous line            *
605    *************************************************/
606    
607    /* This is called when looking back for before lines to print.
608    
609    Arguments:
610      p         start of the subsequent line
611      startptr  start of available data
612    
613    Returns:    pointer to the start of the previous line
614    */
615    
616    static char *
617    previous_line(char *p, char *startptr)
618    {
619    switch(endlinetype)
620      {
621      default:      /* Just in case */
622      case EL_LF:
623      p--;
624      while (p > startptr && p[-1] != '\n') p--;
625      return p;
626    
627      case EL_CR:
628      p--;
629      while (p > startptr && p[-1] != '\n') p--;
630      return p;
631    
632      case EL_CRLF:
633      for (;;)
634        {
635        p -= 2;
636        while (p > startptr && p[-1] != '\n') p--;
637        if (p <= startptr + 1 || p[-2] == '\r') return p;
638        }
639      return p;   /* But control should never get here */
640    
641      case EL_ANY:
642      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
643      if (utf8) while ((*p & 0xc0) == 0x80) p--;
644    
645      while (p > startptr)
646        {
647        register int c;
648        char *pp = p - 1;
649    
650        if (utf8)
651          {
652          int extra = 0;
653          while ((*pp & 0xc0) == 0x80) pp--;
654          c = *((unsigned char *)pp);
655          if (c >= 0xc0)
656            {
657            int gcii, gcss;
658            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
659            gcss = 6*extra;
660            c = (c & utf8_table3[extra]) << gcss;
661            for (gcii = 1; gcii <= extra; gcii++)
662              {
663              gcss -= 6;
664              c |= (pp[gcii] & 0x3f) << gcss;
665              }
666            }
667          }
668        else c = *((unsigned char *)pp);
669    
670        switch (c)
671          {
672          case 0x0a:    /* LF */
673          case 0x0b:    /* VT */
674          case 0x0c:    /* FF */
675          case 0x0d:    /* CR */
676          case 0x85:    /* NEL */
677          case 0x2028:  /* LS */
678          case 0x2029:  /* PS */
679          return p;
680    
681          default:
682          break;
683          }
684    
685        p = pp;  /* Back one character */
686        }        /* End of loop for ANY case */
687    
688      return startptr;  /* Hit start of data */
689      }     /* End of overall switch */
690    }
691    
692    
693    
694    
695    
696    /*************************************************
697  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
698  *************************************************/  *************************************************/
699    
700  /* This is called if we are about to lose said lines because of buffer filling,  /* This is called if we are about to lose said lines because of buffer filling,
701  and at the end of the file.  and at the end of the file. The data in the line is written using fwrite() so
702    that a binary zero does not terminate it.
703    
704  Arguments:  Arguments:
705    lastmatchnumber   the number of the last matching line, plus one    lastmatchnumber   the number of the last matching line, plus one
# Line 355  if (after_context > 0 && lastmatchnumber Line 718  if (after_context > 0 && lastmatchnumber
718    int count = 0;    int count = 0;
719    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
720      {      {
721        int ellength;
722      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
723      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
724      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
725      while (*pp != '\n') pp++;      pp = end_of_line(pp, endptr, &ellength);
726      fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
727      lastmatchrestart = pp + 1;      lastmatchrestart = pp;
728      }      }
729    hyphenpending = TRUE;    hyphenpending = TRUE;
730    }    }
# Line 417  way, the buffer is shifted left and re-f Line 781  way, the buffer is shifted left and re-f
781    
782  while (ptr < endptr)  while (ptr < endptr)
783    {    {
784    int i;    int i, endlinelength;
785      int mrc = 0;
786    BOOL match = FALSE;    BOOL match = FALSE;
787    char *t = ptr;    char *t = ptr;
788    size_t length, linelength;    size_t length, linelength;
# Line 429  while (ptr < endptr) Line 794  while (ptr < endptr)
794    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
795    that any match is constrained to be in the first line. */    that any match is constrained to be in the first line. */
796    
797    linelength = 0;    t = end_of_line(t, endptr, &endlinelength);
798    while (t < endptr && *t++ != '\n') linelength++;    linelength = t - ptr - endlinelength;
799    length = multiline? endptr - ptr : linelength;    length = multiline? endptr - ptr : linelength;
800    
801      /* Extra processing for Jeffrey Friedl's debugging. */
802    
803    #ifdef JFRIEDL_DEBUG
804      if (jfriedl_XT || jfriedl_XR)
805      {
806          #include <sys/time.h>
807          #include <time.h>
808          struct timeval start_time, end_time;
809          struct timezone dummy;
810    
811          if (jfriedl_XT)
812          {
813              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
814              const char *orig = ptr;
815              ptr = malloc(newlen + 1);
816              if (!ptr) {
817                      printf("out of memory");
818                      exit(2);
819              }
820              endptr = ptr;
821              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
822              for (i = 0; i < jfriedl_XT; i++) {
823                      strncpy(endptr, orig,  length);
824                      endptr += length;
825              }
826              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
827              length = newlen;
828          }
829    
830          if (gettimeofday(&start_time, &dummy) != 0)
831                  perror("bad gettimeofday");
832    
833    
834          for (i = 0; i < jfriedl_XR; i++)
835              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
836    
837          if (gettimeofday(&end_time, &dummy) != 0)
838                  perror("bad gettimeofday");
839    
840          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
841                          -
842                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
843    
844          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
845          return 0;
846      }
847    #endif
848    
849    
850    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches. Note that we don't include
851    the final newline in the subject string. */    the final newline in the subject string. */
852    
853    for (i = 0; !match && i < pattern_count; i++)    for (i = 0; i < pattern_count; i++)
854      {      {
855      match = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
856        offsets, 99) >= 0;        offsets, 99);
857        if (mrc >= 0) { match = TRUE; break; }
858        if (mrc != PCRE_ERROR_NOMATCH)
859          {
860          fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
861          if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
862          fprintf(stderr, "this line:\n");
863          fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */
864          fprintf(stderr, "\n");
865          if (error_count == 0 &&
866              (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
867            {
868            fprintf(stderr, "pcregrep: error %d means that a resource limit "
869              "was exceeded\n", mrc);
870            fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
871            }
872          if (error_count++ > 20)
873            {
874            fprintf(stderr, "pcregrep: too many errors - abandoned\n");
875            exit(2);
876            }
877          match = invert;    /* No more matching; don't show the line again */
878          break;
879          }
880      }      }
881    
882    /* If it's a match or a not-match (as required), print what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
883    
884    if (match != invert)    if (match != invert)
885      {      {
886      BOOL hyphenprinted = FALSE;      BOOL hyphenprinted = FALSE;
887    
888      if (filenames_nomatch_only) return 1;      /* We've failed if we want a file that doesn't have any matches. */
889    
890        if (filenames == FN_NOMATCH_ONLY) return 1;
891    
892        /* Just count if just counting is wanted. */
893    
894      if (count_only) count++;      if (count_only) count++;
895    
896      else if (filenames_only)      /* If all we want is a file name, there is no need to scan any more lines
897        in the file. */
898    
899        else if (filenames == FN_ONLY)
900        {        {
901        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
902        return 0;        return 0;
903        }        }
904    
905        /* Likewise, if all we want is a yes/no answer. */
906    
907      else if (quiet) return 0;      else if (quiet) return 0;
908    
909        /* The --only-matching option prints just the substring that matched, and
910        does not print any context. */
911    
912        else if (only_matching)
913          {
914          if (printname != NULL) fprintf(stdout, "%s:", printname);
915          if (number) fprintf(stdout, "%d:", linenumber);
916          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
917          fprintf(stdout, "\n");
918          }
919    
920        /* This is the default case when none of the above options is set. We print
921        the matching line(s), possibly preceded and/or followed by other lines of
922        context. */
923    
924      else      else
925        {        {
926        /* See if there is a requirement to print some "after" lines from a        /* See if there is a requirement to print some "after" lines from a
# Line 467  while (ptr < endptr) Line 928  while (ptr < endptr)
928    
929        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
930          {          {
931            int ellength;
932          int linecount = 0;          int linecount = 0;
933          char *p = lastmatchrestart;          char *p = lastmatchrestart;
934    
935          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
936            {            {
937            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
938            linecount++;            linecount++;
939            }            }
940    
941          /* It is important to advance lastmatchrestart during this printing so          /* It is important to advance lastmatchrestart during this printing so
942          that it interacts correctly with any "before" printing below. */          that it interacts correctly with any "before" printing below. Print
943            each line's data using fwrite() in case there are binary zeroes. */
944    
945          while (lastmatchrestart < p)          while (lastmatchrestart < p)
946            {            {
947            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
948            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
949            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
950            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
951            fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
952            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
953            }            }
954          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
955          }          }
# Line 510  while (ptr < endptr) Line 972  while (ptr < endptr)
972          char *p = ptr;          char *p = ptr;
973    
974          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
975                 linecount++ < before_context)                 linecount < before_context)
976            {            {
977            p--;            linecount++;
978            while (p > buffer && p[-1] != '\n') p--;            p = previous_line(p, buffer);
979            }            }
980    
981          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 521  while (ptr < endptr) Line 983  while (ptr < endptr)
983    
984          while (p < ptr)          while (p < ptr)
985            {            {
986              int ellength;
987            char *pp = p;            char *pp = p;
988            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
989            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
990            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
991            fprintf(stdout, "%.*s", pp - p + 1, p);            fwrite(p, 1, pp - p, stdout);
992            p = pp + 1;            p = pp;
993            }            }
994          }          }
995    
996        /* Now print the matching line(s); ensure we set hyphenpending at the end        /* Now print the matching line(s); ensure we set hyphenpending at the end
997        of the file. */        of the file if any context lines are being output. */
998    
999          if (after_context > 0 || before_context > 0)
1000            endhyphenpending = TRUE;
1001    
       endhyphenpending = TRUE;  
1002        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (printname != NULL) fprintf(stdout, "%s:", printname);
1003        if (number) fprintf(stdout, "%d:", linenumber);        if (number) fprintf(stdout, "%d:", linenumber);
1004    
1005        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1006        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1007        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately. Because the PCRE_FIRSTLINE option is set, the
1008        start of the match will always be before the first \n character. */        start of the match will always be before the first newline sequence. */
1009    
1010        if (multiline)        if (multiline)
1011          {          {
1012            int ellength;
1013          char *endmatch = ptr + offsets[1];          char *endmatch = ptr + offsets[1];
1014          t = ptr;          t = ptr;
1015          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          while (t < endmatch)
1016          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1017          linelength = endmatch - ptr;            t = end_of_line(t, endptr, &ellength);
1018              if (t <= endmatch) linenumber++; else break;
1019              }
1020            endmatch = end_of_line(endmatch, endptr, &ellength);
1021            linelength = endmatch - ptr - ellength;
1022            }
1023    
1024          /*** NOTE: Use only fwrite() to output the data line, so that binary
1025          zeroes are treated as just another data character. */
1026    
1027          /* This extra option, for Jeffrey Friedl's debugging requirements,
1028          replaces the matched string, or a specific captured string if it exists,
1029          with X. When this happens, colouring is ignored. */
1030    
1031    #ifdef JFRIEDL_DEBUG
1032          if (S_arg >= 0 && S_arg < mrc)
1033            {
1034            int first = S_arg * 2;
1035            int last  = first + 1;
1036            fwrite(ptr, 1, offsets[first], stdout);
1037            fprintf(stdout, "X");
1038            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1039          }          }
1040          else
1041    #endif
1042    
1043          /* We have to split the line(s) up if colouring. */
1044    
1045        fprintf(stdout, "%.*s\n", linelength, ptr);        if (do_colour)
1046            {
1047            fwrite(ptr, 1, offsets[0], stdout);
1048            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1049            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1050            fprintf(stdout, "%c[00m", 0x1b);
1051            fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1052            }
1053          else fwrite(ptr, 1, linelength + endlinelength, stdout);
1054        }        }
1055    
1056        /* End of doing what has to be done for a match */
1057    
1058      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1059    
1060      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1061      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1062    
1063      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1064      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1065      }      }
1066    
1067    /* Advance to after the newline and increment the line number. */    /* Advance to after the newline and increment the line number. */
1068    
1069    ptr += linelength + 1;    ptr += linelength + endlinelength;
1070    linenumber++;    linenumber++;
1071    
1072    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
# Line 599  while (ptr < endptr) Line 1100  while (ptr < endptr)
1100  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
1101  hyphenpending if it prints something. */  hyphenpending if it prints something. */
1102    
1103  do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);  if (!only_matching && !count_only)
1104  hyphenpending |= endhyphenpending;    {
1105      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1106      hyphenpending |= endhyphenpending;
1107      }
1108    
1109  /* Print the file name if we are looking for those without matches and there  /* Print the file name if we are looking for those without matches and there
1110  were none. If we found a match, we won't have got this far. */  were none. If we found a match, we won't have got this far. */
1111    
1112  if (filenames_nomatch_only)  if (filenames == FN_NOMATCH_ONLY)
1113    {    {
1114    fprintf(stdout, "%s\n", printname);    fprintf(stdout, "%s\n", printname);
1115    return 0;    return 0;
# Line 633  recursing; if it's a file, grep it. Line 1137  recursing; if it's a file, grep it.
1137    
1138  Arguments:  Arguments:
1139    pathname          the path to investigate    pathname          the path to investigate
1140    dir_recurse       TRUE if recursing is wanted (-r)    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
   show_filenames    TRUE if file names are wanted for multiple files, except  
                       for the only file at top level when not filenames_only  
1141    only_one_at_top   TRUE if the path is the only one at toplevel    only_one_at_top   TRUE if the path is the only one at toplevel
1142    
1143  Returns:   0 if there was at least one match  Returns:   0 if there was at least one match
# Line 646  However, file opening failures are suppr Line 1148  However, file opening failures are suppr
1148  */  */
1149    
1150  static int  static int
1151  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL show_filenames,  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
   BOOL only_one_at_top)  
1152  {  {
1153  int rc = 1;  int rc = 1;
1154  int sep;  int sep;
1155  FILE *in;  FILE *in;
 char *printname;  
1156    
1157  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1158    
1159  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1160    {    {
1161    return pcregrep(stdin,    return pcregrep(stdin,
1162      (filenames_only || filenames_nomatch_only ||      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
     (show_filenames && !only_one_at_top))?  
1163        stdin_name : NULL);        stdin_name : NULL);
1164    }    }
1165    
 /* If the file is a directory and we are recursing, scan each file within it,  
 subject to any include or exclude patterns that were set. The scanning code is  
 localized so it can be made system-specific. */  
   
 if ((sep = isdirectory(pathname)) != 0 && dir_recurse)  
   {  
   char buffer[1024];  
   char *nextfile;  
   directory_type *dir = opendirectory(pathname);  
1166    
1167    if (dir == NULL)  /* If the file is a directory, skip if skipping or if we are recursing, scan
1168      {  each file within it, subject to any include or exclude patterns that were set.
1169      if (!silent)  The scanning code is localized so it can be made system-specific. */
       fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,  
         strerror(errno));  
     return 2;  
     }  
1170    
1171    while ((nextfile = readdirectory(dir)) != NULL)  if ((sep = isdirectory(pathname)) != 0)
1172      {
1173      if (dee_action == dee_SKIP) return 1;
1174      if (dee_action == dee_RECURSE)
1175      {      {
1176      int frc, blen;      char buffer[1024];
1177      sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);      char *nextfile;
1178      blen = strlen(buffer);      directory_type *dir = opendirectory(pathname);
1179    
1180      if (exclude_compiled != NULL &&      if (dir == NULL)
1181          pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)        {
1182        continue;        if (!silent)
1183            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1184              strerror(errno));
1185          return 2;
1186          }
1187    
1188      if (include_compiled != NULL &&      while ((nextfile = readdirectory(dir)) != NULL)
1189          pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)        {
1190        continue;        int frc, blen;
1191          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1192          blen = strlen(buffer);
1193    
1194          if (exclude_compiled != NULL &&
1195              pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1196            continue;
1197    
1198          if (include_compiled != NULL &&
1199              pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1200            continue;
1201    
1202          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1203          if (frc > 1) rc = frc;
1204           else if (frc == 0 && rc == 1) rc = 0;
1205          }
1206    
1207      frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);      closedirectory(dir);
1208      if (frc > 1) rc = frc;      return rc;
      else if (frc == 0 && rc == 1) rc = 0;  
1209      }      }
   
   closedirectory(dir);  
   return rc;  
1210    }    }
1211    
1212  /* If the file is not a directory, or we are not recursing, scan it. If this is  /* If the file is not a directory and not a regular file, skip it if that's
1213  the first and only argument at top level, we don't show the file name (unless  been requested. */
1214  we are only showing the file name). Otherwise, control is via the  
1215  show_filenames variable. */  else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1216    
1217    /* Control reaches here if we have a regular file, or if we have a directory
1218    and recursion or skipping was not requested, or if we have anything else and
1219    skipping was not requested. The scan proceeds. If this is the first and only
1220    argument at top level, we don't show the file name, unless we are only showing
1221    the file name, or the filename was forced (-H). */
1222    
1223  in = fopen(pathname, "r");  in = fopen(pathname, "r");
1224  if (in == NULL)  if (in == NULL)
# Line 719  if (in == NULL) Line 1229  if (in == NULL)
1229    return 2;    return 2;
1230    }    }
1231    
1232  printname =  (filenames_only || filenames_nomatch_only ||  rc = pcregrep(in, (filenames > FN_DEFAULT ||
1233    (show_filenames && !only_one_at_top))? pathname : NULL;    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
   
 rc = pcregrep(in, printname);  
1234    
1235  fclose(in);  fclose(in);
1236  return rc;  return rc;
# Line 738  return rc; Line 1246  return rc;
1246  static int  static int
1247  usage(int rc)  usage(int rc)
1248  {  {
1249  fprintf(stderr, "Usage: pcregrep [-LMVcfhilnqrsvwx] [long-options] [pattern] [file1 file2 ...]\n");  option_item *op;
1250    fprintf(stderr, "Usage: pcregrep [-");
1251    for (op = optionlist; op->one_char != 0; op++)
1252      {
1253      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1254      }
1255    fprintf(stderr, "] [long options] [pattern] [files]\n");
1256  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1257  return rc;  return rc;
1258  }  }
# Line 757  option_item *op; Line 1271  option_item *op;
1271    
1272  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1273  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1274  printf("PATTERN must be present if -f is not used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1275  printf("\"-\" can be used as a file name to mean STDIN.\n");  printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1276  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1277    
1278  printf("Options:\n");  printf("Options:\n");
# Line 794  handle_option(int letter, int options) Line 1308  handle_option(int letter, int options)
1308  {  {
1309  switch(letter)  switch(letter)
1310    {    {
1311    case -1:  help(); exit(0);    case N_HELP: help(); exit(0);
1312    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1313    case 'h': filenames = FALSE; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1314      case 'H': filenames = FN_FORCE; break;
1315      case 'h': filenames = FN_NONE; break;
1316    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1317    case 'l': filenames_only = TRUE; break;    case 'l': filenames = FN_ONLY; break;
1318    case 'L': filenames_nomatch_only = TRUE; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1319    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1320    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1321      case 'o': only_matching = TRUE; break;
1322    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1323    case 'r': recurse = TRUE; break;    case 'r': dee_action = dee_RECURSE; break;
1324    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1325    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1326    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1327    case 'w': word_match = TRUE; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1328    case 'x': whole_lines = TRUE; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1329    
1330    case 'V':    case 'V':
1331    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s using ", VERSION);
# Line 828  return options; Line 1345  return options;
1345    
1346    
1347  /*************************************************  /*************************************************
1348    *          Construct printed ordinal             *
1349    *************************************************/
1350    
/* This turns a number into its printed English ordinal: "1st", "2nd", "3rd",
"4th", ... The suffix is chosen from the last two decimal digits, so that 11,
12 and 13 (and 111, 212, ...) correctly get "th" rather than "st", "nd", "rd".
The sign of a negative number is printed but ignored when choosing the suffix.

Argument:   n     the number
Returns:    pointer to a static buffer holding the text; the contents are
            overwritten by the next call
*/

static char *
ordin(int n)
{
/* Three characters per byte is enough for the decimal digits of any int; the
extra four cover a minus sign, the two-letter suffix, and the terminator. */

static char buffer[sizeof(int) * 3 + 4];
char *p = buffer;
int last2 = n % 100;
const char *suffix = "th";

if (last2 < 0) last2 = -last2;

/* 11, 12 and 13 are irregular; everything else goes by the final digit. */

if (last2 < 11 || last2 > 13)
  {
  switch (last2 % 10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

p += sprintf(p, "%d", n);
strcpy(p, suffix);
return buffer;
}
1369    
1370    
1371    
1372    /*************************************************
1373    *          Compile a single pattern              *
1374    *************************************************/
1375    
1376    /* When the -F option has been used, this is called for each substring.
1377    Otherwise it's called for each supplied pattern.
1378    
1379    Arguments:
1380      pattern        the pattern string
1381      options        the PCRE options
1382      filename       the file name, or NULL for a command-line pattern
1383      count          0 if this is the only command line pattern, or
1384                     number of the command line pattern, or
1385                     linenumber for a pattern from a file
1386    
1387    Returns:         TRUE on success, FALSE after an error
1388    */
1389    
1390    static BOOL
1391    compile_single_pattern(char *pattern, int options, char *filename, int count)
1392    {
1393    char buffer[MBUFTHIRD + 16];
1394    const char *error;
1395    int errptr;
1396    
1397    if (pattern_count >= MAX_PATTERN_COUNT)
1398      {
1399      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1400        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1401      return FALSE;
1402      }
1403    
1404    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1405      suffix[process_options]);
1406    pattern_list[pattern_count] =
1407      pcre_compile(buffer, options, &error, &errptr, pcretables);
1408    if (pattern_list[pattern_count++] != NULL) return TRUE;
1409    
1410    /* Handle compile errors */
1411    
1412    errptr -= (int)strlen(prefix[process_options]);
1413    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1414    
1415    if (filename == NULL)
1416      {
1417      if (count == 0)
1418        fprintf(stderr, "pcregrep: Error in command-line regex "
1419          "at offset %d: %s\n", errptr, error);
1420      else
1421        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1422          "at offset %d: %s\n", ordin(count), errptr, error);
1423      }
1424    else
1425      {
1426      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1427        "at offset %d: %s\n", count, filename, errptr, error);
1428      }
1429    
1430    return FALSE;
1431    }
1432    
1433    
1434    
1435    /*************************************************
1436    *           Compile one supplied pattern         *
1437    *************************************************/
1438    
1439    /* When the -F option has been used, each string may be a list of strings,
1440    separated by line breaks. They will be matched literally.
1441    
1442    Arguments:
1443      pattern        the pattern string
1444      options        the PCRE options
1445      filename       the file name, or NULL for a command-line pattern
1446      count          0 if this is the only command line pattern, or
1447                     number of the command line pattern, or
1448                     linenumber for a pattern from a file
1449    
1450    Returns:         TRUE on success, FALSE after an error
1451    */
1452    
1453    static BOOL
1454    compile_pattern(char *pattern, int options, char *filename, int count)
1455    {
1456    if ((process_options & PO_FIXED_STRINGS) != 0)
1457      {
1458      char *eop = pattern + strlen(pattern);
1459      char buffer[MBUFTHIRD];
1460      for(;;)
1461        {
1462        int ellength;
1463        char *p = end_of_line(pattern, eop, &ellength);
1464        if (ellength == 0)
1465          return compile_single_pattern(pattern, options, filename, count);
1466        sprintf(buffer, "%.*s", p - pattern - ellength, pattern);
1467        pattern = p;
1468        if (!compile_single_pattern(buffer, options, filename, count))
1469          return FALSE;
1470        }
1471      }
1472    else return compile_single_pattern(pattern, options, filename, count);
1473    }
1474    
1475    
1476    
1477    /*************************************************
1478  *                Main program                    *  *                Main program                    *
1479  *************************************************/  *************************************************/
1480    
# Line 838  main(int argc, char **argv) Line 1485  main(int argc, char **argv)
1485  {  {
1486  int i, j;  int i, j;
1487  int rc = 1;  int rc = 1;
1488  int options = 0;  int pcre_options = 0;
1489    int cmd_pattern_count = 0;
1490  int errptr;  int errptr;
 const char *error;  
1491  BOOL only_one_at_top;  BOOL only_one_at_top;
1492    char *patterns[MAX_PATTERN_COUNT];
1493    const char *locale_from = "--locale";
1494    const char *error;
1495    
1496    /* Set the default line ending value from the default in the PCRE library;
1497    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1498    */
1499    
1500    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1501    switch(i)
1502      {
1503      default:                 newline = (char *)"lf"; break;
1504      case '\r':               newline = (char *)"cr"; break;
1505      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1506      case -1:                 newline = (char *)"any"; break;
1507      }
1508    
1509  /* Process the options */  /* Process the options */
1510    
# Line 855  for (i = 1; i < argc; i++) Line 1518  for (i = 1; i < argc; i++)
1518    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
1519    
1520    /* If we hit an argument that is just "-", it may be a reference to STDIN,    /* If we hit an argument that is just "-", it may be a reference to STDIN,
1521    but only if we have previously had -f to define the patterns. */    but only if we have previously had -e or -f to define the patterns. */
1522    
1523    if (argv[i][1] == 0)    if (argv[i][1] == 0)
1524      {      {
1525      if (pattern_filename != NULL) break;      if (pattern_filename != NULL || pattern_count > 0) break;
1526        else exit(usage(2));        else exit(usage(2));
1527      }      }
1528    
# Line 881  for (i = 1; i < argc; i++) Line 1544  for (i = 1; i < argc; i++)
1544      /* Some long options have data that follows after =, for example file=name.      /* Some long options have data that follows after =, for example file=name.
1545      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
1546      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
1547      with Jeff Friedl in preferring "regex" without the "p". These options are      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1548      entered in the table as "regex(p)". No option is in both these categories,      These options are entered in the table as "regex(p)". No option is in both
1549      fortunately. */      these categories, fortunately. */
1550    
1551      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
1552        {        {
# Line 931  for (i = 1; i < argc; i++) Line 1594  for (i = 1; i < argc; i++)
1594        }        }
1595      }      }
1596    
1597    
1598      /* Jeffrey Friedl's debugging harness uses these additional options which
1599      are not in the right form for putting in the option table because they use
1600      only one hyphen, yet are more than one character long. By putting them
1601      separately here, they will not get displayed as part of the help() output,
1602      but I don't think Jeffrey will care about that. */
1603    
1604    #ifdef JFRIEDL_DEBUG
1605      else if (strcmp(argv[i], "-pre") == 0) {
1606              jfriedl_prefix = argv[++i];
1607              continue;
1608      } else if (strcmp(argv[i], "-post") == 0) {
1609              jfriedl_postfix = argv[++i];
1610              continue;
1611      } else if (strcmp(argv[i], "-XT") == 0) {
1612              sscanf(argv[++i], "%d", &jfriedl_XT);
1613              continue;
1614      } else if (strcmp(argv[i], "-XR") == 0) {
1615              sscanf(argv[++i], "%d", &jfriedl_XR);
1616              continue;
1617      }
1618    #endif
1619    
1620    
1621    /* One-char options; many that have no data may be in a single argument; we    /* One-char options; many that have no data may be in a single argument; we
1622    continue till we hit the last one or one that needs data. */    continue till we hit the last one or one that needs data. */
1623    
# Line 953  for (i = 1; i < argc; i++) Line 1640  for (i = 1; i < argc; i++)
1640          option_data = s+1;          option_data = s+1;
1641          break;          break;
1642          }          }
1643        options = handle_option(*s++, options);        pcre_options = handle_option(*s++, pcre_options);
1644        }        }
1645      }      }
1646    
1647    /* At this point we should have op pointing to a matched option */    /* At this point we should have op pointing to a matched option. If the type
1648      is OP_NODATA, it means that there is no data, and the option might set
1649      something in the PCRE options. */
1650    
1651    if (op->type == OP_NODATA)    if (op->type == OP_NODATA)
     options = handle_option(op->one_char, options);  
   else  
1652      {      {
1653      if (*option_data == 0)      pcre_options = handle_option(op->one_char, pcre_options);
1654        continue;
1655        }
1656    
1657      /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1658      either has a value or defaults to something. It cannot have data in a
1659      separate item. At the moment, the only such options are "colo(u)r" and
1660      Jeffrey Friedl's special -S debugging option. */
1661    
1662      if (*option_data == 0 &&
1663          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1664        {
1665        switch (op->one_char)
1666        {        {
1667        if (i >= argc - 1 || longopwasequals)        case N_COLOUR:
1668          {        colour_option = (char *)"auto";
1669          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        break;
1670          exit(usage(2));  #ifdef JFRIEDL_DEBUG
1671          }        case 'S':
1672        option_data = argv[++i];        S_arg = 0;
1673          break;
1674    #endif
1675        }        }
1676        continue;
1677        }
1678    
1679      /* Otherwise, find the data string for the option. */
1680    
1681      if (*option_data == 0)
1682        {
1683        if (i >= argc - 1 || longopwasequals)
1684          {
1685          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1686          exit(usage(2));
1687          }
1688        option_data = argv[++i];
1689        }
1690    
1691      /* If the option type is OP_PATLIST, it's the -e option, which can be called
1692      multiple times to create a list of patterns. */
1693    
1694      if (op->type == OP_STRING) *((char **)op->dataptr) = option_data; else    if (op->type == OP_PATLIST)
1695        {
1696        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1697        {        {
1698        char *endptr;        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1699        int n = strtoul(option_data, &endptr, 10);          MAX_PATTERN_COUNT);
1700        if (*endptr != 0)        return 2;
1701          }
1702        patterns[cmd_pattern_count++] = option_data;
1703        }
1704    
1705      /* Otherwise, deal with single string or numeric data values. */
1706    
1707      else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1708        {
1709        *((char **)op->dataptr) = option_data;
1710        }
1711      else
1712        {
1713        char *endptr;
1714        int n = strtoul(option_data, &endptr, 10);
1715        if (*endptr != 0)
1716          {
1717          if (longop)
1718          {          {
1719          if (longop)          char *equals = strchr(op->long_name, '=');
1720            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%s\n",          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1721              option_data, op->long_name);            equals - op->long_name;
1722          else          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1723            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",            option_data, nlen, op->long_name);
             option_data, op->one_char);  
         exit(usage(2));  
1724          }          }
1725        *((int *)op->dataptr) = n;        else
1726            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1727              option_data, op->one_char);
1728          exit(usage(2));
1729        }        }
1730        *((int *)op->dataptr) = n;
1731      }      }
1732    }    }
1733    
# Line 1001  if (both_context > 0) Line 1740  if (both_context > 0)
1740    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
1741    }    }
1742    
1743  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  /* If a locale has not been provided as an option, see if the LC_CTYPE or
1744  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  LC_ALL environment variable is set, and if so, use it. */
1745    
1746  if (pattern_list == NULL || hints_list == NULL)  if (locale == NULL)
1747    {    {
1748    fprintf(stderr, "pcregrep: malloc failed\n");    locale = getenv("LC_ALL");
1749    return 2;    locale_from = "LCC_ALL";
1750    }    }
1751    
1752  /* Compile the regular expression(s). */  if (locale == NULL)
   
 if (pattern_filename != NULL)  
1753    {    {
1754    FILE *f = fopen(pattern_filename, "r");    locale = getenv("LC_CTYPE");
1755    char buffer[MBUFTHIRD + 16];    locale_from = "LC_CTYPE";
1756    char *rdstart;    }
   int adjust = 0;  
1757    
1758    if (f == NULL)  /* If a locale has been provided, set it, and generate the tables the PCRE
1759    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1760    
1761    if (locale != NULL)
1762      {
1763      if (setlocale(LC_CTYPE, locale) == NULL)
1764      {      {
1765      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1766        strerror(errno));        locale, locale_from);
1767      return 2;      return 2;
1768      }      }
1769      pcretables = pcre_maketables();
1770      }
1771    
1772    /* Sort out colouring */
1773    
1774    if (whole_lines)  if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1775      {
1776      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1777      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1778      else
1779      {      {
1780      strcpy(buffer, "^(?:");      fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1781      adjust = 4;        colour_option);
1782        return 2;
1783      }      }
1784    else if (word_match)    if (do_colour)
1785      {      {
1786      strcpy(buffer, "\\b");      char *cs = getenv("PCREGREP_COLOUR");
1787      adjust = 2;      if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1788        if (cs != NULL) colour_string = cs;
1789      }      }
1790      }
1791    
1792    /* Interpret the newline type; the default settings are Unix-like. */
1793    
1794    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1795      {
1796      pcre_options |= PCRE_NEWLINE_CR;
1797      endlinetype = EL_CR;
1798      }
1799    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1800      {
1801      pcre_options |= PCRE_NEWLINE_LF;
1802      endlinetype = EL_LF;
1803      }
1804    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1805      {
1806      pcre_options |= PCRE_NEWLINE_CRLF;
1807      endlinetype = EL_CRLF;
1808      }
1809    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1810      {
1811      pcre_options |= PCRE_NEWLINE_ANY;
1812      endlinetype = EL_ANY;
1813      }
1814    else
1815      {
1816      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1817      return 2;
1818      }
1819    
1820    /* Interpret the text values for -d and -D */
1821    
1822    rdstart = buffer + adjust;  if (dee_option != NULL)
1823    while (fgets(rdstart, MBUFTHIRD, f) != NULL)    {
1824      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1825      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1826      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1827      else
1828      {      {
1829      char *s = rdstart + (int)strlen(rdstart);      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1830      if (pattern_count >= MAX_PATTERN_COUNT)      return 2;
       {  
       fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",  
         MAX_PATTERN_COUNT);  
       return 2;  
       }  
     while (s > rdstart && isspace((unsigned char)(s[-1]))) s--;  
     if (s == rdstart) continue;  
     if (whole_lines) strcpy(s, ")$");  
       else if (word_match)strcpy(s, "\\b");  
         else *s = 0;  
     pattern_list[pattern_count] = pcre_compile(buffer, options, &error,  
       &errptr, NULL);  
     if (pattern_list[pattern_count++] == NULL)  
       {  
       fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",  
         pattern_count, errptr - adjust, error);  
       return 2;  
       }  
1831      }      }
   fclose(f);  
1832    }    }
1833    
1834  /* If no file name, a single regex must be given inline. */  if (DEE_option != NULL)
1835      {
1836      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1837      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1838      else
1839        {
1840        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1841        return 2;
1842        }
1843      }
1844    
1845  else  /* Check the values for Jeffrey Friedl's debugging options. */
1846    
1847    #ifdef JFRIEDL_DEBUG
1848    if (S_arg > 9)
1849    {    {
1850    char buffer[MBUFTHIRD + 16];    fprintf(stderr, "pcregrep: bad value for -S option\n");
1851    char *pat;    return 2;
1852    int adjust = 0;    }
1853    if (jfriedl_XT != 0 || jfriedl_XR != 0)
1854      {
1855      if (jfriedl_XT == 0) jfriedl_XT = 1;
1856      if (jfriedl_XR == 0) jfriedl_XR = 1;
1857      }
1858    #endif
1859    
1860    /* Get memory to store the pattern and hints lists. */
1861    
1862    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1863    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1864    
1865    if (pattern_list == NULL || hints_list == NULL)
1866      {
1867      fprintf(stderr, "pcregrep: malloc failed\n");
1868      return 2;
1869      }
1870    
1871    /* If no patterns were provided by -e, and there is no file provided by -f,
1872    the first argument is the one and only pattern, and it must exist. */
1873    
1874    if (cmd_pattern_count == 0 && pattern_filename == NULL)
1875      {
1876    if (i >= argc) return usage(2);    if (i >= argc) return usage(2);
1877      patterns[cmd_pattern_count++] = argv[i++];
1878      }
1879    
1880    /* Compile the patterns that were provided on the command line, either by
1881    multiple uses of -e or as a single unkeyed pattern. */
1882    
1883    for (j = 0; j < cmd_pattern_count; j++)
1884      {
1885      if (!compile_pattern(patterns[j], pcre_options, NULL,
1886           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1887        return 2;
1888      }
1889    
1890    /* Compile the regular expressions that are provided in a file. */
1891    
1892    if (pattern_filename != NULL)
1893      {
1894      int linenumber = 0;
1895      FILE *f;
1896      char *filename;
1897      char buffer[MBUFTHIRD];
1898    
1899    if (whole_lines)    if (strcmp(pattern_filename, "-") == 0)
1900      {      {
1901      sprintf(buffer, "^(?:%.*s)$", MBUFTHIRD, argv[i++]);      f = stdin;
1902      pat = buffer;      filename = stdin_name;
     adjust = 4;  
1903      }      }
1904    else if (word_match)    else
1905      {      {
1906      sprintf(buffer, "\\b%.*s\\b", MBUFTHIRD, argv[i++]);      f = fopen(pattern_filename, "r");
1907      pat = buffer;      if (f == NULL)
1908      adjust = 2;        {
1909          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1910            strerror(errno));
1911          return 2;
1912          }
1913        filename = pattern_filename;
1914      }      }
   else pat = argv[i++];  
   
   pattern_list[0] = pcre_compile(pat, options, &error, &errptr, NULL);  
1915    
1916    if (pattern_list[0] == NULL)    while (fgets(buffer, MBUFTHIRD, f) != NULL)
1917      {      {
1918      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n",      char *s = buffer + (int)strlen(buffer);
1919        errptr - adjust, error);      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
1920      return 2;      *s = 0;
1921        linenumber++;
1922        if (buffer[0] == 0) continue;   /* Skip blank lines */
1923        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1924          return 2;
1925      }      }
1926    pattern_count++;  
1927      if (f != stdin) fclose(f);
1928    }    }
1929    
1930  /* Study the regular expressions, as we will be running them many times */  /* Study the regular expressions, as we will be running them many times */
# Line 1117  for (j = 0; j < pattern_count; j++) Line 1945  for (j = 0; j < pattern_count; j++)
1945    
1946  if (exclude_pattern != NULL)  if (exclude_pattern != NULL)
1947    {    {
1948    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, NULL);    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
1949        pcretables);
1950    if (exclude_compiled == NULL)    if (exclude_compiled == NULL)
1951      {      {
1952      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
# Line 1128  if (exclude_pattern != NULL) Line 1957  if (exclude_pattern != NULL)
1957    
1958  if (include_pattern != NULL)  if (include_pattern != NULL)
1959    {    {
1960    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, NULL);    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
1961        pcretables);
1962    if (include_compiled == NULL)    if (include_compiled == NULL)
1963      {      {
1964      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
# Line 1137  if (include_pattern != NULL) Line 1967  if (include_pattern != NULL)
1967      }      }
1968    }    }
1969    
1970  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
1971    
1972  if (i >= argc) return pcregrep(stdin,  if (i >= argc)
1973    (filenames_only || filenames_nomatch_only)? stdin_name : NULL);    return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
1974    
1975  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
1976  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
1977  the file name if the argument is not a directory and filenames_only is not set.  the file name if the argument is not a directory and filenames are not
1978  */  otherwise forced. */
1979    
1980  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
1981    
1982  for (; i < argc; i++)  for (; i < argc; i++)
1983    {    {
1984    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
1985        only_one_at_top);
1986    if (frc > 1) rc = frc;    if (frc > 1) rc = frc;
1987      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
1988    }    }

Legend:
Removed from v.77  
changed lines
  Added in v.93

  ViewVC Help
Powered by ViewVC 1.1.5