/[pcre]/code/tags/pcre-8.01/pcregrep.c
ViewVC logotype

Diff of /code/tags/pcre-8.01/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 236 by ph10, Tue Sep 11 12:57:06 2007 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
# Line 45  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
 #include "config.h"  
58  #include "pcre.h"  #include "pcre.h"
59    
60  #define FALSE 0  #define FALSE 0
# Line 55  POSSIBILITY OF SUCH DAMAGE. Line 62  POSSIBILITY OF SUCH DAMAGE.
62    
63  typedef int BOOL;  typedef int BOOL;
64    
 #define VERSION "4.0 07-Jun-2005"  
65  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
66    
67  #if BUFSIZ > 8192  #if BUFSIZ > 8192
# Line 64  typedef int BOOL; Line 70  typedef int BOOL;
70  #define MBUFTHIRD 8192  #define MBUFTHIRD 8192
71  #endif  #endif
72    
73    /* Values for the "filenames" variable, which specifies options for file name
74    output. The order is important; it is assumed that a file name is wanted for
75    all values greater than FN_DEFAULT. */
76    
77    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
78    
79    /* Actions for the -d and -D options */
80    
81    enum { dee_READ, dee_SKIP, dee_RECURSE };
82    enum { DEE_READ, DEE_SKIP };
83    
84    /* Actions for special processing options (flag bits) */
85    
86    #define PO_WORD_MATCH     0x0001
87    #define PO_LINE_MATCH     0x0002
88    #define PO_FIXED_STRINGS  0x0004
89    
90    /* Line ending types */
91    
92    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
93    
94    
95    
96  /*************************************************  /*************************************************
97  *               Global variables                 *  *               Global variables                 *
98  *************************************************/  *************************************************/
99    
100    /* Jeffrey Friedl has some debugging requirements that are not part of the
101    regular code. */
102    
103    #ifdef JFRIEDL_DEBUG
104    static int S_arg = -1;
105    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
106    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
107    static const char *jfriedl_prefix = "";
108    static const char *jfriedl_postfix = "";
109    #endif
110    
111    static int  endlinetype;
112    
113    static char *colour_string = (char *)"1;31";
114    static char *colour_option = NULL;
115    static char *dee_option = NULL;
116    static char *DEE_option = NULL;
117    static char *newline = NULL;
118  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
119  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
120    static char *locale = NULL;
121    
122    static const unsigned char *pcretables = NULL;
123    
124  static int  pattern_count = 0;  static int  pattern_count = 0;
125  static pcre **pattern_list;  static pcre **pattern_list = NULL;
126  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
127    
128  static char *include_pattern = NULL;  static char *include_pattern = NULL;
129  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
# Line 85  static pcre *exclude_compiled = NULL; Line 134  static pcre *exclude_compiled = NULL;
134  static int after_context = 0;  static int after_context = 0;
135  static int before_context = 0;  static int before_context = 0;
136  static int both_context = 0;  static int both_context = 0;
137    static int dee_action = dee_READ;
138    static int DEE_action = DEE_READ;
139    static int error_count = 0;
140    static int filenames = FN_DEFAULT;
141    static int process_options = 0;
142    
143  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
144  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
 static BOOL filenames_only = FALSE;  
 static BOOL filenames_nomatch_only = FALSE;  
145  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
146  static BOOL invert = FALSE;  static BOOL invert = FALSE;
147  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
148  static BOOL number = FALSE;  static BOOL number = FALSE;
149    static BOOL only_matching = FALSE;
150  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
 static BOOL recurse = FALSE;  
151  static BOOL silent = FALSE;  static BOOL silent = FALSE;
152  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
 static BOOL word_match = FALSE;  
153    
154  /* Structure for options and list of them */  /* Structure for options and list of them */
155    
156  enum { OP_NODATA, OP_STRING, OP_NUMBER };  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
157           OP_PATLIST };
158    
159  typedef struct option_item {  typedef struct option_item {
160    int type;    int type;
# Line 112  typedef struct option_item { Line 164  typedef struct option_item {
164    const char *help_text;    const char *help_text;
165  } option_item;  } option_item;
166    
167    /* Options without a single-letter equivalent get a negative value. This can be
168    used to identify them. */
169    
170    #define N_COLOUR    (-1)
171    #define N_EXCLUDE   (-2)
172    #define N_HELP      (-3)
173    #define N_INCLUDE   (-4)
174    #define N_LABEL     (-5)
175    #define N_LOCALE    (-6)
176    #define N_NULL      (-7)
177    
178  static option_item optionlist[] = {  static option_item optionlist[] = {
179    { OP_NODATA, -1,  NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
180    { OP_NODATA, -1,  NULL,              "help",          "display this help and exit" },    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
181    { OP_NUMBER, 'A', &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
182    { OP_NUMBER, 'B', &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
183    { OP_NUMBER, 'C', &both_context,     "context=number", "set number of context lines, before & after" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
184    { OP_NODATA, 'c', NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
185    { OP_STRING, 'f', &pattern_filename, "file=path",     "read patterns from file" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
186    { OP_NODATA, 'h', NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
187    { OP_NODATA, 'i', NULL,              "ignore-case",   "ignore case distinctions" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
188    { OP_NODATA, 'l', NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
189    { OP_NODATA, 'L', NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
190    { OP_STRING, -1,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
191    { OP_NODATA, 'M', NULL,              "multiline",     "run in multiline mode" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
192    { OP_NODATA, 'n', NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
193    { OP_NODATA, 'q', NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
194    { OP_NODATA, 'r', NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
195    { OP_STRING, -1,  &exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
196    { OP_STRING, -1,  &include_pattern,  "include=pattern","include matching files when recursing" },    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
197    { OP_NODATA, 's', NULL,              "no-messages",   "suppress error messages" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
198    { OP_NODATA, 'u', NULL,              "utf-8",         "use UTF-8 mode" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
199    { OP_NODATA, 'V', NULL,              "version",       "print version information and exit" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
200    { OP_NODATA, 'v', NULL,              "invert-match",  "select non-matching lines" },    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
201    { OP_NODATA, 'w', NULL,              "word-regex(p)", "force PATTERN to match only as a word"  },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
202    { OP_NODATA, 'x', NULL,              "line-regex(p)", "force PATTERN to match only whole lines" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
203    { OP_NODATA, 0,   NULL,               NULL,            NULL }    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
204      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
205      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
206      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
207    #ifdef JFRIEDL_DEBUG
208      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
209    #endif
210      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
211      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
212      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
213      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
214      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
215      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
216      { OP_NODATA,    0,        NULL,               NULL,            NULL }
217  };  };
218    
219    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
220    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
221    that the combination of -w and -x has the same effect as -x on its own, so we
222    can treat them as the same. */
223    
224    static const char *prefix[] = {
225      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
226    
227    static const char *suffix[] = {
228      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
229    
230    /* UTF-8 tables - used only when the newline setting is "any". */
231    
232    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
233    
234    const char utf8_table4[] = {
235      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
236      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
237      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
238      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
239    
240    
241    
242  /*************************************************  /*************************************************
243  *       Functions for directory scanning         *  *            OS-specific functions               *
244  *************************************************/  *************************************************/
245    
246  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
247  although at present the only ones are for Unix, Win32, and for "no directory  although at present the only ones are for Unix, Win32, and for "no support". */
 recursion support". */  
248    
249    
250  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
251    
252  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
253  #include <sys/types.h>  #include <sys/types.h>
254  #include <sys/stat.h>  #include <sys/stat.h>
255  #include <dirent.h>  #include <dirent.h>
# Line 184  for (;;) Line 281  for (;;)
281    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
282      return dent->d_name;      return dent->d_name;
283    }    }
284  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
285  }  }
286    
287  static void  static void
# Line 194  closedir(dir); Line 291  closedir(dir);
291  }  }
292    
293    
294    /************* Test for regular file in Unix **********/
295    
static int
isregfile(char *filename)
{
struct stat statbuf;

/* If stat() fails the type of the path is unknown. Report it as a regular
file, in the expectation that a later attempt to open it will fail. */

if (stat(filename, &statbuf) < 0) return 1;

/* S_ISREG() is the base POSIX file-type test. The S_IFMT and S_IFREG mask
values used previously are XSI extensions and need not be visible when
compiling in a strictly conforming mode. The result is normalised to 0 or 1. */

return S_ISREG(statbuf.st_mode)? 1 : 0;
}
304    
305    
306    /************* Test stdout for being a terminal in Unix **********/
307    
308    static BOOL
309    is_stdout_tty(void)
310    {
311    return isatty(fileno(stdout));
312    }
313    
314    
315  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
316    
317  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
# Line 201  Lionel Fourquaux. David Burgess added a Line 319  Lionel Fourquaux. David Burgess added a
319  when it did not exist. */  when it did not exist. */
320    
321    
322  #elif HAVE_WIN32API  #elif HAVE_WINDOWS_H
323    
324  #ifndef STRICT  #ifndef STRICT
325  # define STRICT  # define STRICT
# Line 292  free(dir); Line 410  free(dir);
410  }  }
411    
412    
413    /************* Test for regular file in Win32 **********/
414    
415    /* I don't know how to do this, or if it can be done; assume all paths are
416    regular if they are not directories. */
417    
static int
isregfile(char *filename)
{
/* Anything that isdirectory() does not report as a directory is treated as a
regular file; no finer test is attempted here. */

return !isdirectory(filename);
}
422    
423    
424    /************* Test stdout for being a terminal in Win32 **********/
425    
426    /* I don't know how to do this; assume never */
427    
428    static BOOL
429    is_stdout_tty(void)
430    {
431    FALSE;
432    }
433    
434    
435  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
436    
437  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
# Line 300  free(dir); Line 440  free(dir);
440    
441  typedef void directory_type;  typedef void directory_type;
442    
443  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
444  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
445  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
446  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
447    
448    
449    /************* Test for regular when we can't do it **********/
450    
451    /* Assume all files are regular. */
452    
static int
isregfile(char *filename)
{
(void)filename;   /* Nothing can be tested; every path counts as regular */
return 1;
}
454    
455    
456    /************* Test stdout for being a terminal when we can't do it **********/
457    
458    static BOOL
459    is_stdout_tty(void)
460    {
461    return FALSE;
462    }
463    
464    
465  #endif  #endif
466    
467    
468    
469  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
470  /*************************************************  /*************************************************
471  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
472  *************************************************/  *************************************************/
# Line 332  return sys_errlist[n]; Line 489  return sys_errlist[n];
489    
490    
491  /*************************************************  /*************************************************
492    *             Find end of line                   *
493    *************************************************/
494    
495    /* The length of the endline sequence that is found is set via lenptr. This may
496    be zero at the very end of the file if there is no line-ending sequence there.
497    
498    Arguments:
499      p         current position in line
500      endptr    end of available data
501      lenptr    where to put the length of the eol sequence
502    
503    Returns:    pointer to the last byte of the line
504    */
505    
506    static char *
507    end_of_line(char *p, char *endptr, int *lenptr)
508    {
509    switch(endlinetype)
510      {
511      default:      /* Just in case */
512      case EL_LF:
513      while (p < endptr && *p != '\n') p++;
514      if (p < endptr)
515        {
516        *lenptr = 1;
517        return p + 1;
518        }
519      *lenptr = 0;
520      return endptr;
521    
522      case EL_CR:
523      while (p < endptr && *p != '\r') p++;
524      if (p < endptr)
525        {
526        *lenptr = 1;
527        return p + 1;
528        }
529      *lenptr = 0;
530      return endptr;
531    
532      case EL_CRLF:
533      for (;;)
534        {
535        while (p < endptr && *p != '\r') p++;
536        if (++p >= endptr)
537          {
538          *lenptr = 0;
539          return endptr;
540          }
541        if (*p == '\n')
542          {
543          *lenptr = 2;
544          return p + 1;
545          }
546        }
547      break;
548    
549      case EL_ANYCRLF:
550      while (p < endptr)
551        {
552        int extra = 0;
553        register int c = *((unsigned char *)p);
554    
555        if (utf8 && c >= 0xc0)
556          {
557          int gcii, gcss;
558          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
559          gcss = 6*extra;
560          c = (c & utf8_table3[extra]) << gcss;
561          for (gcii = 1; gcii <= extra; gcii++)
562            {
563            gcss -= 6;
564            c |= (p[gcii] & 0x3f) << gcss;
565            }
566          }
567    
568        p += 1 + extra;
569    
570        switch (c)
571          {
572          case 0x0a:    /* LF */
573          *lenptr = 1;
574          return p;
575    
576          case 0x0d:    /* CR */
577          if (p < endptr && *p == 0x0a)
578            {
579            *lenptr = 2;
580            p++;
581            }
582          else *lenptr = 1;
583          return p;
584    
585          default:
586          break;
587          }
588        }   /* End of loop for ANYCRLF case */
589    
590      *lenptr = 0;  /* Must have hit the end */
591      return endptr;
592    
593      case EL_ANY:
594      while (p < endptr)
595        {
596        int extra = 0;
597        register int c = *((unsigned char *)p);
598    
599        if (utf8 && c >= 0xc0)
600          {
601          int gcii, gcss;
602          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
603          gcss = 6*extra;
604          c = (c & utf8_table3[extra]) << gcss;
605          for (gcii = 1; gcii <= extra; gcii++)
606            {
607            gcss -= 6;
608            c |= (p[gcii] & 0x3f) << gcss;
609            }
610          }
611    
612        p += 1 + extra;
613    
614        switch (c)
615          {
616          case 0x0a:    /* LF */
617          case 0x0b:    /* VT */
618          case 0x0c:    /* FF */
619          *lenptr = 1;
620          return p;
621    
622          case 0x0d:    /* CR */
623          if (p < endptr && *p == 0x0a)
624            {
625            *lenptr = 2;
626            p++;
627            }
628          else *lenptr = 1;
629          return p;
630    
631          case 0x85:    /* NEL */
632          *lenptr = utf8? 2 : 1;
633          return p;
634    
635          case 0x2028:  /* LS */
636          case 0x2029:  /* PS */
637          *lenptr = 3;
638          return p;
639    
640          default:
641          break;
642          }
643        }   /* End of loop for ANY case */
644    
645      *lenptr = 0;  /* Must have hit the end */
646      return endptr;
647      }     /* End of overall switch */
648    }
649    
650    
651    
652    /*************************************************
653    *         Find start of previous line            *
654    *************************************************/
655    
656    /* This is called when looking back for before lines to print.
657    
658    Arguments:
659      p         start of the subsequent line
660      startptr  start of available data
661    
662    Returns:    pointer to the start of the previous line
663    */
664    
665    static char *
666    previous_line(char *p, char *startptr)
667    {
668    switch(endlinetype)
669      {
670      default:      /* Just in case */
671      case EL_LF:
672      p--;
673      while (p > startptr && p[-1] != '\n') p--;
674      return p;
675    
676      case EL_CR:
677      p--;
678      while (p > startptr && p[-1] != '\n') p--;
679      return p;
680    
681      case EL_CRLF:
682      for (;;)
683        {
684        p -= 2;
685        while (p > startptr && p[-1] != '\n') p--;
686        if (p <= startptr + 1 || p[-2] == '\r') return p;
687        }
688      return p;   /* But control should never get here */
689    
690      case EL_ANY:
691      case EL_ANYCRLF:
692      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
693      if (utf8) while ((*p & 0xc0) == 0x80) p--;
694    
695      while (p > startptr)
696        {
697        register int c;
698        char *pp = p - 1;
699    
700        if (utf8)
701          {
702          int extra = 0;
703          while ((*pp & 0xc0) == 0x80) pp--;
704          c = *((unsigned char *)pp);
705          if (c >= 0xc0)
706            {
707            int gcii, gcss;
708            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
709            gcss = 6*extra;
710            c = (c & utf8_table3[extra]) << gcss;
711            for (gcii = 1; gcii <= extra; gcii++)
712              {
713              gcss -= 6;
714              c |= (pp[gcii] & 0x3f) << gcss;
715              }
716            }
717          }
718        else c = *((unsigned char *)pp);
719    
720        if (endlinetype == EL_ANYCRLF) switch (c)
721          {
722          case 0x0a:    /* LF */
723          case 0x0d:    /* CR */
724          return p;
725    
726          default:
727          break;
728          }
729    
730        else switch (c)
731          {
732          case 0x0a:    /* LF */
733          case 0x0b:    /* VT */
734          case 0x0c:    /* FF */
735          case 0x0d:    /* CR */
736          case 0x85:    /* NEL */
737          case 0x2028:  /* LS */
738          case 0x2029:  /* PS */
739          return p;
740    
741          default:
742          break;
743          }
744    
745        p = pp;  /* Back one character */
746        }        /* End of loop for ANY case */
747    
748      return startptr;  /* Hit start of data */
749      }     /* End of overall switch */
750    }
751    
752    
753    
754    
755    
756    /*************************************************
757  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
758  *************************************************/  *************************************************/
759    
760  /* This is called if we are about to lose said lines because of buffer filling,  /* This is called if we are about to lose said lines because of buffer filling,
761  and at the end of the file.  and at the end of the file. The data in the line is written using fwrite() so
762    that a binary zero does not terminate it.
763    
764  Arguments:  Arguments:
765    lastmatchnumber   the number of the last matching line, plus one    lastmatchnumber   the number of the last matching line, plus one
# Line 355  if (after_context > 0 && lastmatchnumber Line 778  if (after_context > 0 && lastmatchnumber
778    int count = 0;    int count = 0;
779    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
780      {      {
781        int ellength;
782      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
783      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
784      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
785      while (*pp != '\n') pp++;      pp = end_of_line(pp, endptr, &ellength);
786      fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
787      lastmatchrestart = pp + 1;      lastmatchrestart = pp;
788      }      }
789    hyphenpending = TRUE;    hyphenpending = TRUE;
790    }    }
# Line 417  way, the buffer is shifted left and re-f Line 841  way, the buffer is shifted left and re-f
841    
842  while (ptr < endptr)  while (ptr < endptr)
843    {    {
844    int i;    int i, endlinelength;
845      int mrc = 0;
846    BOOL match = FALSE;    BOOL match = FALSE;
847    char *t = ptr;    char *t = ptr;
848    size_t length, linelength;    size_t length, linelength;
# Line 429  while (ptr < endptr) Line 854  while (ptr < endptr)
854    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
855    that any match is constrained to be in the first line. */    that any match is constrained to be in the first line. */
856    
857    linelength = 0;    t = end_of_line(t, endptr, &endlinelength);
858    while (t < endptr && *t++ != '\n') linelength++;    linelength = t - ptr - endlinelength;
859    length = multiline? endptr - ptr : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
860    
861      /* Extra processing for Jeffrey Friedl's debugging. */
862    
863    #ifdef JFRIEDL_DEBUG
864      if (jfriedl_XT || jfriedl_XR)
865      {
866          #include <sys/time.h>
867          #include <time.h>
868          struct timeval start_time, end_time;
869          struct timezone dummy;
870    
871          if (jfriedl_XT)
872          {
873              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
874              const char *orig = ptr;
875              ptr = malloc(newlen + 1);
876              if (!ptr) {
877                      printf("out of memory");
878                      exit(2);
879              }
880              endptr = ptr;
881              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
882              for (i = 0; i < jfriedl_XT; i++) {
883                      strncpy(endptr, orig,  length);
884                      endptr += length;
885              }
886              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
887              length = newlen;
888          }
889    
890          if (gettimeofday(&start_time, &dummy) != 0)
891                  perror("bad gettimeofday");
892    
893    
894          for (i = 0; i < jfriedl_XR; i++)
895              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
896    
897          if (gettimeofday(&end_time, &dummy) != 0)
898                  perror("bad gettimeofday");
899    
900          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
901                          -
902                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
903    
904          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
905          return 0;
906      }
907    #endif
908    
909    
910    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches. Note that we don't include
911    the final newline in the subject string. */    the final newline in the subject string. */
912    
913    for (i = 0; !match && i < pattern_count; i++)    for (i = 0; i < pattern_count; i++)
914      {      {
915      match = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
916        offsets, 99) >= 0;        offsets, 99);
917        if (mrc >= 0) { match = TRUE; break; }
918        if (mrc != PCRE_ERROR_NOMATCH)
919          {
920          fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
921          if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
922          fprintf(stderr, "this line:\n");
923          fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */
924          fprintf(stderr, "\n");
925          if (error_count == 0 &&
926              (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
927            {
928            fprintf(stderr, "pcregrep: error %d means that a resource limit "
929              "was exceeded\n", mrc);
930            fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
931            }
932          if (error_count++ > 20)
933            {
934            fprintf(stderr, "pcregrep: too many errors - abandoned\n");
935            exit(2);
936            }
937          match = invert;    /* No more matching; don't show the line again */
938          break;
939          }
940      }      }
941    
942    /* If it's a match or a not-match (as required), print what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
943    
944    if (match != invert)    if (match != invert)
945      {      {
946      BOOL hyphenprinted = FALSE;      BOOL hyphenprinted = FALSE;
947    
948      if (filenames_nomatch_only) return 1;      /* We've failed if we want a file that doesn't have any matches. */
949    
950        if (filenames == FN_NOMATCH_ONLY) return 1;
951    
952        /* Just count if just counting is wanted. */
953    
954      if (count_only) count++;      if (count_only) count++;
955    
956      else if (filenames_only)      /* If all we want is a file name, there is no need to scan any more lines
957        in the file. */
958    
959        else if (filenames == FN_ONLY)
960        {        {
961        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
962        return 0;        return 0;
963        }        }
964    
965        /* Likewise, if all we want is a yes/no answer. */
966    
967      else if (quiet) return 0;      else if (quiet) return 0;
968    
969        /* The --only-matching option prints just the substring that matched, and
970        does not print any context. */
971    
972        else if (only_matching)
973          {
974          if (printname != NULL) fprintf(stdout, "%s:", printname);
975          if (number) fprintf(stdout, "%d:", linenumber);
976          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
977          fprintf(stdout, "\n");
978          }
979    
980        /* This is the default case when none of the above options is set. We print
981        the matching line(s), possibly preceded and/or followed by other lines of
982        context. */
983    
984      else      else
985        {        {
986        /* See if there is a requirement to print some "after" lines from a        /* See if there is a requirement to print some "after" lines from a
# Line 467  while (ptr < endptr) Line 988  while (ptr < endptr)
988    
989        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
990          {          {
991            int ellength;
992          int linecount = 0;          int linecount = 0;
993          char *p = lastmatchrestart;          char *p = lastmatchrestart;
994    
995          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
996            {            {
997            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
998            linecount++;            linecount++;
999            }            }
1000    
1001          /* It is important to advance lastmatchrestart during this printing so          /* It is important to advance lastmatchrestart during this printing so
1002          that it interacts correctly with any "before" printing below. */          that it interacts correctly with any "before" printing below. Print
1003            each line's data using fwrite() in case there are binary zeroes. */
1004    
1005          while (lastmatchrestart < p)          while (lastmatchrestart < p)
1006            {            {
1007            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
1008            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1009            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1010            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1011            fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1012            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
1013            }            }
1014          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
1015          }          }
# Line 510  while (ptr < endptr) Line 1032  while (ptr < endptr)
1032          char *p = ptr;          char *p = ptr;
1033    
1034          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1035                 linecount++ < before_context)                 linecount < before_context)
1036            {            {
1037            p--;            linecount++;
1038            while (p > buffer && p[-1] != '\n') p--;            p = previous_line(p, buffer);
1039            }            }
1040    
1041          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 521  while (ptr < endptr) Line 1043  while (ptr < endptr)
1043    
1044          while (p < ptr)          while (p < ptr)
1045            {            {
1046              int ellength;
1047            char *pp = p;            char *pp = p;
1048            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1049            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1050            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1051            fprintf(stdout, "%.*s", pp - p + 1, p);            fwrite(p, 1, pp - p, stdout);
1052            p = pp + 1;            p = pp;
1053            }            }
1054          }          }
1055    
1056        /* Now print the matching line(s); ensure we set hyphenpending at the end        /* Now print the matching line(s); ensure we set hyphenpending at the end
1057        of the file. */        of the file if any context lines are being output. */
1058    
1059          if (after_context > 0 || before_context > 0)
1060            endhyphenpending = TRUE;
1061    
       endhyphenpending = TRUE;  
1062        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (printname != NULL) fprintf(stdout, "%s:", printname);
1063        if (number) fprintf(stdout, "%d:", linenumber);        if (number) fprintf(stdout, "%d:", linenumber);
1064    
1065        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1066        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1067        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1068        start of the match will always be before the first \n character. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1069          the match will always be before the first newline sequence. */
1070    
1071        if (multiline)        if (multiline)
1072          {          {
1073          char *endmatch = ptr + offsets[1];          int ellength;
1074          t = ptr;          char *endmatch = ptr;
1075          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          if (!invert)
1076          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1077          linelength = endmatch - ptr;            endmatch += offsets[1];
1078              t = ptr;
1079              while (t < endmatch)
1080                {
1081                t = end_of_line(t, endptr, &ellength);
1082                if (t <= endmatch) linenumber++; else break;
1083                }
1084              }
1085            endmatch = end_of_line(endmatch, endptr, &ellength);
1086            linelength = endmatch - ptr - ellength;
1087          }          }
1088    
1089        fprintf(stdout, "%.*s\n", linelength, ptr);        /*** NOTE: Use only fwrite() to output the data line, so that binary
1090          zeroes are treated as just another data character. */
1091    
1092          /* This extra option, for Jeffrey Friedl's debugging requirements,
1093          replaces the matched string, or a specific captured string if it exists,
1094          with X. When this happens, colouring is ignored. */
1095    
1096    #ifdef JFRIEDL_DEBUG
1097          if (S_arg >= 0 && S_arg < mrc)
1098            {
1099            int first = S_arg * 2;
1100            int last  = first + 1;
1101            fwrite(ptr, 1, offsets[first], stdout);
1102            fprintf(stdout, "X");
1103            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1104            }
1105          else
1106    #endif
1107    
1108          /* We have to split the line(s) up if colouring. */
1109    
1110          if (do_colour)
1111            {
1112            fwrite(ptr, 1, offsets[0], stdout);
1113            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1114            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1115            fprintf(stdout, "%c[00m", 0x1b);
1116            fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1117            }
1118          else fwrite(ptr, 1, linelength + endlinelength, stdout);
1119        }        }
1120    
1121        /* End of doing what has to be done for a match */
1122    
1123      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1124    
1125      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1126      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1127    
1128      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1129      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1130      }      }
1131    
1132      /* For a match in multiline inverted mode (which of course did not cause
1133      anything to be printed), we have to move on to the end of the match before
1134      proceeding. */
1135    
1136      if (multiline && invert && match)
1137        {
1138        int ellength;
1139        char *endmatch = ptr + offsets[1];
1140        t = ptr;
1141        while (t < endmatch)
1142          {
1143          t = end_of_line(t, endptr, &ellength);
1144          if (t <= endmatch) linenumber++; else break;
1145          }
1146        endmatch = end_of_line(endmatch, endptr, &ellength);
1147        linelength = endmatch - ptr - ellength;
1148        }
1149    
1150    /* Advance to after the newline and increment the line number. */    /* Advance to after the newline and increment the line number. */
1151    
1152    ptr += linelength + 1;    ptr += linelength + endlinelength;
1153    linenumber++;    linenumber++;
1154    
1155    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
# Line 599  while (ptr < endptr) Line 1183  while (ptr < endptr)
1183  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
1184  hyphenpending if it prints something. */  hyphenpending if it prints something. */
1185    
1186  do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);  if (!only_matching && !count_only)
1187  hyphenpending |= endhyphenpending;    {
1188      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1189      hyphenpending |= endhyphenpending;
1190      }
1191    
1192  /* Print the file name if we are looking for those without matches and there  /* Print the file name if we are looking for those without matches and there
1193  were none. If we found a match, we won't have got this far. */  were none. If we found a match, we won't have got this far. */
1194    
1195  if (filenames_nomatch_only)  if (filenames == FN_NOMATCH_ONLY)
1196    {    {
1197    fprintf(stdout, "%s\n", printname);    fprintf(stdout, "%s\n", printname);
1198    return 0;    return 0;
# Line 633  recursing; if it's a file, grep it. Line 1220  recursing; if it's a file, grep it.
1220    
1221  Arguments:  Arguments:
1222    pathname          the path to investigate    pathname          the path to investigate
1223    dir_recurse       TRUE if recursing is wanted (-r)    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
   show_filenames    TRUE if file names are wanted for multiple files, except  
                       for the only file at top level when not filenames_only  
1224    only_one_at_top   TRUE if the path is the only one at toplevel    only_one_at_top   TRUE if the path is the only one at toplevel
1225    
1226  Returns:   0 if there was at least one match  Returns:   0 if there was at least one match
# Line 646  However, file opening failures are suppr Line 1231  However, file opening failures are suppr
1231  */  */
1232    
1233  static int  static int
1234  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL show_filenames,  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
   BOOL only_one_at_top)  
1235  {  {
1236  int rc = 1;  int rc = 1;
1237  int sep;  int sep;
1238  FILE *in;  FILE *in;
 char *printname;  
1239    
1240  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1241    
1242  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1243    {    {
1244    return pcregrep(stdin,    return pcregrep(stdin,
1245      (filenames_only || filenames_nomatch_only ||      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
     (show_filenames && !only_one_at_top))?  
1246        stdin_name : NULL);        stdin_name : NULL);
1247    }    }
1248    
 /* If the file is a directory and we are recursing, scan each file within it,  
 subject to any include or exclude patterns that were set. The scanning code is  
 localized so it can be made system-specific. */  
1249    
1250  if ((sep = isdirectory(pathname)) != 0 && dir_recurse)  /* If the file is a directory, skip if skipping or if we are recursing, scan
1251    {  each file within it, subject to any include or exclude patterns that were set.
1252    char buffer[1024];  The scanning code is localized so it can be made system-specific. */
   char *nextfile;  
   directory_type *dir = opendirectory(pathname);  
1253    
1254    if (dir == NULL)  if ((sep = isdirectory(pathname)) != 0)
1255      {    {
1256      if (!silent)    if (dee_action == dee_SKIP) return 1;
1257        fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,    if (dee_action == dee_RECURSE)
         strerror(errno));  
     return 2;  
     }  
   
   while ((nextfile = readdirectory(dir)) != NULL)  
1258      {      {
1259      int frc, blen;      char buffer[1024];
1260      sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);      char *nextfile;
1261      blen = strlen(buffer);      directory_type *dir = opendirectory(pathname);
1262    
1263      if (exclude_compiled != NULL &&      if (dir == NULL)
1264          pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)        {
1265        continue;        if (!silent)
1266            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1267              strerror(errno));
1268          return 2;
1269          }
1270    
1271      if (include_compiled != NULL &&      while ((nextfile = readdirectory(dir)) != NULL)
1272          pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)        {
1273        continue;        int frc, blen;
1274          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1275          blen = strlen(buffer);
1276    
1277          if (exclude_compiled != NULL &&
1278              pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1279            continue;
1280    
1281          if (include_compiled != NULL &&
1282              pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1283            continue;
1284    
1285          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1286          if (frc > 1) rc = frc;
1287           else if (frc == 0 && rc == 1) rc = 0;
1288          }
1289    
1290      frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);      closedirectory(dir);
1291      if (frc > 1) rc = frc;      return rc;
      else if (frc == 0 && rc == 1) rc = 0;  
1292      }      }
   
   closedirectory(dir);  
   return rc;  
1293    }    }
1294    
1295  /* If the file is not a directory, or we are not recursing, scan it. If this is  /* If the file is not a directory and not a regular file, skip it if that's
1296  the first and only argument at top level, we don't show the file name (unless  been requested. */
1297  we are only showing the file name). Otherwise, control is via the  
1298  show_filenames variable. */  else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1299    
1300    /* Control reaches here if we have a regular file, or if we have a directory
1301    and recursion or skipping was not requested, or if we have anything else and
1302    skipping was not requested. The scan proceeds. If this is the first and only
1303    argument at top level, we don't show the file name, unless we are only showing
1304    the file name, or the filename was forced (-H). */
1305    
1306  in = fopen(pathname, "r");  in = fopen(pathname, "r");
1307  if (in == NULL)  if (in == NULL)
# Line 719  if (in == NULL) Line 1312  if (in == NULL)
1312    return 2;    return 2;
1313    }    }
1314    
1315  printname =  (filenames_only || filenames_nomatch_only ||  rc = pcregrep(in, (filenames > FN_DEFAULT ||
1316    (show_filenames && !only_one_at_top))? pathname : NULL;    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
   
 rc = pcregrep(in, printname);  
1317    
1318  fclose(in);  fclose(in);
1319  return rc;  return rc;
# Line 738  return rc; Line 1329  return rc;
1329  static int  static int
1330  usage(int rc)  usage(int rc)
1331  {  {
1332  fprintf(stderr, "Usage: pcregrep [-LMVcfhilnqrsvwx] [long-options] [pattern] [file1 file2 ...]\n");  option_item *op;
1333    fprintf(stderr, "Usage: pcregrep [-");
1334    for (op = optionlist; op->one_char != 0; op++)
1335      {
1336      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1337      }
1338    fprintf(stderr, "] [long options] [pattern] [files]\n");
1339  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1340  return rc;  return rc;
1341  }  }
# Line 757  option_item *op; Line 1354  option_item *op;
1354    
1355  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1356  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1357  printf("PATTERN must be present if -f is not used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1358  printf("\"-\" can be used as a file name to mean STDIN.\n");  printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1359  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1360    
1361  printf("Options:\n");  printf("Options:\n");
# Line 794  handle_option(int letter, int options) Line 1391  handle_option(int letter, int options)
1391  {  {
1392  switch(letter)  switch(letter)
1393    {    {
1394    case -1:  help(); exit(0);    case N_HELP: help(); exit(0);
1395    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1396    case 'h': filenames = FALSE; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1397      case 'H': filenames = FN_FORCE; break;
1398      case 'h': filenames = FN_NONE; break;
1399    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1400    case 'l': filenames_only = TRUE; break;    case 'l': filenames = FN_ONLY; break;
1401    case 'L': filenames_nomatch_only = TRUE; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1402    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1403    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1404      case 'o': only_matching = TRUE; break;
1405    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1406    case 'r': recurse = TRUE; break;    case 'r': dee_action = dee_RECURSE; break;
1407    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1408    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1409    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1410    case 'w': word_match = TRUE; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1411    case 'x': whole_lines = TRUE; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1412    
1413    case 'V':    case 'V':
1414    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
1415    exit(0);    exit(0);
1416    break;    break;
1417    
# Line 828  return options; Line 1427  return options;
1427    
1428    
1429  /*************************************************  /*************************************************
1430    *          Construct printed ordinal             *
1431    *************************************************/
1432    
1433    /* This turns a number into "1st", "3rd", etc. */
1434    
/* Format n followed by its English ordinal ending ("1st", "2nd", "3rd",
"4th", ...). Numbers whose last two digits are 11, 12 or 13 take "th"
("11th", "112th"), not the ending suggested by the final digit alone.

Argument:  n   the number to format
Returns:   pointer to a static buffer holding the text; the buffer is
           overwritten by the next call, so the result must be used or
           copied before calling ordin() again
*/

static char *
ordin(int n)
{
/* Three characters per byte is more than enough for the decimal digits of
any int; the extra four cover a sign, the two-letter ending and the NUL. */

static char buffer[sizeof(int) * 3 + 4];
const char *ending = "th";
int last_two = n % 100;

/* The "teens" are irregular: only look at the final digit outside 11..13.
Negative remainders match no case below and therefore keep "th". */

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1451    
1452    
1453    
1454    /*************************************************
1455    *          Compile a single pattern              *
1456    *************************************************/
1457    
1458    /* When the -F option has been used, this is called for each substring.
1459    Otherwise it's called for each supplied pattern.
1460    
1461    Arguments:
1462      pattern        the pattern string
1463      options        the PCRE options
1464      filename       the file name, or NULL for a command-line pattern
1465      count          0 if this is the only command line pattern, or
1466                     number of the command line pattern, or
1467                     linenumber for a pattern from a file
1468    
1469    Returns:         TRUE on success, FALSE after an error
1470    */
1471    
1472    static BOOL
1473    compile_single_pattern(char *pattern, int options, char *filename, int count)
1474    {
1475    char buffer[MBUFTHIRD + 16];
1476    const char *error;
1477    int errptr;
1478    
1479    if (pattern_count >= MAX_PATTERN_COUNT)
1480      {
1481      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1482        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1483      return FALSE;
1484      }
1485    
1486    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1487      suffix[process_options]);
1488    pattern_list[pattern_count] =
1489      pcre_compile(buffer, options, &error, &errptr, pcretables);
1490    if (pattern_list[pattern_count] != NULL)
1491      {
1492      pattern_count++;
1493      return TRUE;
1494      }
1495    
1496    /* Handle compile errors */
1497    
1498    errptr -= (int)strlen(prefix[process_options]);
1499    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1500    
1501    if (filename == NULL)
1502      {
1503      if (count == 0)
1504        fprintf(stderr, "pcregrep: Error in command-line regex "
1505          "at offset %d: %s\n", errptr, error);
1506      else
1507        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1508          "at offset %d: %s\n", ordin(count), errptr, error);
1509      }
1510    else
1511      {
1512      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1513        "at offset %d: %s\n", count, filename, errptr, error);
1514      }
1515    
1516    return FALSE;
1517    }
1518    
1519    
1520    
1521    /*************************************************
1522    *           Compile one supplied pattern         *
1523    *************************************************/
1524    
1525    /* When the -F option has been used, each string may be a list of strings,
1526    separated by line breaks. They will be matched literally.
1527    
1528    Arguments:
1529      pattern        the pattern string
1530      options        the PCRE options
1531      filename       the file name, or NULL for a command-line pattern
1532      count          0 if this is the only command line pattern, or
1533                     number of the command line pattern, or
1534                     linenumber for a pattern from a file
1535    
1536    Returns:         TRUE on success, FALSE after an error
1537    */
1538    
1539    static BOOL
1540    compile_pattern(char *pattern, int options, char *filename, int count)
1541    {
1542    if ((process_options & PO_FIXED_STRINGS) != 0)
1543      {
1544      char *eop = pattern + strlen(pattern);
1545      char buffer[MBUFTHIRD];
1546      for(;;)
1547        {
1548        int ellength;
1549        char *p = end_of_line(pattern, eop, &ellength);
1550        if (ellength == 0)
1551          return compile_single_pattern(pattern, options, filename, count);
1552        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1553        pattern = p;
1554        if (!compile_single_pattern(buffer, options, filename, count))
1555          return FALSE;
1556        }
1557      }
1558    else return compile_single_pattern(pattern, options, filename, count);
1559    }
1560    
1561    
1562    
1563    /*************************************************
1564  *                Main program                    *  *                Main program                    *
1565  *************************************************/  *************************************************/
1566    
# Line 838  main(int argc, char **argv) Line 1571  main(int argc, char **argv)
1571  {  {
1572  int i, j;  int i, j;
1573  int rc = 1;  int rc = 1;
1574  int options = 0;  int pcre_options = 0;
1575    int cmd_pattern_count = 0;
1576    int hint_count = 0;
1577  int errptr;  int errptr;
 const char *error;  
1578  BOOL only_one_at_top;  BOOL only_one_at_top;
1579    char *patterns[MAX_PATTERN_COUNT];
1580    const char *locale_from = "--locale";
1581    const char *error;
1582    
1583    /* Set the default line ending value from the default in the PCRE library;
1584    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1585    */
1586    
1587    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1588    switch(i)
1589      {
1590      default:                 newline = (char *)"lf"; break;
1591      case '\r':               newline = (char *)"cr"; break;
1592      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1593      case -1:                 newline = (char *)"any"; break;
1594      case -2:                 newline = (char *)"anycrlf"; break;
1595      }
1596    
1597  /* Process the options */  /* Process the options */
1598    
# Line 855  for (i = 1; i < argc; i++) Line 1606  for (i = 1; i < argc; i++)
1606    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
1607    
1608    /* If we hit an argument that is just "-", it may be a reference to STDIN,    /* If we hit an argument that is just "-", it may be a reference to STDIN,
1609    but only if we have previously had -f to define the patterns. */    but only if we have previously had -e or -f to define the patterns. */
1610    
1611    if (argv[i][1] == 0)    if (argv[i][1] == 0)
1612      {      {
1613      if (pattern_filename != NULL) break;      if (pattern_filename != NULL || pattern_count > 0) break;
1614        else exit(usage(2));        else exit(usage(2));
1615      }      }
1616    
# Line 881  for (i = 1; i < argc; i++) Line 1632  for (i = 1; i < argc; i++)
1632      /* Some long options have data that follows after =, for example file=name.      /* Some long options have data that follows after =, for example file=name.
1633      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
1634      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
1635      with Jeff Friedl in preferring "regex" without the "p". These options are      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1636      entered in the table as "regex(p)". No option is in both these categories,      These options are entered in the table as "regex(p)". No option is in both
1637      fortunately. */      these categories, fortunately. */
1638    
1639      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
1640        {        {
# Line 898  for (i = 1; i < argc; i++) Line 1649  for (i = 1; i < argc; i++)
1649          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
1650            {            {
1651            int oplen = equals - op->long_name;            int oplen = equals - op->long_name;
1652            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1653            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1654              {              {
1655              option_data = arg + arglen;              option_data = arg + arglen;
# Line 917  for (i = 1; i < argc; i++) Line 1668  for (i = 1; i < argc; i++)
1668          char buff2[24];          char buff2[24];
1669          int baselen = opbra - op->long_name;          int baselen = opbra - op->long_name;
1670          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
1671          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1,
1672            opbra + 1);            (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1673          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1674            break;            break;
1675          }          }
# Line 931  for (i = 1; i < argc; i++) Line 1682  for (i = 1; i < argc; i++)
1682        }        }
1683      }      }
1684    
1685    
1686      /* Jeffrey Friedl's debugging harness uses these additional options which
1687      are not in the right form for putting in the option table because they use
1688      only one hyphen, yet are more than one character long. By putting them
1689      separately here, they will not get displayed as part of the help() output,
1690      but I don't think Jeffrey will care about that. */
1691    
1692    #ifdef JFRIEDL_DEBUG
1693      else if (strcmp(argv[i], "-pre") == 0) {
1694              jfriedl_prefix = argv[++i];
1695              continue;
1696      } else if (strcmp(argv[i], "-post") == 0) {
1697              jfriedl_postfix = argv[++i];
1698              continue;
1699      } else if (strcmp(argv[i], "-XT") == 0) {
1700              sscanf(argv[++i], "%d", &jfriedl_XT);
1701              continue;
1702      } else if (strcmp(argv[i], "-XR") == 0) {
1703              sscanf(argv[++i], "%d", &jfriedl_XR);
1704              continue;
1705      }
1706    #endif
1707    
1708    
1709    /* One-char options; many that have no data may be in a single argument; we    /* One-char options; many that have no data may be in a single argument; we
1710    continue till we hit the last one or one that needs data. */    continue till we hit the last one or one that needs data. */
1711    
# Line 953  for (i = 1; i < argc; i++) Line 1728  for (i = 1; i < argc; i++)
1728          option_data = s+1;          option_data = s+1;
1729          break;          break;
1730          }          }
1731        options = handle_option(*s++, options);        pcre_options = handle_option(*s++, pcre_options);
1732        }        }
1733      }      }
1734    
1735    /* At this point we should have op pointing to a matched option */    /* At this point we should have op pointing to a matched option. If the type
1736      is OP_NODATA, it means that there is no data, and the option might set
1737      something in the PCRE options. */
1738    
1739    if (op->type == OP_NODATA)    if (op->type == OP_NODATA)
     options = handle_option(op->one_char, options);  
   else  
1740      {      {
1741      if (*option_data == 0)      pcre_options = handle_option(op->one_char, pcre_options);
1742        continue;
1743        }
1744    
1745      /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1746      either has a value or defaults to something. It cannot have data in a
1747      separate item. At the moment, the only such options are "colo(u)r" and
1748      Jeffrey Friedl's special -S debugging option. */
1749    
1750      if (*option_data == 0 &&
1751          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1752        {
1753        switch (op->one_char)
1754        {        {
1755        if (i >= argc - 1 || longopwasequals)        case N_COLOUR:
1756          {        colour_option = (char *)"auto";
1757          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        break;
1758          exit(usage(2));  #ifdef JFRIEDL_DEBUG
1759          }        case 'S':
1760        option_data = argv[++i];        S_arg = 0;
1761          break;
1762    #endif
1763        }        }
1764        continue;
1765        }
1766    
1767      if (op->type == OP_STRING) *((char **)op->dataptr) = option_data; else    /* Otherwise, find the data string for the option. */
1768    
1769      if (*option_data == 0)
1770        {
1771        if (i >= argc - 1 || longopwasequals)
1772          {
1773          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1774          exit(usage(2));
1775          }
1776        option_data = argv[++i];
1777        }
1778    
1779      /* If the option type is OP_PATLIST, it's the -e option, which can be called
1780      multiple times to create a list of patterns. */
1781    
1782      if (op->type == OP_PATLIST)
1783        {
1784        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1785        {        {
1786        char *endptr;        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1787        int n = strtoul(option_data, &endptr, 10);          MAX_PATTERN_COUNT);
1788        if (*endptr != 0)        return 2;
1789          }
1790        patterns[cmd_pattern_count++] = option_data;
1791        }
1792    
1793      /* Otherwise, deal with single string or numeric data values. */
1794    
1795      else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1796        {
1797        *((char **)op->dataptr) = option_data;
1798        }
1799      else
1800        {
1801        char *endptr;
1802        int n = strtoul(option_data, &endptr, 10);
1803        if (*endptr != 0)
1804          {
1805          if (longop)
1806          {          {
1807          if (longop)          char *equals = strchr(op->long_name, '=');
1808            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%s\n",          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1809              option_data, op->long_name);            equals - op->long_name;
1810          else          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1811            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",            option_data, nlen, op->long_name);
             option_data, op->one_char);  
         exit(usage(2));  
1812          }          }
1813        *((int *)op->dataptr) = n;        else
1814            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1815              option_data, op->one_char);
1816          exit(usage(2));
1817        }        }
1818        *((int *)op->dataptr) = n;
1819      }      }
1820    }    }
1821    
# Line 1001  if (both_context > 0) Line 1828  if (both_context > 0)
1828    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
1829    }    }
1830    
1831  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  /* If a locale has not been provided as an option, see if the LC_CTYPE or
1832  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  LC_ALL environment variable is set, and if so, use it. */
1833    
1834  if (pattern_list == NULL || hints_list == NULL)  if (locale == NULL)
1835    {    {
1836    fprintf(stderr, "pcregrep: malloc failed\n");    locale = getenv("LC_ALL");
1837    return 2;    locale_from = "LCC_ALL";
1838    }    }
1839    
1840  /* Compile the regular expression(s). */  if (locale == NULL)
   
 if (pattern_filename != NULL)  
1841    {    {
1842    FILE *f = fopen(pattern_filename, "r");    locale = getenv("LC_CTYPE");
1843    char buffer[MBUFTHIRD + 16];    locale_from = "LC_CTYPE";
1844    char *rdstart;    }
   int adjust = 0;  
1845    
1846    if (f == NULL)  /* If a locale has been provided, set it, and generate the tables the PCRE
1847    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1848    
1849    if (locale != NULL)
1850      {
1851      if (setlocale(LC_CTYPE, locale) == NULL)
1852      {      {
1853      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1854        strerror(errno));        locale, locale_from);
1855      return 2;      return 2;
1856      }      }
1857      pcretables = pcre_maketables();
1858      }
1859    
1860    if (whole_lines)  /* Sort out colouring */
1861    
1862    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1863      {
1864      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1865      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1866      else
1867      {      {
1868      strcpy(buffer, "^(?:");      fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1869      adjust = 4;        colour_option);
1870        return 2;
1871      }      }
1872    else if (word_match)    if (do_colour)
1873      {      {
1874      strcpy(buffer, "\\b");      char *cs = getenv("PCREGREP_COLOUR");
1875      adjust = 2;      if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1876        if (cs != NULL) colour_string = cs;
1877      }      }
1878      }
1879    
1880    rdstart = buffer + adjust;  /* Interpret the newline type; the default settings are Unix-like. */
1881    while (fgets(rdstart, MBUFTHIRD, f) != NULL)  
1882    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1883      {
1884      pcre_options |= PCRE_NEWLINE_CR;
1885      endlinetype = EL_CR;
1886      }
1887    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1888      {
1889      pcre_options |= PCRE_NEWLINE_LF;
1890      endlinetype = EL_LF;
1891      }
1892    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1893      {
1894      pcre_options |= PCRE_NEWLINE_CRLF;
1895      endlinetype = EL_CRLF;
1896      }
1897    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1898      {
1899      pcre_options |= PCRE_NEWLINE_ANY;
1900      endlinetype = EL_ANY;
1901      }
1902    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1903      {
1904      pcre_options |= PCRE_NEWLINE_ANYCRLF;
1905      endlinetype = EL_ANYCRLF;
1906      }
1907    else
1908      {
1909      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1910      return 2;
1911      }
1912    
1913    /* Interpret the text values for -d and -D */
1914    
1915    if (dee_option != NULL)
1916      {
1917      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1918      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1919      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1920      else
1921      {      {
1922      char *s = rdstart + (int)strlen(rdstart);      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1923      if (pattern_count >= MAX_PATTERN_COUNT)      return 2;
1924        {      }
1925        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",    }
1926          MAX_PATTERN_COUNT);  
1927        return 2;  if (DEE_option != NULL)
1928        }    {
1929      while (s > rdstart && isspace((unsigned char)(s[-1]))) s--;    if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1930      if (s == rdstart) continue;    else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1931      if (whole_lines) strcpy(s, ")$");    else
1932        else if (word_match)strcpy(s, "\\b");      {
1933          else *s = 0;      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1934      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,      return 2;
       &errptr, NULL);  
     if (pattern_list[pattern_count++] == NULL)  
       {  
       fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",  
         pattern_count, errptr - adjust, error);  
       return 2;  
       }  
1935      }      }
   fclose(f);  
1936    }    }
1937    
1938  /* If no file name, a single regex must be given inline. */  /* Check the values for Jeffrey Friedl's debugging options. */
1939    
1940  else  #ifdef JFRIEDL_DEBUG
1941    if (S_arg > 9)
1942    {    {
1943    char buffer[MBUFTHIRD + 16];    fprintf(stderr, "pcregrep: bad value for -S option\n");
1944    char *pat;    return 2;
1945    int adjust = 0;    }
1946    if (jfriedl_XT != 0 || jfriedl_XR != 0)
1947      {
1948      if (jfriedl_XT == 0) jfriedl_XT = 1;
1949      if (jfriedl_XR == 0) jfriedl_XR = 1;
1950      }
1951    #endif
1952    
1953    /* Get memory to store the pattern and hints lists. */
1954    
1955    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1956    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1957    
1958    if (pattern_list == NULL || hints_list == NULL)
1959      {
1960      fprintf(stderr, "pcregrep: malloc failed\n");
1961      goto EXIT2;
1962      }
1963    
1964    /* If no patterns were provided by -e, and there is no file provided by -f,
1965    the first argument is the one and only pattern, and it must exist. */
1966    
1967    if (cmd_pattern_count == 0 && pattern_filename == NULL)
1968      {
1969    if (i >= argc) return usage(2);    if (i >= argc) return usage(2);
1970      patterns[cmd_pattern_count++] = argv[i++];
1971      }
1972    
1973    if (whole_lines)  /* Compile the patterns that were provided on the command line, either by
1974    multiple uses of -e or as a single unkeyed pattern. */
1975    
1976    for (j = 0; j < cmd_pattern_count; j++)
1977      {
1978      if (!compile_pattern(patterns[j], pcre_options, NULL,
1979           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1980        goto EXIT2;
1981      }
1982    
1983    /* Compile the regular expressions that are provided in a file. */
1984    
1985    if (pattern_filename != NULL)
1986      {
1987      int linenumber = 0;
1988      FILE *f;
1989      char *filename;
1990      char buffer[MBUFTHIRD];
1991    
1992      if (strcmp(pattern_filename, "-") == 0)
1993      {      {
1994      sprintf(buffer, "^(?:%.*s)$", MBUFTHIRD, argv[i++]);      f = stdin;
1995      pat = buffer;      filename = stdin_name;
     adjust = 4;  
1996      }      }
1997    else if (word_match)    else
1998      {      {
1999      sprintf(buffer, "\\b%.*s\\b", MBUFTHIRD, argv[i++]);      f = fopen(pattern_filename, "r");
2000      pat = buffer;      if (f == NULL)
2001      adjust = 2;        {
2002          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2003            strerror(errno));
2004          goto EXIT2;
2005          }
2006        filename = pattern_filename;
2007      }      }
   else pat = argv[i++];  
   
   pattern_list[0] = pcre_compile(pat, options, &error, &errptr, NULL);  
2008    
2009    if (pattern_list[0] == NULL)    while (fgets(buffer, MBUFTHIRD, f) != NULL)
2010      {      {
2011      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n",      char *s = buffer + (int)strlen(buffer);
2012        errptr - adjust, error);      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2013      return 2;      *s = 0;
2014        linenumber++;
2015        if (buffer[0] == 0) continue;   /* Skip blank lines */
2016        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2017          goto EXIT2;
2018      }      }
2019    pattern_count++;  
2020      if (f != stdin) fclose(f);
2021    }    }
2022    
2023  /* Study the regular expressions, as we will be running them many times */  /* Study the regular expressions, as we will be running them many times */
# Line 1109  for (j = 0; j < pattern_count; j++) Line 2030  for (j = 0; j < pattern_count; j++)
2030      char s[16];      char s[16];
2031      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2032      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2033      return 2;      goto EXIT2;
2034      }      }
2035      hint_count++;
2036    }    }
2037    
2038  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
2039    
2040  if (exclude_pattern != NULL)  if (exclude_pattern != NULL)
2041    {    {
2042    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, NULL);    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2043        pcretables);
2044    if (exclude_compiled == NULL)    if (exclude_compiled == NULL)
2045      {      {
2046      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2047        errptr, error);        errptr, error);
2048      return 2;      goto EXIT2;
2049      }      }
2050    }    }
2051    
2052  if (include_pattern != NULL)  if (include_pattern != NULL)
2053    {    {
2054    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, NULL);    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2055        pcretables);
2056    if (include_compiled == NULL)    if (include_compiled == NULL)
2057      {      {
2058      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2059        errptr, error);        errptr, error);
2060      return 2;      goto EXIT2;
2061      }      }
2062    }    }
2063    
2064  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
2065    
2066  if (i >= argc) return pcregrep(stdin,  if (i >= argc)
2067    (filenames_only || filenames_nomatch_only)? stdin_name : NULL);    {
2068      rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2069      goto EXIT;
2070      }
2071    
2072  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2073  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
2074  the file name if the argument is not a directory and filenames_only is not set.  the file name if the argument is not a directory and filenames are not
2075  */  otherwise forced. */
2076    
2077  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
2078    
2079  for (; i < argc; i++)  for (; i < argc; i++)
2080    {    {
2081    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2082        only_one_at_top);
2083    if (frc > 1) rc = frc;    if (frc > 1) rc = frc;
2084      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2085    }    }
2086    
2087    EXIT:
2088    if (pattern_list != NULL)
2089      {
2090      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2091      free(pattern_list);
2092      }
2093    if (hints_list != NULL)
2094      {
2095      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2096      free(hints_list);
2097      }
2098  return rc;  return rc;
2099    
2100    EXIT2:
2101    rc = 2;
2102    goto EXIT;
2103  }  }
2104    
2105  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.77  
changed lines
  Added in v.236

  ViewVC Help
Powered by ViewVC 1.1.5