/[pcre]/code/tags/pcre-8.01/pcregrep.c
ViewVC logotype

Diff of /code/tags/pcre-8.01/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 280 by ph10, Wed Dec 5 20:56:03 2007 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
# Line 45  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
 #include "config.h"  
58  #include "pcre.h"  #include "pcre.h"
59    
60  #define FALSE 0  #define FALSE 0
# Line 55  POSSIBILITY OF SUCH DAMAGE. Line 62  POSSIBILITY OF SUCH DAMAGE.
62    
63  typedef int BOOL;  typedef int BOOL;
64    
 #define VERSION "4.0 07-Jun-2005"  
65  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
66    
67  #if BUFSIZ > 8192  #if BUFSIZ > 8192
# Line 64  typedef int BOOL; Line 70  typedef int BOOL;
70  #define MBUFTHIRD 8192  #define MBUFTHIRD 8192
71  #endif  #endif
72    
73    /* Values for the "filenames" variable, which specifies options for file name
74    output. The order is important; it is assumed that a file name is wanted for
75    all values greater than FN_DEFAULT. */
76    
77    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
78    
79    /* Actions for the -d and -D options */
80    
81    enum { dee_READ, dee_SKIP, dee_RECURSE };
82    enum { DEE_READ, DEE_SKIP };
83    
84    /* Actions for special processing options (flag bits) */
85    
86    #define PO_WORD_MATCH     0x0001
87    #define PO_LINE_MATCH     0x0002
88    #define PO_FIXED_STRINGS  0x0004
89    
90    /* Line ending types */
91    
92    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
93    
94    
95    
96  /*************************************************  /*************************************************
97  *               Global variables                 *  *               Global variables                 *
98  *************************************************/  *************************************************/
99    
100    /* Jeffrey Friedl has some debugging requirements that are not part of the
101    regular code. */
102    
103    #ifdef JFRIEDL_DEBUG
104    static int S_arg = -1;
105    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
106    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
107    static const char *jfriedl_prefix = "";
108    static const char *jfriedl_postfix = "";
109    #endif
110    
111    static int  endlinetype;
112    
113    static char *colour_string = (char *)"1;31";
114    static char *colour_option = NULL;
115    static char *dee_option = NULL;
116    static char *DEE_option = NULL;
117    static char *newline = NULL;
118  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
119  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
120    static char *locale = NULL;
121    
122    static const unsigned char *pcretables = NULL;
123    
124  static int  pattern_count = 0;  static int  pattern_count = 0;
125  static pcre **pattern_list;  static pcre **pattern_list = NULL;
126  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
127    
128  static char *include_pattern = NULL;  static char *include_pattern = NULL;
129  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
# Line 85  static pcre *exclude_compiled = NULL; Line 134  static pcre *exclude_compiled = NULL;
134  static int after_context = 0;  static int after_context = 0;
135  static int before_context = 0;  static int before_context = 0;
136  static int both_context = 0;  static int both_context = 0;
137    static int dee_action = dee_READ;
138    static int DEE_action = DEE_READ;
139    static int error_count = 0;
140    static int filenames = FN_DEFAULT;
141    static int process_options = 0;
142    
143  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
144  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
145  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
 static BOOL filenames_nomatch_only = FALSE;  
146  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
147  static BOOL invert = FALSE;  static BOOL invert = FALSE;
148    static BOOL line_offsets = FALSE;
149  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
150  static BOOL number = FALSE;  static BOOL number = FALSE;
151    static BOOL only_matching = FALSE;
152  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
 static BOOL recurse = FALSE;  
153  static BOOL silent = FALSE;  static BOOL silent = FALSE;
154  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
 static BOOL word_match = FALSE;  
155    
156  /* Structure for options and list of them */  /* Structure for options and list of them */
157    
158  enum { OP_NODATA, OP_STRING, OP_NUMBER };  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
159           OP_PATLIST };
160    
161  typedef struct option_item {  typedef struct option_item {
162    int type;    int type;
# Line 112  typedef struct option_item { Line 166  typedef struct option_item {
166    const char *help_text;    const char *help_text;
167  } option_item;  } option_item;
168    
169    /* Options without a single-letter equivalent get a negative value. This can be
170    used to identify them. */
171    
172    #define N_COLOUR    (-1)
173    #define N_EXCLUDE   (-2)
174    #define N_HELP      (-3)
175    #define N_INCLUDE   (-4)
176    #define N_LABEL     (-5)
177    #define N_LOCALE    (-6)
178    #define N_NULL      (-7)
179    #define N_LOFFSETS  (-8)
180    #define N_FOFFSETS  (-9)
181    
182  static option_item optionlist[] = {  static option_item optionlist[] = {
183    { OP_NODATA, -1,  NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
184    { OP_NODATA, -1,  NULL,              "help",          "display this help and exit" },    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
185    { OP_NUMBER, 'A', &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
186    { OP_NUMBER, 'B', &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
187    { OP_NUMBER, 'C', &both_context,     "context=number", "set number of context lines, before & after" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
188    { OP_NODATA, 'c', NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
189    { OP_STRING, 'f', &pattern_filename, "file=path",     "read patterns from file" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
190    { OP_NODATA, 'h', NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
191    { OP_NODATA, 'i', NULL,              "ignore-case",   "ignore case distinctions" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
192    { OP_NODATA, 'l', NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
193    { OP_NODATA, 'L', NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
194    { OP_STRING, -1,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
195    { OP_NODATA, 'M', NULL,              "multiline",     "run in multiline mode" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
196    { OP_NODATA, 'n', NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
197    { OP_NODATA, 'q', NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
198    { OP_NODATA, 'r', NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
199    { OP_STRING, -1,  &exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
200    { OP_STRING, -1,  &include_pattern,  "include=pattern","include matching files when recursing" },    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
201    { OP_NODATA, 's', NULL,              "no-messages",   "suppress error messages" },    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
202    { OP_NODATA, 'u', NULL,              "utf-8",         "use UTF-8 mode" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
203    { OP_NODATA, 'V', NULL,              "version",       "print version information and exit" },    { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
204    { OP_NODATA, 'v', NULL,              "invert-match",  "select non-matching lines" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
205    { OP_NODATA, 'w', NULL,              "word-regex(p)", "force PATTERN to match only as a word"  },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
206    { OP_NODATA, 'x', NULL,              "line-regex(p)", "force PATTERN to match only whole lines" },    { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
207    { OP_NODATA, 0,   NULL,               NULL,            NULL }    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
208      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
209      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
210      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
211      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
212      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
213    #ifdef JFRIEDL_DEBUG
214      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
215    #endif
216      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
217      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
218      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
219      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
220      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
221      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
222      { OP_NODATA,    0,        NULL,               NULL,            NULL }
223  };  };
224    
225    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
226    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
227    that the combination of -w and -x has the same effect as -x on its own, so we
228    can treat them as the same. */
229    
/* Pattern prefixes, indexed by the low three bits of process_options
(PO_WORD_MATCH = 1, PO_LINE_MATCH = 2, PO_FIXED_STRINGS = 4). */

static const char *prefix[] = {
  "",          /* 0: no special processing */
  "\\b",       /* 1: -w */
  "^(?:",      /* 2: -x */
  "^(?:",      /* 3: -w -x (same as -x) */
  "\\Q",       /* 4: -F */
  "\\b\\Q",    /* 5: -F -w */
  "^(?:\\Q",   /* 6: -F -x */
  "^(?:\\Q"    /* 7: -F -w -x (same as -F -x) */
};

/* Pattern suffixes, indexed in the same way as prefix[]. */

static const char *suffix[] = {
  "",          /* 0: no special processing */
  "\\b",       /* 1: -w */
  ")$",        /* 2: -x */
  ")$",        /* 3: -w -x */
  "\\E",       /* 4: -F */
  "\\E\\b",    /* 5: -F -w */
  "\\E)$",     /* 6: -F -x */
  "\\E)$"      /* 7: -F -w -x */
};

/* UTF-8 tables - used when the newline setting is "any" or "anycrlf".

utf8_table3 holds the mask that extracts the data bits from the first byte of
a character, indexed by the number of additional bytes. */

const int utf8_table3[] = {
  0xff,
  0x1f,
  0x0f,
  0x07,
  0x03,
  0x01
};

/* utf8_table4 gives the number of additional bytes in a character, indexed by
the first byte masked with 0x3f. */

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3, 4,4,4,4,5,5,5,5
};
245    
246    
247    
248  /*************************************************  /*************************************************
249  *       Functions for directory scanning         *  *            OS-specific functions               *
250  *************************************************/  *************************************************/
251    
252  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
253  although at present the only ones are for Unix, Win32, and for "no directory  although at present the only ones are for Unix, Win32, and for "no support". */
 recursion support". */  
254    
255    
256  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
257    
258  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
259  #include <sys/types.h>  #include <sys/types.h>
260  #include <sys/stat.h>  #include <sys/stat.h>
261  #include <dirent.h>  #include <dirent.h>
# Line 184  for (;;) Line 287  for (;;)
287    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
288      return dent->d_name;      return dent->d_name;
289    }    }
290  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
291  }  }
292    
293  static void  static void
# Line 194  closedir(dir); Line 297  closedir(dir);
297  }  }
298    
299    
300    /************* Test for regular file in Unix **********/
301    
/* Test whether a path names a regular file.

Argument:   filename   the path to examine
Returns:    1 if the path is a regular file, or if stat() fails;
            0 for any other type of file
*/

static int
isregfile(char *filename)
{
struct stat statbuf;

/* A path that cannot be examined is reported as regular, in the expectation
that opening it as a file will then fail. */

if (stat(filename, &statbuf) < 0)
  return 1;

/* S_ISREG() is the POSIX file-type test; S_IFMT and S_IFREG are XSI names
that are not visible under a strict feature-test setting. Normalize the
result so that exactly 0 or 1 is returned. */

return S_ISREG(statbuf.st_mode)? 1 : 0;
}
310    
311    
312    /************* Test stdout for being a terminal in Unix **********/
313    
314    static BOOL
315    is_stdout_tty(void)
316    {
317    return isatty(fileno(stdout));
318    }
319    
320    
321  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
322    
323  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
# Line 201  Lionel Fourquaux. David Burgess added a Line 325  Lionel Fourquaux. David Burgess added a
325  when it did not exist. */  when it did not exist. */
326    
327    
328  #elif HAVE_WIN32API  #elif HAVE_WINDOWS_H
329    
330  #ifndef STRICT  #ifndef STRICT
331  # define STRICT  # define STRICT
# Line 292  free(dir); Line 416  free(dir);
416  }  }
417    
418    
419    /************* Test for regular file in Win32 **********/
420    
421    /* I don't know how to do this, or if it can be done; assume all paths are
422    regular if they are not directories. */
423    
/* Any path that isdirectory() does not report as a directory is treated as a
regular file.

Argument:   filename   the path to examine
Returns:    non-zero if the path is not a directory, zero if it is
*/

int isregfile(char *filename)
{
return !isdirectory(filename);
}
428    
429    
430    /************* Test stdout for being a terminal in Win32 **********/
431    
432    /* I don't know how to do this; assume never */
433    
434    static BOOL
435    is_stdout_tty(void)
436    {
437    FALSE;
438    }
439    
440    
441  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
442    
443  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
# Line 300  free(dir); Line 446  free(dir);
446    
447  typedef void directory_type;  typedef void directory_type;
448    
449  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
450  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
451  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
452  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
453    
454    
455    /************* Test for regular when we can't do it **********/
456    
457    /* Assume all files are regular. */
458    
/* With no way to examine a path, every file is reported as regular. */

int isregfile(char *filename)
{
(void)filename;   /* Not examined */
return 1;
}
460    
461    
462    /************* Test stdout for being a terminal when we can't do it **********/
463    
464    static BOOL
465    is_stdout_tty(void)
466    {
467    return FALSE;
468    }
469    
470    
471  #endif  #endif
472    
473    
474    
475  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
476  /*************************************************  /*************************************************
477  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
478  *************************************************/  *************************************************/
# Line 332  return sys_errlist[n]; Line 495  return sys_errlist[n];
495    
496    
497  /*************************************************  /*************************************************
498    *             Find end of line                   *
499    *************************************************/
500    
501    /* The length of the endline sequence that is found is set via lenptr. This may
502    be zero at the very end of the file if there is no line-ending sequence there.
503    
504    Arguments:
505      p         current position in line
506      endptr    end of available data
507      lenptr    where to put the length of the eol sequence
508    
509    Returns:    pointer after the last byte of the line, including the newline byte(s)
510    */
511    
512    static char *
513    end_of_line(char *p, char *endptr, int *lenptr)
514    {
515    switch(endlinetype)
516      {
517      default:      /* Just in case */
518      case EL_LF:
519      while (p < endptr && *p != '\n') p++;
520      if (p < endptr)
521        {
522        *lenptr = 1;
523        return p + 1;
524        }
525      *lenptr = 0;
526      return endptr;
527    
528      case EL_CR:
529      while (p < endptr && *p != '\r') p++;
530      if (p < endptr)
531        {
532        *lenptr = 1;
533        return p + 1;
534        }
535      *lenptr = 0;
536      return endptr;
537    
538      case EL_CRLF:
539      for (;;)
540        {
541        while (p < endptr && *p != '\r') p++;
542        if (++p >= endptr)
543          {
544          *lenptr = 0;
545          return endptr;
546          }
547        if (*p == '\n')
548          {
549          *lenptr = 2;
550          return p + 1;
551          }
552        }
553      break;
554    
555      case EL_ANYCRLF:
556      while (p < endptr)
557        {
558        int extra = 0;
559        register int c = *((unsigned char *)p);
560    
561        if (utf8 && c >= 0xc0)
562          {
563          int gcii, gcss;
564          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
565          gcss = 6*extra;
566          c = (c & utf8_table3[extra]) << gcss;
567          for (gcii = 1; gcii <= extra; gcii++)
568            {
569            gcss -= 6;
570            c |= (p[gcii] & 0x3f) << gcss;
571            }
572          }
573    
574        p += 1 + extra;
575    
576        switch (c)
577          {
578          case 0x0a:    /* LF */
579          *lenptr = 1;
580          return p;
581    
582          case 0x0d:    /* CR */
583          if (p < endptr && *p == 0x0a)
584            {
585            *lenptr = 2;
586            p++;
587            }
588          else *lenptr = 1;
589          return p;
590    
591          default:
592          break;
593          }
594        }   /* End of loop for ANYCRLF case */
595    
596      *lenptr = 0;  /* Must have hit the end */
597      return endptr;
598    
599      case EL_ANY:
600      while (p < endptr)
601        {
602        int extra = 0;
603        register int c = *((unsigned char *)p);
604    
605        if (utf8 && c >= 0xc0)
606          {
607          int gcii, gcss;
608          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
609          gcss = 6*extra;
610          c = (c & utf8_table3[extra]) << gcss;
611          for (gcii = 1; gcii <= extra; gcii++)
612            {
613            gcss -= 6;
614            c |= (p[gcii] & 0x3f) << gcss;
615            }
616          }
617    
618        p += 1 + extra;
619    
620        switch (c)
621          {
622          case 0x0a:    /* LF */
623          case 0x0b:    /* VT */
624          case 0x0c:    /* FF */
625          *lenptr = 1;
626          return p;
627    
628          case 0x0d:    /* CR */
629          if (p < endptr && *p == 0x0a)
630            {
631            *lenptr = 2;
632            p++;
633            }
634          else *lenptr = 1;
635          return p;
636    
637          case 0x85:    /* NEL */
638          *lenptr = utf8? 2 : 1;
639          return p;
640    
641          case 0x2028:  /* LS */
642          case 0x2029:  /* PS */
643          *lenptr = 3;
644          return p;
645    
646          default:
647          break;
648          }
649        }   /* End of loop for ANY case */
650    
651      *lenptr = 0;  /* Must have hit the end */
652      return endptr;
653      }     /* End of overall switch */
654    }
655    
656    
657    
658    /*************************************************
659    *         Find start of previous line            *
660    *************************************************/
661    
662    /* This is called when looking back for before lines to print.
663    
664    Arguments:
665      p         start of the subsequent line
666      startptr  start of available data
667    
668    Returns:    pointer to the start of the previous line
669    */
670    
671    static char *
672    previous_line(char *p, char *startptr)
673    {
674    switch(endlinetype)
675      {
676      default:      /* Just in case */
677      case EL_LF:
678      p--;
679      while (p > startptr && p[-1] != '\n') p--;
680      return p;
681    
682      case EL_CR:
683      p--;
684      while (p > startptr && p[-1] != '\n') p--;
685      return p;
686    
687      case EL_CRLF:
688      for (;;)
689        {
690        p -= 2;
691        while (p > startptr && p[-1] != '\n') p--;
692        if (p <= startptr + 1 || p[-2] == '\r') return p;
693        }
694      return p;   /* But control should never get here */
695    
696      case EL_ANY:
697      case EL_ANYCRLF:
698      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
699      if (utf8) while ((*p & 0xc0) == 0x80) p--;
700    
701      while (p > startptr)
702        {
703        register int c;
704        char *pp = p - 1;
705    
706        if (utf8)
707          {
708          int extra = 0;
709          while ((*pp & 0xc0) == 0x80) pp--;
710          c = *((unsigned char *)pp);
711          if (c >= 0xc0)
712            {
713            int gcii, gcss;
714            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
715            gcss = 6*extra;
716            c = (c & utf8_table3[extra]) << gcss;
717            for (gcii = 1; gcii <= extra; gcii++)
718              {
719              gcss -= 6;
720              c |= (pp[gcii] & 0x3f) << gcss;
721              }
722            }
723          }
724        else c = *((unsigned char *)pp);
725    
726        if (endlinetype == EL_ANYCRLF) switch (c)
727          {
728          case 0x0a:    /* LF */
729          case 0x0d:    /* CR */
730          return p;
731    
732          default:
733          break;
734          }
735    
736        else switch (c)
737          {
738          case 0x0a:    /* LF */
739          case 0x0b:    /* VT */
740          case 0x0c:    /* FF */
741          case 0x0d:    /* CR */
742          case 0x85:    /* NEL */
743          case 0x2028:  /* LS */
744          case 0x2029:  /* PS */
745          return p;
746    
747          default:
748          break;
749          }
750    
751        p = pp;  /* Back one character */
752        }        /* End of loop for ANY case */
753    
754      return startptr;  /* Hit start of data */
755      }     /* End of overall switch */
756    }
757    
758    
759    
760    
761    
762    /*************************************************
763  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
764  *************************************************/  *************************************************/
765    
766  /* This is called if we are about to lose said lines because of buffer filling,  /* This is called if we are about to lose said lines because of buffer filling,
767  and at the end of the file.  and at the end of the file. The data in the line is written using fwrite() so
768    that a binary zero does not terminate it.
769    
770  Arguments:  Arguments:
771    lastmatchnumber   the number of the last matching line, plus one    lastmatchnumber   the number of the last matching line, plus one
# Line 355  if (after_context > 0 && lastmatchnumber Line 784  if (after_context > 0 && lastmatchnumber
784    int count = 0;    int count = 0;
785    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
786      {      {
787        int ellength;
788      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
789      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
790      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
791      while (*pp != '\n') pp++;      pp = end_of_line(pp, endptr, &ellength);
792      fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
793      lastmatchrestart = pp + 1;      lastmatchrestart = pp;
794      }      }
795    hyphenpending = TRUE;    hyphenpending = TRUE;
796    }    }
# Line 396  int rc = 1; Line 826  int rc = 1;
826  int linenumber = 1;  int linenumber = 1;
827  int lastmatchnumber = 0;  int lastmatchnumber = 0;
828  int count = 0;  int count = 0;
829    int filepos = 0;
830  int offsets[99];  int offsets[99];
831  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
832  char buffer[3*MBUFTHIRD];  char buffer[3*MBUFTHIRD];
# Line 417  way, the buffer is shifted left and re-f Line 848  way, the buffer is shifted left and re-f
848    
849  while (ptr < endptr)  while (ptr < endptr)
850    {    {
851    int i;    int i, endlinelength;
852      int mrc = 0;
853    BOOL match = FALSE;    BOOL match = FALSE;
854      char *matchptr = ptr;
855    char *t = ptr;    char *t = ptr;
856    size_t length, linelength;    size_t length, linelength;
857    
# Line 429  while (ptr < endptr) Line 862  while (ptr < endptr)
862    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
863    that any match is constrained to be in the first line. */    that any match is constrained to be in the first line. */
864    
865    linelength = 0;    t = end_of_line(t, endptr, &endlinelength);
866    while (t < endptr && *t++ != '\n') linelength++;    linelength = t - ptr - endlinelength;
867    length = multiline? endptr - ptr : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
868    
869      /* Extra processing for Jeffrey Friedl's debugging. */
870    
871    #ifdef JFRIEDL_DEBUG
872      if (jfriedl_XT || jfriedl_XR)
873      {
874          #include <sys/time.h>
875          #include <time.h>
876          struct timeval start_time, end_time;
877          struct timezone dummy;
878    
879          if (jfriedl_XT)
880          {
881              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
882              const char *orig = ptr;
883              ptr = malloc(newlen + 1);
884              if (!ptr) {
885                      printf("out of memory");
886                      exit(2);
887              }
888              endptr = ptr;
889              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
890              for (i = 0; i < jfriedl_XT; i++) {
891                      strncpy(endptr, orig,  length);
892                      endptr += length;
893              }
894              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
895              length = newlen;
896          }
897    
898          if (gettimeofday(&start_time, &dummy) != 0)
899                  perror("bad gettimeofday");
900    
901    
902          for (i = 0; i < jfriedl_XR; i++)
903              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
904    
905          if (gettimeofday(&end_time, &dummy) != 0)
906                  perror("bad gettimeofday");
907    
908          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
909                          -
910                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
911    
912          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
913          return 0;
914      }
915    #endif
916    
917      /* We come back here after a match when the -o option (only_matching) is set,
918      in order to find any further matches in the same line. */
919    
920      ONLY_MATCHING_RESTART:
921    
922    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches. Note that we don't include
923    the final newline in the subject string. */    the final newline in the subject string. */
924    
925    for (i = 0; !match && i < pattern_count; i++)    for (i = 0; i < pattern_count; i++)
926      {      {
927      match = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,      mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
928        offsets, 99) >= 0;        offsets, 99);
929        if (mrc >= 0) { match = TRUE; break; }
930        if (mrc != PCRE_ERROR_NOMATCH)
931          {
932          fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
933          if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
934          fprintf(stderr, "this line:\n");
935          fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */
936          fprintf(stderr, "\n");
937          if (error_count == 0 &&
938              (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
939            {
940            fprintf(stderr, "pcregrep: error %d means that a resource limit "
941              "was exceeded\n", mrc);
942            fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
943            }
944          if (error_count++ > 20)
945            {
946            fprintf(stderr, "pcregrep: too many errors - abandoned\n");
947            exit(2);
948            }
949          match = invert;    /* No more matching; don't show the line again */
950          break;
951          }
952      }      }
953    
954    /* If it's a match or a not-match (as required), print what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
955    
956    if (match != invert)    if (match != invert)
957      {      {
958      BOOL hyphenprinted = FALSE;      BOOL hyphenprinted = FALSE;
959    
960      if (filenames_nomatch_only) return 1;      /* We've failed if we want a file that doesn't have any matches. */
961    
962        if (filenames == FN_NOMATCH_ONLY) return 1;
963    
964        /* Just count if just counting is wanted. */
965    
966      if (count_only) count++;      if (count_only) count++;
967    
968      else if (filenames_only)      /* If all we want is a file name, there is no need to scan any more lines
969        in the file. */
970    
971        else if (filenames == FN_ONLY)
972        {        {
973        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
974        return 0;        return 0;
975        }        }
976    
977        /* Likewise, if all we want is a yes/no answer. */
978    
979      else if (quiet) return 0;      else if (quiet) return 0;
980    
981        /* The --only-matching option prints just the substring that matched, and
982        the --file-offsets and --line-offsets options output offsets for the
983        matching substring (they both force --only-matching). None of these options
984        prints any context. Afterwards, adjust the start and length, and then jump
985        back to look for further matches in the same line. If we are in invert
986        mode, however, nothing is printed - this could be still useful because the
987        return code is set. */
988    
989        else if (only_matching)
990          {
991          if (!invert)
992            {
993            if (printname != NULL) fprintf(stdout, "%s:", printname);
994            if (number) fprintf(stdout, "%d:", linenumber);
995            if (line_offsets)
996              fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,
997                offsets[1] - offsets[0]);
998            else if (file_offsets)
999              fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,
1000                offsets[1] - offsets[0]);
1001            else
1002              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1003            fprintf(stdout, "\n");
1004            matchptr += offsets[1];
1005            length -= offsets[1];
1006            match = FALSE;
1007            goto ONLY_MATCHING_RESTART;
1008            }
1009          }
1010    
1011        /* This is the default case when none of the above options is set. We print
1012        the matching line(s), possibly preceded and/or followed by other lines of
1013        context. */
1014    
1015      else      else
1016        {        {
1017        /* See if there is a requirement to print some "after" lines from a        /* See if there is a requirement to print some "after" lines from a
# Line 467  while (ptr < endptr) Line 1019  while (ptr < endptr)
1019    
1020        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
1021          {          {
1022            int ellength;
1023          int linecount = 0;          int linecount = 0;
1024          char *p = lastmatchrestart;          char *p = lastmatchrestart;
1025    
1026          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
1027            {            {
1028            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
1029            linecount++;            linecount++;
1030            }            }
1031    
1032          /* It is important to advance lastmatchrestart during this printing so          /* It is important to advance lastmatchrestart during this printing so
1033          that it interacts correctly with any "before" printing below. */          that it interacts correctly with any "before" printing below. Print
1034            each line's data using fwrite() in case there are binary zeroes. */
1035    
1036          while (lastmatchrestart < p)          while (lastmatchrestart < p)
1037            {            {
1038            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
1039            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1040            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1041            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1042            fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1043            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
1044            }            }
1045          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
1046          }          }
# Line 510  while (ptr < endptr) Line 1063  while (ptr < endptr)
1063          char *p = ptr;          char *p = ptr;
1064    
1065          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1066                 linecount++ < before_context)                 linecount < before_context)
1067            {            {
1068            p--;            linecount++;
1069            while (p > buffer && p[-1] != '\n') p--;            p = previous_line(p, buffer);
1070            }            }
1071    
1072          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 521  while (ptr < endptr) Line 1074  while (ptr < endptr)
1074    
1075          while (p < ptr)          while (p < ptr)
1076            {            {
1077              int ellength;
1078            char *pp = p;            char *pp = p;
1079            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1080            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1081            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1082            fprintf(stdout, "%.*s", pp - p + 1, p);            fwrite(p, 1, pp - p, stdout);
1083            p = pp + 1;            p = pp;
1084            }            }
1085          }          }
1086    
1087        /* Now print the matching line(s); ensure we set hyphenpending at the end        /* Now print the matching line(s); ensure we set hyphenpending at the end
1088        of the file. */        of the file if any context lines are being output. */
1089    
1090          if (after_context > 0 || before_context > 0)
1091            endhyphenpending = TRUE;
1092    
       endhyphenpending = TRUE;  
1093        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (printname != NULL) fprintf(stdout, "%s:", printname);
1094        if (number) fprintf(stdout, "%d:", linenumber);        if (number) fprintf(stdout, "%d:", linenumber);
1095    
1096        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1097        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1098        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1099        start of the match will always be before the first \n character. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1100          the match will always be before the first newline sequence. */
1101    
1102        if (multiline)        if (multiline)
1103          {          {
1104          char *endmatch = ptr + offsets[1];          int ellength;
1105          t = ptr;          char *endmatch = ptr;
1106          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          if (!invert)
1107          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1108          linelength = endmatch - ptr;            endmatch += offsets[1];
1109              t = ptr;
1110              while (t < endmatch)
1111                {
1112                t = end_of_line(t, endptr, &ellength);
1113                if (t <= endmatch) linenumber++; else break;
1114                }
1115              }
1116            endmatch = end_of_line(endmatch, endptr, &ellength);
1117            linelength = endmatch - ptr - ellength;
1118            }
1119    
1120          /*** NOTE: Use only fwrite() to output the data line, so that binary
1121          zeroes are treated as just another data character. */
1122    
1123          /* This extra option, for Jeffrey Friedl's debugging requirements,
1124          replaces the matched string, or a specific captured string if it exists,
1125          with X. When this happens, colouring is ignored. */
1126    
1127    #ifdef JFRIEDL_DEBUG
1128          if (S_arg >= 0 && S_arg < mrc)
1129            {
1130            int first = S_arg * 2;
1131            int last  = first + 1;
1132            fwrite(ptr, 1, offsets[first], stdout);
1133            fprintf(stdout, "X");
1134            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1135          }          }
1136          else
1137    #endif
1138    
1139        fprintf(stdout, "%.*s\n", linelength, ptr);        /* We have to split the line(s) up if colouring. */
1140    
1141          if (do_colour)
1142            {
1143            fwrite(ptr, 1, offsets[0], stdout);
1144            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1145            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1146            fprintf(stdout, "%c[00m", 0x1b);
1147            fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1148              stdout);
1149            }
1150          else fwrite(ptr, 1, linelength + endlinelength, stdout);
1151        }        }
1152    
1153        /* End of doing what has to be done for a match */
1154    
1155      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1156    
1157      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1158      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1159    
1160      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1161      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1162      }      }
1163    
1164    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1165      anything to be printed), we have to move on to the end of the match before
1166      proceeding. */
1167    
1168      if (multiline && invert && match)
1169        {
1170        int ellength;
1171        char *endmatch = ptr + offsets[1];
1172        t = ptr;
1173        while (t < endmatch)
1174          {
1175          t = end_of_line(t, endptr, &ellength);
1176          if (t <= endmatch) linenumber++; else break;
1177          }
1178        endmatch = end_of_line(endmatch, endptr, &ellength);
1179        linelength = endmatch - ptr - ellength;
1180        }
1181    
1182    ptr += linelength + 1;    /* Advance to after the newline and increment the line number. The file
1183      offset to the current line is maintained in filepos. */
1184    
1185      ptr += linelength + endlinelength;
1186      filepos += linelength + endlinelength;
1187    linenumber++;    linenumber++;
1188    
1189    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
# Line 599  while (ptr < endptr) Line 1217  while (ptr < endptr)
1217  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
1218  hyphenpending if it prints something. */  hyphenpending if it prints something. */
1219    
1220  do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);  if (!only_matching && !count_only)
1221  hyphenpending |= endhyphenpending;    {
1222      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1223      hyphenpending |= endhyphenpending;
1224      }
1225    
1226  /* Print the file name if we are looking for those without matches and there  /* Print the file name if we are looking for those without matches and there
1227  were none. If we found a match, we won't have got this far. */  were none. If we found a match, we won't have got this far. */
1228    
1229  if (filenames_nomatch_only)  if (filenames == FN_NOMATCH_ONLY)
1230    {    {
1231    fprintf(stdout, "%s\n", printname);    fprintf(stdout, "%s\n", printname);
1232    return 0;    return 0;
# Line 633  recursing; if it's a file, grep it. Line 1254  recursing; if it's a file, grep it.
1254    
1255  Arguments:  Arguments:
1256    pathname          the path to investigate    pathname          the path to investigate
1257    dir_recurse       TRUE if recursing is wanted (-r)    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
   show_filenames    TRUE if file names are wanted for multiple files, except  
                       for the only file at top level when not filenames_only  
1258    only_one_at_top   TRUE if the path is the only one at toplevel    only_one_at_top   TRUE if the path is the only one at toplevel
1259    
1260  Returns:   0 if there was at least one match  Returns:   0 if there was at least one match
# Line 646  However, file opening failures are suppr Line 1265  However, file opening failures are suppr
1265  */  */
1266    
1267  static int  static int
1268  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL show_filenames,  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
   BOOL only_one_at_top)  
1269  {  {
1270  int rc = 1;  int rc = 1;
1271  int sep;  int sep;
1272  FILE *in;  FILE *in;
 char *printname;  
1273    
1274  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1275    
1276  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1277    {    {
1278    return pcregrep(stdin,    return pcregrep(stdin,
1279      (filenames_only || filenames_nomatch_only ||      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
     (show_filenames && !only_one_at_top))?  
1280        stdin_name : NULL);        stdin_name : NULL);
1281    }    }
1282    
 /* If the file is a directory and we are recursing, scan each file within it,  
 subject to any include or exclude patterns that were set. The scanning code is  
 localized so it can be made system-specific. */  
1283    
1284  if ((sep = isdirectory(pathname)) != 0 && dir_recurse)  /* If the file is a directory, skip if skipping or if we are recursing, scan
1285    {  each file within it, subject to any include or exclude patterns that were set.
1286    char buffer[1024];  The scanning code is localized so it can be made system-specific. */
   char *nextfile;  
   directory_type *dir = opendirectory(pathname);  
1287    
1288    if (dir == NULL)  if ((sep = isdirectory(pathname)) != 0)
1289      {    {
1290      if (!silent)    if (dee_action == dee_SKIP) return 1;
1291        fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,    if (dee_action == dee_RECURSE)
         strerror(errno));  
     return 2;  
     }  
   
   while ((nextfile = readdirectory(dir)) != NULL)  
1292      {      {
1293      int frc, blen;      char buffer[1024];
1294      sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);      char *nextfile;
1295      blen = strlen(buffer);      directory_type *dir = opendirectory(pathname);
1296    
1297      if (exclude_compiled != NULL &&      if (dir == NULL)
1298          pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)        {
1299        continue;        if (!silent)
1300            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1301              strerror(errno));
1302          return 2;
1303          }
1304    
1305      if (include_compiled != NULL &&      while ((nextfile = readdirectory(dir)) != NULL)
1306          pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)        {
1307        continue;        int frc, blen;
1308          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1309          blen = strlen(buffer);
1310    
1311          if (exclude_compiled != NULL &&
1312              pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1313            continue;
1314    
1315          if (include_compiled != NULL &&
1316              pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1317            continue;
1318    
1319          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1320          if (frc > 1) rc = frc;
1321           else if (frc == 0 && rc == 1) rc = 0;
1322          }
1323    
1324      frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);      closedirectory(dir);
1325      if (frc > 1) rc = frc;      return rc;
      else if (frc == 0 && rc == 1) rc = 0;  
1326      }      }
   
   closedirectory(dir);  
   return rc;  
1327    }    }
1328    
1329  /* If the file is not a directory, or we are not recursing, scan it. If this is  /* If the file is not a directory and not a regular file, skip it if that's
1330  the first and only argument at top level, we don't show the file name (unless  been requested. */
1331  we are only showing the file name). Otherwise, control is via the  
1332  show_filenames variable. */  else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1333    
1334    /* Control reaches here if we have a regular file, or if we have a directory
1335    and recursion or skipping was not requested, or if we have anything else and
1336    skipping was not requested. The scan proceeds. If this is the first and only
1337    argument at top level, we don't show the file name, unless we are only showing
1338    the file name, or the filename was forced (-H). */
1339    
1340  in = fopen(pathname, "r");  in = fopen(pathname, "r");
1341  if (in == NULL)  if (in == NULL)
# Line 719  if (in == NULL) Line 1346  if (in == NULL)
1346    return 2;    return 2;
1347    }    }
1348    
1349  printname =  (filenames_only || filenames_nomatch_only ||  rc = pcregrep(in, (filenames > FN_DEFAULT ||
1350    (show_filenames && !only_one_at_top))? pathname : NULL;    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
   
 rc = pcregrep(in, printname);  
1351    
1352  fclose(in);  fclose(in);
1353  return rc;  return rc;
# Line 738  return rc; Line 1363  return rc;
1363  static int  static int
1364  usage(int rc)  usage(int rc)
1365  {  {
1366  fprintf(stderr, "Usage: pcregrep [-LMVcfhilnqrsvwx] [long-options] [pattern] [file1 file2 ...]\n");  option_item *op;
1367  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Usage: pcregrep [-");
1368    for (op = optionlist; op->one_char != 0; op++)
1369      {
1370      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1371      }
1372    fprintf(stderr, "] [long options] [pattern] [files]\n");
1373    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1374      "options.\n");
1375  return rc;  return rc;
1376  }  }
1377    
# Line 757  option_item *op; Line 1389  option_item *op;
1389    
1390  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1391  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1392  printf("PATTERN must be present if -f is not used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1393  printf("\"-\" can be used as a file name to mean STDIN.\n");  printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1394  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1395    
1396  printf("Options:\n");  printf("Options:\n");
# Line 794  handle_option(int letter, int options) Line 1426  handle_option(int letter, int options)
1426  {  {
1427  switch(letter)  switch(letter)
1428    {    {
1429    case -1:  help(); exit(0);    case N_FOFFSETS: file_offsets = TRUE; break;
1430      case N_HELP: help(); exit(0);
1431      case N_LOFFSETS: line_offsets = number = TRUE; break;
1432    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1433    case 'h': filenames = FALSE; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1434      case 'H': filenames = FN_FORCE; break;
1435      case 'h': filenames = FN_NONE; break;
1436    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1437    case 'l': filenames_only = TRUE; break;    case 'l': filenames = FN_ONLY; break;
1438    case 'L': filenames_nomatch_only = TRUE; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1439    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1440    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1441      case 'o': only_matching = TRUE; break;
1442    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1443    case 'r': recurse = TRUE; break;    case 'r': dee_action = dee_RECURSE; break;
1444    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1445    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1446    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1447    case 'w': word_match = TRUE; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1448    case 'x': whole_lines = TRUE; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1449    
1450    case 'V':    case 'V':
1451    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
1452    exit(0);    exit(0);
1453    break;    break;
1454    
# Line 828  return options; Line 1464  return options;
1464    
1465    
1466  /*************************************************  /*************************************************
1467    *          Construct printed ordinal             *
1468    *************************************************/
1469    
/* This turns a number into its English ordinal form: "1st", "2nd", "3rd",
"4th", and so on. Numbers whose last two digits are 11, 12 or 13 take "th"
("11th", "112th"), not the suffix their final digit would otherwise select.

Argument:  n    the number to format
Returns:        pointer to a static buffer holding the text; the buffer is
                overwritten by the next call, so the result must be used
                (or copied) before calling ordin() again
*/

static char *
ordin(int n)
{
/* Large enough for the longest decimal rendering of an int that is up to
64 bits wide, plus the two-letter suffix and the terminating zero. */
static char buffer[24];
const char *ending = "th";
int last_two = n % 100;
char *p = buffer;

p += sprintf(p, "%d", n);

/* The "teens" are irregular; everything else goes by the final digit. A
negative n yields a negative remainder and so falls through to "th". */

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

strcpy(p, ending);
return buffer;
}
1488    
1489    
1490    
1491    /*************************************************
1492    *          Compile a single pattern              *
1493    *************************************************/
1494    
1495    /* When the -F option has been used, this is called for each substring.
1496    Otherwise it's called for each supplied pattern.
1497    
1498    Arguments:
1499      pattern        the pattern string
1500      options        the PCRE options
1501      filename       the file name, or NULL for a command-line pattern
1502      count          0 if this is the only command line pattern, or
1503                     number of the command line pattern, or
1504                     linenumber for a pattern from a file
1505    
1506    Returns:         TRUE on success, FALSE after an error
1507    */
1508    
1509    static BOOL
1510    compile_single_pattern(char *pattern, int options, char *filename, int count)
1511    {
1512    char buffer[MBUFTHIRD + 16];
1513    const char *error;
1514    int errptr;
1515    
1516    if (pattern_count >= MAX_PATTERN_COUNT)
1517      {
1518      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1519        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1520      return FALSE;
1521      }
1522    
1523    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1524      suffix[process_options]);
1525    pattern_list[pattern_count] =
1526      pcre_compile(buffer, options, &error, &errptr, pcretables);
1527    if (pattern_list[pattern_count] != NULL)
1528      {
1529      pattern_count++;
1530      return TRUE;
1531      }
1532    
1533    /* Handle compile errors */
1534    
1535    errptr -= (int)strlen(prefix[process_options]);
1536    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1537    
1538    if (filename == NULL)
1539      {
1540      if (count == 0)
1541        fprintf(stderr, "pcregrep: Error in command-line regex "
1542          "at offset %d: %s\n", errptr, error);
1543      else
1544        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1545          "at offset %d: %s\n", ordin(count), errptr, error);
1546      }
1547    else
1548      {
1549      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1550        "at offset %d: %s\n", count, filename, errptr, error);
1551      }
1552    
1553    return FALSE;
1554    }
1555    
1556    
1557    
1558    /*************************************************
1559    *           Compile one supplied pattern         *
1560    *************************************************/
1561    
1562    /* When the -F option has been used, each string may be a list of strings,
1563    separated by line breaks. They will be matched literally.
1564    
1565    Arguments:
1566      pattern        the pattern string
1567      options        the PCRE options
1568      filename       the file name, or NULL for a command-line pattern
1569      count          0 if this is the only command line pattern, or
1570                     number of the command line pattern, or
1571                     linenumber for a pattern from a file
1572    
1573    Returns:         TRUE on success, FALSE after an error
1574    */
1575    
1576    static BOOL
1577    compile_pattern(char *pattern, int options, char *filename, int count)
1578    {
1579    if ((process_options & PO_FIXED_STRINGS) != 0)
1580      {
1581      char *eop = pattern + strlen(pattern);
1582      char buffer[MBUFTHIRD];
1583      for(;;)
1584        {
1585        int ellength;
1586        char *p = end_of_line(pattern, eop, &ellength);
1587        if (ellength == 0)
1588          return compile_single_pattern(pattern, options, filename, count);
1589        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1590        pattern = p;
1591        if (!compile_single_pattern(buffer, options, filename, count))
1592          return FALSE;
1593        }
1594      }
1595    else return compile_single_pattern(pattern, options, filename, count);
1596    }
1597    
1598    
1599    
1600    /*************************************************
1601  *                Main program                    *  *                Main program                    *
1602  *************************************************/  *************************************************/
1603    
# Line 838  main(int argc, char **argv) Line 1608  main(int argc, char **argv)
1608  {  {
1609  int i, j;  int i, j;
1610  int rc = 1;  int rc = 1;
1611  int options = 0;  int pcre_options = 0;
1612    int cmd_pattern_count = 0;
1613    int hint_count = 0;
1614  int errptr;  int errptr;
 const char *error;  
1615  BOOL only_one_at_top;  BOOL only_one_at_top;
1616    char *patterns[MAX_PATTERN_COUNT];
1617    const char *locale_from = "--locale";
1618    const char *error;
1619    
1620    /* Set the default line ending value from the default in the PCRE library;
1621    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1622    */
1623    
1624    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1625    switch(i)
1626      {
1627      default:                 newline = (char *)"lf"; break;
1628      case '\r':               newline = (char *)"cr"; break;
1629      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1630      case -1:                 newline = (char *)"any"; break;
1631      case -2:                 newline = (char *)"anycrlf"; break;
1632      }
1633    
1634  /* Process the options */  /* Process the options */
1635    
# Line 855  for (i = 1; i < argc; i++) Line 1643  for (i = 1; i < argc; i++)
1643    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
1644    
1645    /* If we hit an argument that is just "-", it may be a reference to STDIN,    /* If we hit an argument that is just "-", it may be a reference to STDIN,
1646    but only if we have previously had -f to define the patterns. */    but only if we have previously had -e or -f to define the patterns. */
1647    
1648    if (argv[i][1] == 0)    if (argv[i][1] == 0)
1649      {      {
1650      if (pattern_filename != NULL) break;      if (pattern_filename != NULL || pattern_count > 0) break;
1651        else exit(usage(2));        else exit(usage(2));
1652      }      }
1653    
# Line 881  for (i = 1; i < argc; i++) Line 1669  for (i = 1; i < argc; i++)
1669      /* Some long options have data that follows after =, for example file=name.      /* Some long options have data that follows after =, for example file=name.
1670      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
1671      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
1672      with Jeff Friedl in preferring "regex" without the "p". These options are      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1673      entered in the table as "regex(p)". No option is in both these categories,      These options are entered in the table as "regex(p)". No option is in both
1674      fortunately. */      these categories, fortunately. */
1675    
1676      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
1677        {        {
# Line 898  for (i = 1; i < argc; i++) Line 1686  for (i = 1; i < argc; i++)
1686          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
1687            {            {
1688            int oplen = equals - op->long_name;            int oplen = equals - op->long_name;
1689            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1690            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1691              {              {
1692              option_data = arg + arglen;              option_data = arg + arglen;
# Line 917  for (i = 1; i < argc; i++) Line 1705  for (i = 1; i < argc; i++)
1705          char buff2[24];          char buff2[24];
1706          int baselen = opbra - op->long_name;          int baselen = opbra - op->long_name;
1707          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
1708          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1,
1709            opbra + 1);            (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1710          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1711            break;            break;
1712          }          }
# Line 931  for (i = 1; i < argc; i++) Line 1719  for (i = 1; i < argc; i++)
1719        }        }
1720      }      }
1721    
1722    
1723      /* Jeffrey Friedl's debugging harness uses these additional options which
1724      are not in the right form for putting in the option table because they use
1725      only one hyphen, yet are more than one character long. By putting them
1726      separately here, they will not get displayed as part of the help() output,
1727      but I don't think Jeffrey will care about that. */
1728    
1729    #ifdef JFRIEDL_DEBUG
1730      else if (strcmp(argv[i], "-pre") == 0) {
1731              jfriedl_prefix = argv[++i];
1732              continue;
1733      } else if (strcmp(argv[i], "-post") == 0) {
1734              jfriedl_postfix = argv[++i];
1735              continue;
1736      } else if (strcmp(argv[i], "-XT") == 0) {
1737              sscanf(argv[++i], "%d", &jfriedl_XT);
1738              continue;
1739      } else if (strcmp(argv[i], "-XR") == 0) {
1740              sscanf(argv[++i], "%d", &jfriedl_XR);
1741              continue;
1742      }
1743    #endif
1744    
1745    
1746    /* One-char options; many that have no data may be in a single argument; we    /* One-char options; many that have no data may be in a single argument; we
1747    continue till we hit the last one or one that needs data. */    continue till we hit the last one or one that needs data. */
1748    
# Line 953  for (i = 1; i < argc; i++) Line 1765  for (i = 1; i < argc; i++)
1765          option_data = s+1;          option_data = s+1;
1766          break;          break;
1767          }          }
1768        options = handle_option(*s++, options);        pcre_options = handle_option(*s++, pcre_options);
1769        }        }
1770      }      }
1771    
1772    /* At this point we should have op pointing to a matched option */    /* At this point we should have op pointing to a matched option. If the type
1773      is NO_DATA, it means that there is no data, and the option might set
1774      something in the PCRE options. */
1775    
1776    if (op->type == OP_NODATA)    if (op->type == OP_NODATA)
     options = handle_option(op->one_char, options);  
   else  
1777      {      {
1778      if (*option_data == 0)      pcre_options = handle_option(op->one_char, pcre_options);
1779        continue;
1780        }
1781    
1782      /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1783      either has a value or defaults to something. It cannot have data in a
1784      separate item. At the moment, the only such options are "colo(u)r" and
1785      Jeffrey Friedl's special -S debugging option. */
1786    
1787      if (*option_data == 0 &&
1788          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1789        {
1790        switch (op->one_char)
1791        {        {
1792        if (i >= argc - 1 || longopwasequals)        case N_COLOUR:
1793          {        colour_option = (char *)"auto";
1794          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        break;
1795          exit(usage(2));  #ifdef JFRIEDL_DEBUG
1796          }        case 'S':
1797        option_data = argv[++i];        S_arg = 0;
1798          break;
1799    #endif
1800          }
1801        continue;
1802        }
1803    
1804      /* Otherwise, find the data string for the option. */
1805    
1806      if (*option_data == 0)
1807        {
1808        if (i >= argc - 1 || longopwasequals)
1809          {
1810          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1811          exit(usage(2));
1812          }
1813        option_data = argv[++i];
1814        }
1815    
1816      /* If the option type is OP_PATLIST, it's the -e option, which can be called
1817      multiple times to create a list of patterns. */
1818    
1819      if (op->type == OP_PATLIST)
1820        {
1821        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1822          {
1823          fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1824            MAX_PATTERN_COUNT);
1825          return 2;
1826        }        }
1827        patterns[cmd_pattern_count++] = option_data;
1828        }
1829    
1830      /* Otherwise, deal with single string or numeric data values. */
1831    
1832      if (op->type == OP_STRING) *((char **)op->dataptr) = option_data; else    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1833        {
1834        *((char **)op->dataptr) = option_data;
1835        }
1836      else
1837        {
1838        char *endptr;
1839        int n = strtoul(option_data, &endptr, 10);
1840        if (*endptr != 0)
1841        {        {
1842        char *endptr;        if (longop)
       int n = strtoul(option_data, &endptr, 10);  
       if (*endptr != 0)  
1843          {          {
1844          if (longop)          char *equals = strchr(op->long_name, '=');
1845            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%s\n",          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1846              option_data, op->long_name);            equals - op->long_name;
1847          else          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1848            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",            option_data, nlen, op->long_name);
             option_data, op->one_char);  
         exit(usage(2));  
1849          }          }
1850        *((int *)op->dataptr) = n;        else
1851            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1852              option_data, op->one_char);
1853          exit(usage(2));
1854        }        }
1855        *((int *)op->dataptr) = n;
1856      }      }
1857    }    }
1858    
# Line 1000  if (both_context > 0) Line 1864  if (both_context > 0)
1864    if (after_context == 0) after_context = both_context;    if (after_context == 0) after_context = both_context;
1865    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
1866    }    }
1867    
1868    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
1869    However, the latter two set the only_matching flag. */
1870    
1871  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  if ((only_matching && (file_offsets || line_offsets)) ||
1872  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));      (file_offsets && line_offsets))
   
 if (pattern_list == NULL || hints_list == NULL)  
1873    {    {
1874    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
1875    return 2;      "and/or --line-offsets\n");
1876      exit(usage(2));
1877    }    }
1878    
1879    if (file_offsets || line_offsets) only_matching = TRUE;
1880    
1881  /* Compile the regular expression(s). */  /* If a locale has not been provided as an option, see if the LC_CTYPE or
1882    LC_ALL environment variable is set, and if so, use it. */
1883    
1884  if (pattern_filename != NULL)  if (locale == NULL)
1885      {
1886      locale = getenv("LC_ALL");
1887      locale_from = "LCC_ALL";
1888      }
1889    
1890    if (locale == NULL)
1891    {    {
1892    FILE *f = fopen(pattern_filename, "r");    locale = getenv("LC_CTYPE");
1893    char buffer[MBUFTHIRD + 16];    locale_from = "LC_CTYPE";
1894    char *rdstart;    }
1895    int adjust = 0;  
1896    /* If a locale has been provided, set it, and generate the tables the PCRE
1897    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1898    
1899    if (f == NULL)  if (locale != NULL)
1900      {
1901      if (setlocale(LC_CTYPE, locale) == NULL)
1902      {      {
1903      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1904        strerror(errno));        locale, locale_from);
1905      return 2;      return 2;
1906      }      }
1907      pcretables = pcre_maketables();
1908      }
1909    
1910    /* Sort out colouring */
1911    
1912    if (whole_lines)  if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1913      {
1914      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1915      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1916      else
1917      {      {
1918      strcpy(buffer, "^(?:");      fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1919      adjust = 4;        colour_option);
1920        return 2;
1921      }      }
1922    else if (word_match)    if (do_colour)
1923      {      {
1924      strcpy(buffer, "\\b");      char *cs = getenv("PCREGREP_COLOUR");
1925      adjust = 2;      if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1926        if (cs != NULL) colour_string = cs;
1927      }      }
1928      }
1929    
1930    /* Interpret the newline type; the default settings are Unix-like. */
1931    
1932    rdstart = buffer + adjust;  if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1933    while (fgets(rdstart, MBUFTHIRD, f) != NULL)    {
1934      pcre_options |= PCRE_NEWLINE_CR;
1935      endlinetype = EL_CR;
1936      }
1937    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1938      {
1939      pcre_options |= PCRE_NEWLINE_LF;
1940      endlinetype = EL_LF;
1941      }
1942    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1943      {
1944      pcre_options |= PCRE_NEWLINE_CRLF;
1945      endlinetype = EL_CRLF;
1946      }
1947    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1948      {
1949      pcre_options |= PCRE_NEWLINE_ANY;
1950      endlinetype = EL_ANY;
1951      }
1952    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1953      {
1954      pcre_options |= PCRE_NEWLINE_ANYCRLF;
1955      endlinetype = EL_ANYCRLF;
1956      }
1957    else
1958      {
1959      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1960      return 2;
1961      }
1962    
1963    /* Interpret the text values for -d and -D */
1964    
1965    if (dee_option != NULL)
1966      {
1967      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1968      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1969      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1970      else
1971      {      {
1972      char *s = rdstart + (int)strlen(rdstart);      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1973      if (pattern_count >= MAX_PATTERN_COUNT)      return 2;
1974        {      }
1975        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",    }
1976          MAX_PATTERN_COUNT);  
1977        return 2;  if (DEE_option != NULL)
1978        }    {
1979      while (s > rdstart && isspace((unsigned char)(s[-1]))) s--;    if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1980      if (s == rdstart) continue;    else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1981      if (whole_lines) strcpy(s, ")$");    else
1982        else if (word_match)strcpy(s, "\\b");      {
1983          else *s = 0;      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1984      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,      return 2;
       &errptr, NULL);  
     if (pattern_list[pattern_count++] == NULL)  
       {  
       fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",  
         pattern_count, errptr - adjust, error);  
       return 2;  
       }  
1985      }      }
   fclose(f);  
1986    }    }
1987    
1988  /* If no file name, a single regex must be given inline. */  /* Check the values for Jeffrey Friedl's debugging options. */
1989    
1990  else  #ifdef JFRIEDL_DEBUG
1991    if (S_arg > 9)
1992      {
1993      fprintf(stderr, "pcregrep: bad value for -S option\n");
1994      return 2;
1995      }
1996    if (jfriedl_XT != 0 || jfriedl_XR != 0)
1997    {    {
1998    char buffer[MBUFTHIRD + 16];    if (jfriedl_XT == 0) jfriedl_XT = 1;
1999    char *pat;    if (jfriedl_XR == 0) jfriedl_XR = 1;
2000    int adjust = 0;    }
2001    #endif
2002    
2003    /* Get memory to store the pattern and hints lists. */
2004    
2005    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2006    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2007    
2008    if (pattern_list == NULL || hints_list == NULL)
2009      {
2010      fprintf(stderr, "pcregrep: malloc failed\n");
2011      goto EXIT2;
2012      }
2013    
2014    /* If no patterns were provided by -e, and there is no file provided by -f,
2015    the first argument is the one and only pattern, and it must exist. */
2016    
2017    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2018      {
2019    if (i >= argc) return usage(2);    if (i >= argc) return usage(2);
2020      patterns[cmd_pattern_count++] = argv[i++];
2021      }
2022    
2023    if (whole_lines)  /* Compile the patterns that were provided on the command line, either by
2024    multiple uses of -e or as a single unkeyed pattern. */
2025    
2026    for (j = 0; j < cmd_pattern_count; j++)
2027      {
2028      if (!compile_pattern(patterns[j], pcre_options, NULL,
2029           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2030        goto EXIT2;
2031      }
2032    
2033    /* Compile the regular expressions that are provided in a file. */
2034    
2035    if (pattern_filename != NULL)
2036      {
2037      int linenumber = 0;
2038      FILE *f;
2039      char *filename;
2040      char buffer[MBUFTHIRD];
2041    
2042      if (strcmp(pattern_filename, "-") == 0)
2043      {      {
2044      sprintf(buffer, "^(?:%.*s)$", MBUFTHIRD, argv[i++]);      f = stdin;
2045      pat = buffer;      filename = stdin_name;
     adjust = 4;  
2046      }      }
2047    else if (word_match)    else
2048      {      {
2049      sprintf(buffer, "\\b%.*s\\b", MBUFTHIRD, argv[i++]);      f = fopen(pattern_filename, "r");
2050      pat = buffer;      if (f == NULL)
2051      adjust = 2;        {
2052          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2053            strerror(errno));
2054          goto EXIT2;
2055          }
2056        filename = pattern_filename;
2057      }      }
   else pat = argv[i++];  
   
   pattern_list[0] = pcre_compile(pat, options, &error, &errptr, NULL);  
2058    
2059    if (pattern_list[0] == NULL)    while (fgets(buffer, MBUFTHIRD, f) != NULL)
2060      {      {
2061      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n",      char *s = buffer + (int)strlen(buffer);
2062        errptr - adjust, error);      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2063      return 2;      *s = 0;
2064        linenumber++;
2065        if (buffer[0] == 0) continue;   /* Skip blank lines */
2066        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2067          goto EXIT2;
2068      }      }
2069    pattern_count++;  
2070      if (f != stdin) fclose(f);
2071    }    }
2072    
2073  /* Study the regular expressions, as we will be running them many times */  /* Study the regular expressions, as we will be running them many times */
# Line 1109  for (j = 0; j < pattern_count; j++) Line 2080  for (j = 0; j < pattern_count; j++)
2080      char s[16];      char s[16];
2081      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2082      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2083      return 2;      goto EXIT2;
2084      }      }
2085      hint_count++;
2086    }    }
2087    
2088  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
2089    
2090  if (exclude_pattern != NULL)  if (exclude_pattern != NULL)
2091    {    {
2092    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, NULL);    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2093        pcretables);
2094    if (exclude_compiled == NULL)    if (exclude_compiled == NULL)
2095      {      {
2096      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2097        errptr, error);        errptr, error);
2098      return 2;      goto EXIT2;
2099      }      }
2100    }    }
2101    
2102  if (include_pattern != NULL)  if (include_pattern != NULL)
2103    {    {
2104    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, NULL);    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2105        pcretables);
2106    if (include_compiled == NULL)    if (include_compiled == NULL)
2107      {      {
2108      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2109        errptr, error);        errptr, error);
2110      return 2;      goto EXIT2;
2111      }      }
2112    }    }
2113    
2114  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
2115    
2116  if (i >= argc) return pcregrep(stdin,  if (i >= argc)
2117    (filenames_only || filenames_nomatch_only)? stdin_name : NULL);    {
2118      rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2119      goto EXIT;
2120      }
2121    
2122  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2123  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
2124  the file name if the argument is not a directory and filenames_only is not set.  the file name if the argument is not a directory and filenames are not
2125  */  otherwise forced. */
2126    
2127  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
2128    
2129  for (; i < argc; i++)  for (; i < argc; i++)
2130    {    {
2131    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2132        only_one_at_top);
2133    if (frc > 1) rc = frc;    if (frc > 1) rc = frc;
2134      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2135    }    }
2136    
2137    EXIT:
2138    if (pattern_list != NULL)
2139      {
2140      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2141      free(pattern_list);
2142      }
2143    if (hints_list != NULL)
2144      {
2145      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2146      free(hints_list);
2147      }
2148  return rc;  return rc;
2149    
2150    EXIT2:
2151    rc = 2;
2152    goto EXIT;
2153  }  }
2154    
2155  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.77  
changed lines
  Added in v.280

  ViewVC Help
Powered by ViewVC 1.1.5