/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 286 by ph10, Mon Dec 17 14:46:11 2007 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
# Line 45  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
 #include "config.h"  
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 55  POSSIBILITY OF SUCH DAMAGE. Line 70  POSSIBILITY OF SUCH DAMAGE.
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "4.0 07-Jun-2005"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    
75  #if BUFSIZ > 8192  #if BUFSIZ > 8192
# Line 64  typedef int BOOL; Line 78  typedef int BOOL;
78  #define MBUFTHIRD 8192  #define MBUFTHIRD 8192
79  #endif  #endif
80    
81    /* Values for the "filenames" variable, which specifies options for file name
82    output. The order is important; it is assumed that a file name is wanted for
83    all values greater than FN_DEFAULT. */
84    
85    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
86    
87    /* File reading styles */
88    
89    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
90    
91    /* Actions for the -d and -D options */
92    
93    enum { dee_READ, dee_SKIP, dee_RECURSE };
94    enum { DEE_READ, DEE_SKIP };
95    
96    /* Actions for special processing options (flag bits) */
97    
98    #define PO_WORD_MATCH     0x0001
99    #define PO_LINE_MATCH     0x0002
100    #define PO_FIXED_STRINGS  0x0004
101    
102    /* Line ending types */
103    
104    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
105    
106    
107    
108  /*************************************************  /*************************************************
109  *               Global variables                 *  *               Global variables                 *
110  *************************************************/  *************************************************/
111    
112    /* Jeffrey Friedl has some debugging requirements that are not part of the
113    regular code. */
114    
115    #ifdef JFRIEDL_DEBUG
116    static int S_arg = -1;
117    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
118    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
119    static const char *jfriedl_prefix = "";
120    static const char *jfriedl_postfix = "";
121    #endif
122    
123    static int  endlinetype;
124    
125    static char *colour_string = (char *)"1;31";
126    static char *colour_option = NULL;
127    static char *dee_option = NULL;
128    static char *DEE_option = NULL;
129    static char *newline = NULL;
130  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
131  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
132    static char *locale = NULL;
133    
134    static const unsigned char *pcretables = NULL;
135    
136  static int  pattern_count = 0;  static int  pattern_count = 0;
137  static pcre **pattern_list;  static pcre **pattern_list = NULL;
138  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
139    
140  static char *include_pattern = NULL;  static char *include_pattern = NULL;
141  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
# Line 85  static pcre *exclude_compiled = NULL; Line 146  static pcre *exclude_compiled = NULL;
146  static int after_context = 0;  static int after_context = 0;
147  static int before_context = 0;  static int before_context = 0;
148  static int both_context = 0;  static int both_context = 0;
149    static int dee_action = dee_READ;
150    static int DEE_action = DEE_READ;
151    static int error_count = 0;
152    static int filenames = FN_DEFAULT;
153    static int process_options = 0;
154    
155  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
156  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
157  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
 static BOOL filenames_nomatch_only = FALSE;  
158  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
159  static BOOL invert = FALSE;  static BOOL invert = FALSE;
160    static BOOL line_offsets = FALSE;
161  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
162  static BOOL number = FALSE;  static BOOL number = FALSE;
163    static BOOL only_matching = FALSE;
164  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
 static BOOL recurse = FALSE;  
165  static BOOL silent = FALSE;  static BOOL silent = FALSE;
166  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
 static BOOL word_match = FALSE;  
167    
168  /* Structure for options and list of them */  /* Structure for options and list of them */
169    
170  enum { OP_NODATA, OP_STRING, OP_NUMBER };  enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
171           OP_PATLIST };
172    
173  typedef struct option_item {  typedef struct option_item {
174    int type;    int type;
# Line 112  typedef struct option_item { Line 178  typedef struct option_item {
178    const char *help_text;    const char *help_text;
179  } option_item;  } option_item;
180    
181    /* Options without a single-letter equivalent get a negative value. This can be
182    used to identify them. */
183    
184    #define N_COLOUR    (-1)
185    #define N_EXCLUDE   (-2)
186    #define N_HELP      (-3)
187    #define N_INCLUDE   (-4)
188    #define N_LABEL     (-5)
189    #define N_LOCALE    (-6)
190    #define N_NULL      (-7)
191    #define N_LOFFSETS  (-8)
192    #define N_FOFFSETS  (-9)
193    
194  static option_item optionlist[] = {  static option_item optionlist[] = {
195    { OP_NODATA, -1,  NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
196    { OP_NODATA, -1,  NULL,              "help",          "display this help and exit" },    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
197    { OP_NUMBER, 'A', &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
198    { OP_NUMBER, 'B', &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
199    { OP_NUMBER, 'C', &both_context,     "context=number", "set number of context lines, before & after" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
200    { OP_NODATA, 'c', NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
201    { OP_STRING, 'f', &pattern_filename, "file=path",     "read patterns from file" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
202    { OP_NODATA, 'h', NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
203    { OP_NODATA, 'i', NULL,              "ignore-case",   "ignore case distinctions" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
204    { OP_NODATA, 'l', NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
205    { OP_NODATA, 'L', NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
206    { OP_STRING, -1,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
207    { OP_NODATA, 'M', NULL,              "multiline",     "run in multiline mode" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
208    { OP_NODATA, 'n', NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
209    { OP_NODATA, 'q', NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
210    { OP_NODATA, 'r', NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
211    { OP_STRING, -1,  &exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
212    { OP_STRING, -1,  &include_pattern,  "include=pattern","include matching files when recursing" },    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
213    { OP_NODATA, 's', NULL,              "no-messages",   "suppress error messages" },    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
214    { OP_NODATA, 'u', NULL,              "utf-8",         "use UTF-8 mode" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
215    { OP_NODATA, 'V', NULL,              "version",       "print version information and exit" },    { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
216    { OP_NODATA, 'v', NULL,              "invert-match",  "select non-matching lines" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
217    { OP_NODATA, 'w', NULL,              "word-regex(p)", "force PATTERN to match only as a word"  },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
218    { OP_NODATA, 'x', NULL,              "line-regex(p)", "force PATTERN to match only whole lines" },    { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
219    { OP_NODATA, 0,   NULL,               NULL,            NULL }    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
220      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
221      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
222      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
223      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
224      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
225    #ifdef JFRIEDL_DEBUG
226      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
227    #endif
228      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
229      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
230      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
231      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
232      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
233      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
234      { OP_NODATA,    0,        NULL,               NULL,            NULL }
235  };  };
236    
237    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
238    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
239    that the combination of -w and -x has the same effect as -x on its own, so we
240    can treat them as the same. */
241    
242    static const char *prefix[] = {
243      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
244    
245    static const char *suffix[] = {
246      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
247    
248    /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
249    
250    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
251    
252    const char utf8_table4[] = {
253      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
254      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
255      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
256      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
257    
258    
259    
260  /*************************************************  /*************************************************
261  *       Functions for directory scanning         *  *            OS-specific functions               *
262  *************************************************/  *************************************************/
263    
264  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
265  although at present the only ones are for Unix, Win32, and for "no directory  although at present the only ones are for Unix, Win32, and for "no support". */
 recursion support". */  
266    
267    
268  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
269    
270  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
271  #include <sys/types.h>  #include <sys/types.h>
272  #include <sys/stat.h>  #include <sys/stat.h>
273  #include <dirent.h>  #include <dirent.h>
# Line 184  for (;;) Line 299  for (;;)
299    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
300      return dent->d_name;      return dent->d_name;
301    }    }
302  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
303  }  }
304    
305  static void  static void
# Line 194  closedir(dir); Line 309  closedir(dir);
309  }  }
310    
311    
312    /************* Test for regular file in Unix **********/
313    
/* Report whether a path names a regular file, as judged by stat(). When stat()
itself fails the answer given is "yes", in the expectation that opening the
path as a file will then fail.

Argument:   filename   the path to test
Returns:    1 if the path is a regular file or stat() fails; 0 otherwise
*/

static int
isregfile(char *filename)
{
struct stat info;
int examined = stat(filename, &info) >= 0;

if (examined && !S_ISREG(info.st_mode)) return 0;
return 1;
}
322    
323    
324    /************* Test stdout for being a terminal in Unix **********/
325    
326    static BOOL
327    is_stdout_tty(void)
328    {
329    return isatty(fileno(stdout));
330    }
331    
332    
333  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
334    
335  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
336  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
337  when it did not exist. */  when it did not exist. David Byron added a patch that moved the #include of
338    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
339    */
340    
341  #elif HAVE_WIN32API  #elif HAVE_WINDOWS_H
342    
343  #ifndef STRICT  #ifndef STRICT
344  # define STRICT  # define STRICT
# Line 209  when it did not exist. */ Line 346  when it did not exist. */
346  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
347  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
348  #endif  #endif
349    
350    #include <windows.h>
351    
352  #ifndef INVALID_FILE_ATTRIBUTES  #ifndef INVALID_FILE_ATTRIBUTES
353  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
354  #endif  #endif
355    
 #include <windows.h>  
   
356  typedef struct directory_type  typedef struct directory_type
357  {  {
358  HANDLE handle;  HANDLE handle;
# Line 292  free(dir); Line 430  free(dir);
430  }  }
431    
432    
433    /************* Test for regular file in Win32 **********/
434    
/* No way of making this test under Win32 was known when this was written (it
may not be possible at all), so every path that is not a directory is assumed
to be a regular file.

Argument:   filename   the path to test
Returns:    0 if isdirectory() gives a non-zero result for the path;
            1 otherwise
*/

int isregfile(char *filename)
{
if (isdirectory(filename)) return 0;
return 1;
}
442    
443    
444    /************* Test stdout for being a terminal in Win32 **********/
445    
446    /* I don't know how to do this; assume never */
447    
448    static BOOL
449    is_stdout_tty(void)
450    {
451    return FALSE;
452    }
453    
454    
455  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
456    
457  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
# Line 300  free(dir); Line 460  free(dir);
460    
461  typedef void directory_type;  typedef void directory_type;
462    
463  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
464  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
465  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
466  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
467    
468    
469    /************* Test for regular when we can't do it **********/
470    
/* With no means of examining the file system, every path is taken to be a
regular file.

Argument:   filename   the path (not examined)
Returns:    1, always
*/

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
474    
475    
476    /************* Test stdout for being a terminal when we can't do it **********/
477    
478    static BOOL
479    is_stdout_tty(void)
480    {
481    return FALSE;
482    }
483    
484    
485  #endif  #endif
486    
487    
488    
489  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
490  /*************************************************  /*************************************************
491  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
492  *************************************************/  *************************************************/
# Line 332  return sys_errlist[n]; Line 509  return sys_errlist[n];
509    
510    
511  /*************************************************  /*************************************************
512    *             Find end of line                   *
513    *************************************************/
514    
515    /* The length of the endline sequence that is found is set via lenptr. This may
516    be zero at the very end of the file if there is no line-ending sequence there.
517    
518    Arguments:
519      p         current position in line
520      endptr    end of available data
521      lenptr    where to put the length of the eol sequence
522    
523    Returns:    pointer to the last byte of the line
524    */
525    
526    static char *
527    end_of_line(char *p, char *endptr, int *lenptr)
528    {
529    switch(endlinetype)
530      {
531      default:      /* Just in case */
532      case EL_LF:
533      while (p < endptr && *p != '\n') p++;
534      if (p < endptr)
535        {
536        *lenptr = 1;
537        return p + 1;
538        }
539      *lenptr = 0;
540      return endptr;
541    
542      case EL_CR:
543      while (p < endptr && *p != '\r') p++;
544      if (p < endptr)
545        {
546        *lenptr = 1;
547        return p + 1;
548        }
549      *lenptr = 0;
550      return endptr;
551    
552      case EL_CRLF:
553      for (;;)
554        {
555        while (p < endptr && *p != '\r') p++;
556        if (++p >= endptr)
557          {
558          *lenptr = 0;
559          return endptr;
560          }
561        if (*p == '\n')
562          {
563          *lenptr = 2;
564          return p + 1;
565          }
566        }
567      break;
568    
569      case EL_ANYCRLF:
570      while (p < endptr)
571        {
572        int extra = 0;
573        register int c = *((unsigned char *)p);
574    
575        if (utf8 && c >= 0xc0)
576          {
577          int gcii, gcss;
578          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
579          gcss = 6*extra;
580          c = (c & utf8_table3[extra]) << gcss;
581          for (gcii = 1; gcii <= extra; gcii++)
582            {
583            gcss -= 6;
584            c |= (p[gcii] & 0x3f) << gcss;
585            }
586          }
587    
588        p += 1 + extra;
589    
590        switch (c)
591          {
592          case 0x0a:    /* LF */
593          *lenptr = 1;
594          return p;
595    
596          case 0x0d:    /* CR */
597          if (p < endptr && *p == 0x0a)
598            {
599            *lenptr = 2;
600            p++;
601            }
602          else *lenptr = 1;
603          return p;
604    
605          default:
606          break;
607          }
608        }   /* End of loop for ANYCRLF case */
609    
610      *lenptr = 0;  /* Must have hit the end */
611      return endptr;
612    
613      case EL_ANY:
614      while (p < endptr)
615        {
616        int extra = 0;
617        register int c = *((unsigned char *)p);
618    
619        if (utf8 && c >= 0xc0)
620          {
621          int gcii, gcss;
622          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
623          gcss = 6*extra;
624          c = (c & utf8_table3[extra]) << gcss;
625          for (gcii = 1; gcii <= extra; gcii++)
626            {
627            gcss -= 6;
628            c |= (p[gcii] & 0x3f) << gcss;
629            }
630          }
631    
632        p += 1 + extra;
633    
634        switch (c)
635          {
636          case 0x0a:    /* LF */
637          case 0x0b:    /* VT */
638          case 0x0c:    /* FF */
639          *lenptr = 1;
640          return p;
641    
642          case 0x0d:    /* CR */
643          if (p < endptr && *p == 0x0a)
644            {
645            *lenptr = 2;
646            p++;
647            }
648          else *lenptr = 1;
649          return p;
650    
651          case 0x85:    /* NEL */
652          *lenptr = utf8? 2 : 1;
653          return p;
654    
655          case 0x2028:  /* LS */
656          case 0x2029:  /* PS */
657          *lenptr = 3;
658          return p;
659    
660          default:
661          break;
662          }
663        }   /* End of loop for ANY case */
664    
665      *lenptr = 0;  /* Must have hit the end */
666      return endptr;
667      }     /* End of overall switch */
668    }
669    
670    
671    
672    /*************************************************
673    *         Find start of previous line            *
674    *************************************************/
675    
676    /* This is called when looking back for before lines to print.
677    
678    Arguments:
679      p         start of the subsequent line
680      startptr  start of available data
681    
682    Returns:    pointer to the start of the previous line
683    */
684    
685    static char *
686    previous_line(char *p, char *startptr)
687    {
688    switch(endlinetype)
689      {
690      default:      /* Just in case */
691      case EL_LF:
692      p--;
693      while (p > startptr && p[-1] != '\n') p--;
694      return p;
695    
696      case EL_CR:
697      p--;
698      while (p > startptr && p[-1] != '\n') p--;
699      return p;
700    
701      case EL_CRLF:
702      for (;;)
703        {
704        p -= 2;
705        while (p > startptr && p[-1] != '\n') p--;
706        if (p <= startptr + 1 || p[-2] == '\r') return p;
707        }
708      return p;   /* But control should never get here */
709    
710      case EL_ANY:
711      case EL_ANYCRLF:
712      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
713      if (utf8) while ((*p & 0xc0) == 0x80) p--;
714    
715      while (p > startptr)
716        {
717        register int c;
718        char *pp = p - 1;
719    
720        if (utf8)
721          {
722          int extra = 0;
723          while ((*pp & 0xc0) == 0x80) pp--;
724          c = *((unsigned char *)pp);
725          if (c >= 0xc0)
726            {
727            int gcii, gcss;
728            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
729            gcss = 6*extra;
730            c = (c & utf8_table3[extra]) << gcss;
731            for (gcii = 1; gcii <= extra; gcii++)
732              {
733              gcss -= 6;
734              c |= (pp[gcii] & 0x3f) << gcss;
735              }
736            }
737          }
738        else c = *((unsigned char *)pp);
739    
740        if (endlinetype == EL_ANYCRLF) switch (c)
741          {
742          case 0x0a:    /* LF */
743          case 0x0d:    /* CR */
744          return p;
745    
746          default:
747          break;
748          }
749    
750        else switch (c)
751          {
752          case 0x0a:    /* LF */
753          case 0x0b:    /* VT */
754          case 0x0c:    /* FF */
755          case 0x0d:    /* CR */
756          case 0x85:    /* NEL */
757          case 0x2028:  /* LS */
758          case 0x2029:  /* PS */
759          return p;
760    
761          default:
762          break;
763          }
764    
765        p = pp;  /* Back one character */
766        }        /* End of loop for ANY case */
767    
768      return startptr;  /* Hit start of data */
769      }     /* End of overall switch */
770    }
771    
772    
773    
774    
775    
776    /*************************************************
777  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
778  *************************************************/  *************************************************/
779    
780  /* This is called if we are about to lose said lines because of buffer filling,  /* This is called if we are about to lose said lines because of buffer filling,
781  and at the end of the file.  and at the end of the file. The data in the line is written using fwrite() so
782    that a binary zero does not terminate it.
783    
784  Arguments:  Arguments:
785    lastmatchnumber   the number of the last matching line, plus one    lastmatchnumber   the number of the last matching line, plus one
# Line 355  if (after_context > 0 && lastmatchnumber Line 798  if (after_context > 0 && lastmatchnumber
798    int count = 0;    int count = 0;
799    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
800      {      {
801        int ellength;
802      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
803      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
804      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
805      while (*pp != '\n') pp++;      pp = end_of_line(pp, endptr, &ellength);
806      fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
807      lastmatchrestart = pp + 1;      lastmatchrestart = pp;
808      }      }
809    hyphenpending = TRUE;    hyphenpending = TRUE;
810    }    }
# Line 380  be in the middle third most of the time, Line 824  be in the middle third most of the time,
824  "before" context printing.  "before" context printing.
825    
826  Arguments:  Arguments:
827    in           the fopened FILE stream    handle       the fopened FILE stream for a normal file
828                   the gzFile pointer when reading is via libz
829                   the BZFILE pointer when reading is via libbz2
830      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
831    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
832                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
833                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
834    
835  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
836                 1 otherwise (no matches)                 1 otherwise (no matches)
837                   2 if there is a read error on a .bz2 file
838  */  */
839    
840  static int  static int
841  pcregrep(FILE *in, char *printname)  pcregrep(void *handle, int frtype, char *printname)
842  {  {
843  int rc = 1;  int rc = 1;
844  int linenumber = 1;  int linenumber = 1;
845  int lastmatchnumber = 0;  int lastmatchnumber = 0;
846  int count = 0;  int count = 0;
847    int filepos = 0;
848  int offsets[99];  int offsets[99];
849  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
850  char buffer[3*MBUFTHIRD];  char buffer[3*MBUFTHIRD];
# Line 403  char *ptr = buffer; Line 852  char *ptr = buffer;
852  char *endptr;  char *endptr;
853  size_t bufflength;  size_t bufflength;
854  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
855    FILE *in = NULL;                    /* Ensure initialized */
856    
857  /* Do the first read into the start of the buffer and set up the pointer to  #ifdef SUPPORT_LIBZ
858  end of what we have. */  gzFile ingz = NULL;
859    #endif
860    
861    #ifdef SUPPORT_LIBBZ2
862    BZFILE *inbz2 = NULL;
863    #endif
864    
865    
866    /* Do the first read into the start of the buffer and set up the pointer to end
867    of what we have. In the case of libz, a non-zipped .gz file will be read as a
868    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
869    fail. */
870    
871    #ifdef SUPPORT_LIBZ
872    if (frtype == FR_LIBZ)
873      {
874      ingz = (gzFile)handle;
875      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
876      }
877    else
878    #endif
879    
880    #ifdef SUPPORT_LIBBZ2
881    if (frtype == FR_LIBBZ2)
882      {
883      inbz2 = (BZFILE *)handle;
884      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
885      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
886      }                                    /* without the cast it is unsigned. */
887    else
888    #endif
889    
890      {
891      in = (FILE *)handle;
892      bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
893      }
894    
 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  
895  endptr = buffer + bufflength;  endptr = buffer + bufflength;
896    
897  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
# Line 417  way, the buffer is shifted left and re-f Line 901  way, the buffer is shifted left and re-f
901    
902  while (ptr < endptr)  while (ptr < endptr)
903    {    {
904    int i;    int i, endlinelength;
905      int mrc = 0;
906    BOOL match = FALSE;    BOOL match = FALSE;
907      char *matchptr = ptr;
908    char *t = ptr;    char *t = ptr;
909    size_t length, linelength;    size_t length, linelength;
910    
# Line 429  while (ptr < endptr) Line 915  while (ptr < endptr)
915    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
916    that any match is constrained to be in the first line. */    that any match is constrained to be in the first line. */
917    
918    linelength = 0;    t = end_of_line(t, endptr, &endlinelength);
919    while (t < endptr && *t++ != '\n') linelength++;    linelength = t - ptr - endlinelength;
920    length = multiline? endptr - ptr : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
921    
922      /* Extra processing for Jeffrey Friedl's debugging. */
923    
924    #ifdef JFRIEDL_DEBUG
925      if (jfriedl_XT || jfriedl_XR)
926      {
927          #include <sys/time.h>
928          #include <time.h>
929          struct timeval start_time, end_time;
930          struct timezone dummy;
931    
932          if (jfriedl_XT)
933          {
934              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
935              const char *orig = ptr;
936              ptr = malloc(newlen + 1);
937              if (!ptr) {
938                      printf("out of memory");
939                      exit(2);
940              }
941              endptr = ptr;
942              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
943              for (i = 0; i < jfriedl_XT; i++) {
944                      strncpy(endptr, orig,  length);
945                      endptr += length;
946              }
947              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
948              length = newlen;
949          }
950    
951          if (gettimeofday(&start_time, &dummy) != 0)
952                  perror("bad gettimeofday");
953    
954    
955          for (i = 0; i < jfriedl_XR; i++)
956              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
957    
958          if (gettimeofday(&end_time, &dummy) != 0)
959                  perror("bad gettimeofday");
960    
961          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
962                          -
963                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
964    
965          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
966          return 0;
967      }
968    #endif
969    
970      /* We come back here after a match when the -o option (only_matching) is set,
971      in order to find any further matches in the same line. */
972    
973      ONLY_MATCHING_RESTART:
974    
975    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches. Note that we don't include
976    the final newline in the subject string. */    the final newline in the subject string. */
977    
978    for (i = 0; !match && i < pattern_count; i++)    for (i = 0; i < pattern_count; i++)
979      {      {
980      match = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,      mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
981        offsets, 99) >= 0;        offsets, 99);
982        if (mrc >= 0) { match = TRUE; break; }
983        if (mrc != PCRE_ERROR_NOMATCH)
984          {
985          fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
986          if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
987          fprintf(stderr, "this line:\n");
988          fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */
989          fprintf(stderr, "\n");
990          if (error_count == 0 &&
991              (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
992            {
993            fprintf(stderr, "pcregrep: error %d means that a resource limit "
994              "was exceeded\n", mrc);
995            fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
996            }
997          if (error_count++ > 20)
998            {
999            fprintf(stderr, "pcregrep: too many errors - abandoned\n");
1000            exit(2);
1001            }
1002          match = invert;    /* No more matching; don't show the line again */
1003          break;
1004          }
1005      }      }
1006    
1007    /* If it's a match or a not-match (as required), print what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1008    
1009    if (match != invert)    if (match != invert)
1010      {      {
1011      BOOL hyphenprinted = FALSE;      BOOL hyphenprinted = FALSE;
1012    
1013      if (filenames_nomatch_only) return 1;      /* We've failed if we want a file that doesn't have any matches. */
1014    
1015        if (filenames == FN_NOMATCH_ONLY) return 1;
1016    
1017        /* Just count if just counting is wanted. */
1018    
1019      if (count_only) count++;      if (count_only) count++;
1020    
1021      else if (filenames_only)      /* If all we want is a file name, there is no need to scan any more lines
1022        in the file. */
1023    
1024        else if (filenames == FN_ONLY)
1025        {        {
1026        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1027        return 0;        return 0;
1028        }        }
1029    
1030        /* Likewise, if all we want is a yes/no answer. */
1031    
1032      else if (quiet) return 0;      else if (quiet) return 0;
1033    
1034        /* The --only-matching option prints just the substring that matched, and
1035        the --file-offsets and --line-offsets options output offsets for the
1036        matching substring (they both force --only-matching). None of these options
1037        prints any context. Afterwards, adjust the start and length, and then jump
1038        back to look for further matches in the same line. If we are in invert
1039        mode, however, nothing is printed - this could still be useful because the
1040        return code is set. */
1041    
1042        else if (only_matching)
1043          {
1044          if (!invert)
1045            {
1046            if (printname != NULL) fprintf(stdout, "%s:", printname);
1047            if (number) fprintf(stdout, "%d:", linenumber);
1048            if (line_offsets)
1049              fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,
1050                offsets[1] - offsets[0]);
1051            else if (file_offsets)
1052              fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,
1053                offsets[1] - offsets[0]);
1054            else
1055              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1056            fprintf(stdout, "\n");
1057            matchptr += offsets[1];
1058            length -= offsets[1];
1059            match = FALSE;
1060            goto ONLY_MATCHING_RESTART;
1061            }
1062          }
1063    
1064        /* This is the default case when none of the above options is set. We print
1065      the matching line(s), possibly preceded and/or followed by other lines of
1066        context. */
1067    
1068      else      else
1069        {        {
1070        /* See if there is a requirement to print some "after" lines from a        /* See if there is a requirement to print some "after" lines from a
# Line 467  while (ptr < endptr) Line 1072  while (ptr < endptr)
1072    
1073        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
1074          {          {
1075            int ellength;
1076          int linecount = 0;          int linecount = 0;
1077          char *p = lastmatchrestart;          char *p = lastmatchrestart;
1078    
1079          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
1080            {            {
1081            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
1082            linecount++;            linecount++;
1083            }            }
1084    
1085          /* It is important to advance lastmatchrestart during this printing so          /* It is important to advance lastmatchrestart during this printing so
1086          that it interacts correctly with any "before" printing below. */          that it interacts correctly with any "before" printing below. Print
1087            each line's data using fwrite() in case there are binary zeroes. */
1088    
1089          while (lastmatchrestart < p)          while (lastmatchrestart < p)
1090            {            {
1091            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
1092            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1093            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1094            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1095            fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1096            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
1097            }            }
1098          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
1099          }          }
# Line 510  while (ptr < endptr) Line 1116  while (ptr < endptr)
1116          char *p = ptr;          char *p = ptr;
1117    
1118          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&          while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1119                 linecount++ < before_context)                 linecount < before_context)
1120            {            {
1121            p--;            linecount++;
1122            while (p > buffer && p[-1] != '\n') p--;            p = previous_line(p, buffer);
1123            }            }
1124    
1125          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 521  while (ptr < endptr) Line 1127  while (ptr < endptr)
1127    
1128          while (p < ptr)          while (p < ptr)
1129            {            {
1130              int ellength;
1131            char *pp = p;            char *pp = p;
1132            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1133            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1134            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1135            fprintf(stdout, "%.*s", pp - p + 1, p);            fwrite(p, 1, pp - p, stdout);
1136            p = pp + 1;            p = pp;
1137            }            }
1138          }          }
1139    
1140        /* Now print the matching line(s); ensure we set hyphenpending at the end        /* Now print the matching line(s); ensure we set hyphenpending at the end
1141        of the file. */        of the file if any context lines are being output. */
1142    
1143          if (after_context > 0 || before_context > 0)
1144            endhyphenpending = TRUE;
1145    
       endhyphenpending = TRUE;  
1146        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (printname != NULL) fprintf(stdout, "%s:", printname);
1147        if (number) fprintf(stdout, "%d:", linenumber);        if (number) fprintf(stdout, "%d:", linenumber);
1148    
1149        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1150        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1151        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1152        start of the match will always be before the first \n character. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1153          the match will always be before the first newline sequence. */
1154    
1155        if (multiline)        if (multiline)
1156          {          {
1157          char *endmatch = ptr + offsets[1];          int ellength;
1158          t = ptr;          char *endmatch = ptr;
1159          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          if (!invert)
1160          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1161          linelength = endmatch - ptr;            endmatch += offsets[1];
1162              t = ptr;
1163              while (t < endmatch)
1164                {
1165                t = end_of_line(t, endptr, &ellength);
1166                if (t <= endmatch) linenumber++; else break;
1167                }
1168              }
1169            endmatch = end_of_line(endmatch, endptr, &ellength);
1170            linelength = endmatch - ptr - ellength;
1171            }
1172    
1173          /*** NOTE: Use only fwrite() to output the data line, so that binary
1174          zeroes are treated as just another data character. */
1175    
1176          /* This extra option, for Jeffrey Friedl's debugging requirements,
1177          replaces the matched string, or a specific captured string if it exists,
1178          with X. When this happens, colouring is ignored. */
1179    
1180    #ifdef JFRIEDL_DEBUG
1181          if (S_arg >= 0 && S_arg < mrc)
1182            {
1183            int first = S_arg * 2;
1184            int last  = first + 1;
1185            fwrite(ptr, 1, offsets[first], stdout);
1186            fprintf(stdout, "X");
1187            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1188          }          }
1189          else
1190    #endif
1191    
1192        fprintf(stdout, "%.*s\n", linelength, ptr);        /* We have to split the line(s) up if colouring. */
1193    
1194          if (do_colour)
1195            {
1196            fwrite(ptr, 1, offsets[0], stdout);
1197            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1198            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1199            fprintf(stdout, "%c[00m", 0x1b);
1200            fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1201              stdout);
1202            }
1203          else fwrite(ptr, 1, linelength + endlinelength, stdout);
1204        }        }
1205    
1206        /* End of doing what has to be done for a match */
1207    
1208      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1209    
1210      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1211      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1212    
1213      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1214      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1215      }      }
1216    
1217    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1218      anything to be printed), we have to move on to the end of the match before
1219      proceeding. */
1220    
1221      if (multiline && invert && match)
1222        {
1223        int ellength;
1224        char *endmatch = ptr + offsets[1];
1225        t = ptr;
1226        while (t < endmatch)
1227          {
1228          t = end_of_line(t, endptr, &ellength);
1229          if (t <= endmatch) linenumber++; else break;
1230          }
1231        endmatch = end_of_line(endmatch, endptr, &ellength);
1232        linelength = endmatch - ptr - ellength;
1233        }
1234    
1235      /* Advance to after the newline and increment the line number. The file
1236      offset to the current line is maintained in filepos. */
1237    
1238    ptr += linelength + 1;    ptr += linelength + endlinelength;
1239      filepos += linelength + endlinelength;
1240    linenumber++;    linenumber++;
1241    
1242    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
# Line 587  while (ptr < endptr) Line 1258  while (ptr < endptr)
1258    
1259      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1260      ptr -= MBUFTHIRD;      ptr -= MBUFTHIRD;
     bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);  
     endptr = buffer + bufflength;  
1261    
1262      /* Adjust any last match point */  #ifdef SUPPORT_LIBZ
1263        if (frtype == FR_LIBZ)
1264          bufflength = 2*MBUFTHIRD +
1265            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1266        else
1267    #endif
1268    
1269      if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;  #ifdef SUPPORT_LIBBZ2
1270        if (frtype == FR_LIBBZ2)
1271          bufflength = 2*MBUFTHIRD +
1272            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1273        else
1274    #endif
1275    
1276        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1277    
1278        endptr = buffer + bufflength;
1279    
1280        /* Adjust any last match point */
1281    
1282        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1283      }      }
1284    }     /* Loop through the whole file */    }     /* Loop through the whole file */
1285    
1286  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
1287  hyphenpending if it prints something. */  hyphenpending if it prints something. */
1288    
1289  do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);  if (!only_matching && !count_only)
1290  hyphenpending |= endhyphenpending;    {
1291      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1292      hyphenpending |= endhyphenpending;
1293      }
1294    
1295  /* Print the file name if we are looking for those without matches and there  /* Print the file name if we are looking for those without matches and there
1296  were none. If we found a match, we won't have got this far. */  were none. If we found a match, we won't have got this far. */
1297    
1298  if (filenames_nomatch_only)  if (filenames == FN_NOMATCH_ONLY)
1299    {    {
1300    fprintf(stdout, "%s\n", printname);    fprintf(stdout, "%s\n", printname);
1301    return 0;    return 0;
# Line 633  recursing; if it's a file, grep it. Line 1323  recursing; if it's a file, grep it.
1323    
1324  Arguments:  Arguments:
1325    pathname          the path to investigate    pathname          the path to investigate
1326    dir_recurse       TRUE if recursing is wanted (-r)    dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
   show_filenames    TRUE if file names are wanted for multiple files, except  
                       for the only file at top level when not filenames_only  
1327    only_one_at_top   TRUE if the path is the only one at toplevel    only_one_at_top   TRUE if the path is the only one at toplevel
1328    
1329  Returns:   0 if there was at least one match  Returns:   0 if there was at least one match
# Line 646  However, file opening failures are suppr Line 1334  However, file opening failures are suppr
1334  */  */
1335    
1336  static int  static int
1337  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL show_filenames,  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
   BOOL only_one_at_top)  
1338  {  {
1339  int rc = 1;  int rc = 1;
1340  int sep;  int sep;
1341  FILE *in;  int frtype;
1342  char *printname;  int pathlen;
1343    void *handle;
1344    FILE *in = NULL;           /* Ensure initialized */
1345    
1346    #ifdef SUPPORT_LIBZ
1347    gzFile ingz = NULL;
1348    #endif
1349    
1350    #ifdef SUPPORT_LIBBZ2
1351    BZFILE *inbz2 = NULL;
1352    #endif
1353    
1354  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1355    
1356  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1357    {    {
1358    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN,
1359      (filenames_only || filenames_nomatch_only ||      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
     (show_filenames && !only_one_at_top))?  
1360        stdin_name : NULL);        stdin_name : NULL);
1361    }    }
1362    
1363  /* If the file is a directory and we are recursing, scan each file within it,  /* If the file is a directory, skip it if skipping was requested or, if we are recursing, scan
1364  subject to any include or exclude patterns that were set. The scanning code is  each file within it, subject to any include or exclude patterns that were set.
1365  localized so it can be made system-specific. */  The scanning code is localized so it can be made system-specific. */
1366    
1367  if ((sep = isdirectory(pathname)) != 0 && dir_recurse)  if ((sep = isdirectory(pathname)) != 0)
1368    {    {
1369    char buffer[1024];    if (dee_action == dee_SKIP) return 1;
1370    char *nextfile;    if (dee_action == dee_RECURSE)
1371    directory_type *dir = opendirectory(pathname);      {
1372        char buffer[1024];
1373        char *nextfile;
1374        directory_type *dir = opendirectory(pathname);
1375    
1376    if (dir == NULL)      if (dir == NULL)
1377          {
1378          if (!silent)
1379            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1380              strerror(errno));
1381          return 2;
1382          }
1383    
1384        while ((nextfile = readdirectory(dir)) != NULL)
1385          {
1386          int frc, blen;
1387          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1388          blen = strlen(buffer);
1389    
1390          if (exclude_compiled != NULL &&
1391              pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1392            continue;
1393    
1394          if (include_compiled != NULL &&
1395              pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1396            continue;
1397    
1398          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1399          if (frc > 1) rc = frc;
1400           else if (frc == 0 && rc == 1) rc = 0;
1401          }
1402    
1403        closedirectory(dir);
1404        return rc;
1405        }
1406      }
1407    
1408    /* If the file is not a directory and not a regular file, skip it if that's
1409    been requested. */
1410    
1411    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1412    
1413    /* Control reaches here if we have a regular file, or if we have a directory
1414    and recursion or skipping was not requested, or if we have anything else and
1415    skipping was not requested. The scan proceeds. If this is the first and only
1416    argument at top level, we don't show the file name, unless we are only showing
1417    the file name, or the filename was forced (-H). */
1418    
1419    pathlen = strlen(pathname);
1420    
1421    /* Open using zlib if it is supported and the file name ends with .gz. */
1422    
1423    #ifdef SUPPORT_LIBZ
1424    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1425      {
1426      ingz = gzopen(pathname, "rb");
1427      if (ingz == NULL)
1428      {      {
1429      if (!silent)      if (!silent)
1430        fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1431          strerror(errno));          strerror(errno));
1432      return 2;      return 2;
1433      }      }
1434      handle = (void *)ingz;
1435      frtype = FR_LIBZ;
1436      }
1437    else
1438    #endif
1439    
1440    while ((nextfile = readdirectory(dir)) != NULL)  /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
     {  
     int frc, blen;  
     sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);  
     blen = strlen(buffer);  
   
     if (exclude_compiled != NULL &&  
         pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)  
       continue;  
1441    
1442      if (include_compiled != NULL &&  #ifdef SUPPORT_LIBBZ2
1443          pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1444        continue;    {
1445      inbz2 = BZ2_bzopen(pathname, "rb");
1446      handle = (void *)inbz2;
1447      frtype = FR_LIBBZ2;
1448      }
1449    else
1450    #endif
1451    
1452      frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);  /* Otherwise use plain fopen(). The label is so that we can come back here if
1453      if (frc > 1) rc = frc;  an attempt to read a .bz2 file indicates that it really is a plain file. */
      else if (frc == 0 && rc == 1) rc = 0;  
     }  
1454    
1455    closedirectory(dir);  #ifdef SUPPORT_LIBBZ2
1456    return rc;  PLAIN_FILE:
1457    #endif
1458      {
1459      in = fopen(pathname, "r");
1460      handle = (void *)in;
1461      frtype = FR_PLAIN;
1462    }    }
1463    
1464  /* If the file is not a directory, or we are not recursing, scan it. If this is  /* All the opening methods set errno when they fail. */
 the first and only argument at top level, we don't show the file name (unless  
 we are only showing the file name). Otherwise, control is via the  
 show_filenames variable. */  
1465    
1466  in = fopen(pathname, "r");  if (handle == NULL)
 if (in == NULL)  
1467    {    {
1468    if (!silent)    if (!silent)
1469      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 719  if (in == NULL) Line 1471  if (in == NULL)
1471    return 2;    return 2;
1472    }    }
1473    
1474  printname =  (filenames_only || filenames_nomatch_only ||  /* Now grep the file */
1475    (show_filenames && !only_one_at_top))? pathname : NULL;  
1476    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1477      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1478    
1479    /* Close in an appropriate manner. */
1480    
1481    #ifdef SUPPORT_LIBZ
1482    if (frtype == FR_LIBZ)
1483      gzclose(ingz);
1484    else
1485    #endif
1486    
1487    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1488    read failed. If the error indicates that the file isn't in fact bzipped, try
1489    again as a normal file. */
1490    
1491    #ifdef SUPPORT_LIBBZ2
1492    if (frtype == FR_LIBBZ2)
1493      {
1494      if (rc == 2)
1495        {
1496        int errnum;
1497        const char *err = BZ2_bzerror(inbz2, &errnum);
1498        if (errnum == BZ_DATA_ERROR_MAGIC)
1499          {
1500          BZ2_bzclose(inbz2);
1501          goto PLAIN_FILE;
1502          }
1503        else if (!silent)
1504          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1505            pathname, err);
1506        }
1507      BZ2_bzclose(inbz2);
1508      }
1509    else
1510    #endif
1511    
1512  rc = pcregrep(in, printname);  /* Normal file close */
1513    
1514  fclose(in);  fclose(in);
1515    
1516    /* Pass back the yield from pcregrep(). */
1517    
1518  return rc;  return rc;
1519  }  }
1520    
# Line 738  return rc; Line 1528  return rc;
1528  static int  static int
1529  usage(int rc)  usage(int rc)
1530  {  {
1531  fprintf(stderr, "Usage: pcregrep [-LMVcfhilnqrsvwx] [long-options] [pattern] [file1 file2 ...]\n");  option_item *op;
1532  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Usage: pcregrep [-");
1533    for (op = optionlist; op->one_char != 0; op++)
1534      {
1535      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1536      }
1537    fprintf(stderr, "] [long options] [pattern] [files]\n");
1538    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1539      "options.\n");
1540  return rc;  return rc;
1541  }  }
1542    
# Line 757  option_item *op; Line 1554  option_item *op;
1554    
1555  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1556  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1557  printf("PATTERN must be present if -f is not used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1558  printf("\"-\" can be used as a file name to mean STDIN.\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
1559    
1560    #ifdef SUPPORT_LIBZ
1561    printf("Files whose names end in .gz are read using zlib.\n");
1562    #endif
1563    
1564    #ifdef SUPPORT_LIBBZ2
1565    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1566    #endif
1567    
1568    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1569    printf("Other files and the standard input are read as plain files.\n\n");
1570    #else
1571    printf("All files are read as plain files, without any interpretation.\n\n");
1572    #endif
1573    
1574    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1575  printf("Options:\n");  printf("Options:\n");
1576    
1577  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
# Line 794  handle_option(int letter, int options) Line 1605  handle_option(int letter, int options)
1605  {  {
1606  switch(letter)  switch(letter)
1607    {    {
1608    case -1:  help(); exit(0);    case N_FOFFSETS: file_offsets = TRUE; break;
1609      case N_HELP: help(); exit(0);
1610      case N_LOFFSETS: line_offsets = number = TRUE; break;
1611    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1612    case 'h': filenames = FALSE; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1613      case 'H': filenames = FN_FORCE; break;
1614      case 'h': filenames = FN_NONE; break;
1615    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1616    case 'l': filenames_only = TRUE; break;    case 'l': filenames = FN_ONLY; break;
1617    case 'L': filenames_nomatch_only = TRUE; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1618    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1619    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1620      case 'o': only_matching = TRUE; break;
1621    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1622    case 'r': recurse = TRUE; break;    case 'r': dee_action = dee_RECURSE; break;
1623    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1624    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1625    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1626    case 'w': word_match = TRUE; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1627    case 'x': whole_lines = TRUE; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1628    
1629    case 'V':    case 'V':
1630    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
1631    exit(0);    exit(0);
1632    break;    break;
1633    
# Line 828  return options; Line 1643  return options;
1643    
1644    
1645  /*************************************************  /*************************************************
1646    *          Construct printed ordinal             *
1647    *************************************************/
1648    
1649    /* This turns a number into "1st", "3rd", etc. */
1650    
1651    static char *
1652    ordin(int n)
1653    {
1654    static char buffer[8];
1655    char *p = buffer;
1656    sprintf(p, "%d", n);
1657    while (*p != 0) p++;
1658    switch (n%10)
1659      {
1660      case 1: strcpy(p, "st"); break;
1661      case 2: strcpy(p, "nd"); break;
1662      case 3: strcpy(p, "rd"); break;
1663      default: strcpy(p, "th"); break;
1664      }
1665    return buffer;
1666    }
1667    
1668    
1669    
1670    /*************************************************
1671    *          Compile a single pattern              *
1672    *************************************************/
1673    
1674    /* When the -F option has been used, this is called for each substring.
1675    Otherwise it's called for each supplied pattern.
1676    
1677    Arguments:
1678      pattern        the pattern string
1679      options        the PCRE options
1680      filename       the file name, or NULL for a command-line pattern
1681      count          0 if this is the only command line pattern, or
1682                     number of the command line pattern, or
1683                     linenumber for a pattern from a file
1684    
1685    Returns:         TRUE on success, FALSE after an error
1686    */
1687    
1688    static BOOL
1689    compile_single_pattern(char *pattern, int options, char *filename, int count)
1690    {
1691    char buffer[MBUFTHIRD + 16];
1692    const char *error;
1693    int errptr;
1694    
1695    if (pattern_count >= MAX_PATTERN_COUNT)
1696      {
1697      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1698        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1699      return FALSE;
1700      }
1701    
1702    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1703      suffix[process_options]);
1704    pattern_list[pattern_count] =
1705      pcre_compile(buffer, options, &error, &errptr, pcretables);
1706    if (pattern_list[pattern_count] != NULL)
1707      {
1708      pattern_count++;
1709      return TRUE;
1710      }
1711    
1712    /* Handle compile errors */
1713    
1714    errptr -= (int)strlen(prefix[process_options]);
1715    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1716    
1717    if (filename == NULL)
1718      {
1719      if (count == 0)
1720        fprintf(stderr, "pcregrep: Error in command-line regex "
1721          "at offset %d: %s\n", errptr, error);
1722      else
1723        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1724          "at offset %d: %s\n", ordin(count), errptr, error);
1725      }
1726    else
1727      {
1728      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1729        "at offset %d: %s\n", count, filename, errptr, error);
1730      }
1731    
1732    return FALSE;
1733    }
1734    
1735    
1736    
1737    /*************************************************
1738    *           Compile one supplied pattern         *
1739    *************************************************/
1740    
1741    /* When the -F option has been used, each string may be a list of strings,
1742    separated by line breaks. They will be matched literally.
1743    
1744    Arguments:
1745      pattern        the pattern string
1746      options        the PCRE options
1747      filename       the file name, or NULL for a command-line pattern
1748      count          0 if this is the only command line pattern, or
1749                     number of the command line pattern, or
1750                     linenumber for a pattern from a file
1751    
1752    Returns:         TRUE on success, FALSE after an error
1753    */
1754    
1755    static BOOL
1756    compile_pattern(char *pattern, int options, char *filename, int count)
1757    {
1758    if ((process_options & PO_FIXED_STRINGS) != 0)
1759      {
1760      char *eop = pattern + strlen(pattern);
1761      char buffer[MBUFTHIRD];
1762      for(;;)
1763        {
1764        int ellength;
1765        char *p = end_of_line(pattern, eop, &ellength);
1766        if (ellength == 0)
1767          return compile_single_pattern(pattern, options, filename, count);
1768        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1769        pattern = p;
1770        if (!compile_single_pattern(buffer, options, filename, count))
1771          return FALSE;
1772        }
1773      }
1774    else return compile_single_pattern(pattern, options, filename, count);
1775    }
1776    
1777    
1778    
1779    /*************************************************
1780  *                Main program                    *  *                Main program                    *
1781  *************************************************/  *************************************************/
1782    
# Line 838  main(int argc, char **argv) Line 1787  main(int argc, char **argv)
1787  {  {
1788  int i, j;  int i, j;
1789  int rc = 1;  int rc = 1;
1790  int options = 0;  int pcre_options = 0;
1791    int cmd_pattern_count = 0;
1792    int hint_count = 0;
1793  int errptr;  int errptr;
 const char *error;  
1794  BOOL only_one_at_top;  BOOL only_one_at_top;
1795    char *patterns[MAX_PATTERN_COUNT];
1796    const char *locale_from = "--locale";
1797    const char *error;
1798    
1799    /* Set the default line ending value from the default in the PCRE library;
1800    "lf", "cr", "crlf", "any", and "anycrlf" are supported. Anything else is treated as "lf".
1801    */
1802    
1803    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1804    switch(i)
1805      {
1806      default:                 newline = (char *)"lf"; break;
1807      case '\r':               newline = (char *)"cr"; break;
1808      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1809      case -1:                 newline = (char *)"any"; break;
1810      case -2:                 newline = (char *)"anycrlf"; break;
1811      }
1812    
1813  /* Process the options */  /* Process the options */
1814    
# Line 855  for (i = 1; i < argc; i++) Line 1822  for (i = 1; i < argc; i++)
1822    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
1823    
1824    /* If we hit an argument that is just "-", it may be a reference to STDIN,    /* If we hit an argument that is just "-", it may be a reference to STDIN,
1825    but only if we have previously had -f to define the patterns. */    but only if we have previously had -e or -f to define the patterns. */
1826    
1827    if (argv[i][1] == 0)    if (argv[i][1] == 0)
1828      {      {
1829      if (pattern_filename != NULL) break;      if (pattern_filename != NULL || pattern_count > 0) break;
1830        else exit(usage(2));        else exit(usage(2));
1831      }      }
1832    
# Line 881  for (i = 1; i < argc; i++) Line 1848  for (i = 1; i < argc; i++)
1848      /* Some long options have data that follows after =, for example file=name.      /* Some long options have data that follows after =, for example file=name.
1849      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
1850      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
1851      with Jeff Friedl in preferring "regex" without the "p". These options are      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1852      entered in the table as "regex(p)". No option is in both these categories,      These options are entered in the table as "regex(p)". No option is in both
1853      fortunately. */      these categories, fortunately. */
1854    
1855      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
1856        {        {
# Line 898  for (i = 1; i < argc; i++) Line 1865  for (i = 1; i < argc; i++)
1865          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
1866            {            {
1867            int oplen = equals - op->long_name;            int oplen = equals - op->long_name;
1868            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1869            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1870              {              {
1871              option_data = arg + arglen;              option_data = arg + arglen;
# Line 917  for (i = 1; i < argc; i++) Line 1884  for (i = 1; i < argc; i++)
1884          char buff2[24];          char buff2[24];
1885          int baselen = opbra - op->long_name;          int baselen = opbra - op->long_name;
1886          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
1887          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1,
1888            opbra + 1);            (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1889          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1890            break;            break;
1891          }          }
# Line 931  for (i = 1; i < argc; i++) Line 1898  for (i = 1; i < argc; i++)
1898        }        }
1899      }      }
1900    
1901    
1902      /* Jeffrey Friedl's debugging harness uses these additional options which
1903      are not in the right form for putting in the option table because they use
1904      only one hyphen, yet are more than one character long. By putting them
1905      separately here, they will not get displayed as part of the help() output,
1906      but I don't think Jeffrey will care about that. */
1907    
1908    #ifdef JFRIEDL_DEBUG
1909      else if (strcmp(argv[i], "-pre") == 0) {
1910              jfriedl_prefix = argv[++i];
1911              continue;
1912      } else if (strcmp(argv[i], "-post") == 0) {
1913              jfriedl_postfix = argv[++i];
1914              continue;
1915      } else if (strcmp(argv[i], "-XT") == 0) {
1916              sscanf(argv[++i], "%d", &jfriedl_XT);
1917              continue;
1918      } else if (strcmp(argv[i], "-XR") == 0) {
1919              sscanf(argv[++i], "%d", &jfriedl_XR);
1920              continue;
1921      }
1922    #endif
1923    
1924    
1925    /* One-char options; many that have no data may be in a single argument; we    /* One-char options; many that have no data may be in a single argument; we
1926    continue till we hit the last one or one that needs data. */    continue till we hit the last one or one that needs data. */
1927    
# Line 953  for (i = 1; i < argc; i++) Line 1944  for (i = 1; i < argc; i++)
1944          option_data = s+1;          option_data = s+1;
1945          break;          break;
1946          }          }
1947        options = handle_option(*s++, options);        pcre_options = handle_option(*s++, pcre_options);
1948        }        }
1949      }      }
1950    
1951    /* At this point we should have op pointing to a matched option */    /* At this point we should have op pointing to a matched option. If the type
1952      is OP_NODATA, it means that there is no data, and the option might set
1953      something in the PCRE options. */
1954    
1955    if (op->type == OP_NODATA)    if (op->type == OP_NODATA)
     options = handle_option(op->one_char, options);  
   else  
1956      {      {
1957      if (*option_data == 0)      pcre_options = handle_option(op->one_char, pcre_options);
1958        continue;
1959        }
1960    
1961      /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1962      either has a value or defaults to something. It cannot have data in a
1963      separate item. At the moment, the only such options are "colo(u)r" and
1964      Jeffrey Friedl's special -S debugging option. */
1965    
1966      if (*option_data == 0 &&
1967          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1968        {
1969        switch (op->one_char)
1970        {        {
1971        if (i >= argc - 1 || longopwasequals)        case N_COLOUR:
1972          {        colour_option = (char *)"auto";
1973          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        break;
1974          exit(usage(2));  #ifdef JFRIEDL_DEBUG
1975          }        case 'S':
1976        option_data = argv[++i];        S_arg = 0;
1977          break;
1978    #endif
1979        }        }
1980        continue;
1981        }
1982    
1983      /* Otherwise, find the data string for the option. */
1984    
1985      if (op->type == OP_STRING) *((char **)op->dataptr) = option_data; else    if (*option_data == 0)
1986        {
1987        if (i >= argc - 1 || longopwasequals)
1988          {
1989          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1990          exit(usage(2));
1991          }
1992        option_data = argv[++i];
1993        }
1994    
1995      /* If the option type is OP_PATLIST, it's the -e option, which can be called
1996      multiple times to create a list of patterns. */
1997    
1998      if (op->type == OP_PATLIST)
1999        {
2000        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2001          {
2002          fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2003            MAX_PATTERN_COUNT);
2004          return 2;
2005          }
2006        patterns[cmd_pattern_count++] = option_data;
2007        }
2008    
2009      /* Otherwise, deal with single string or numeric data values. */
2010    
2011      else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2012        {
2013        *((char **)op->dataptr) = option_data;
2014        }
2015      else
2016        {
2017        char *endptr;
2018        int n = strtoul(option_data, &endptr, 10);
2019        if (*endptr != 0)
2020        {        {
2021        char *endptr;        if (longop)
       int n = strtoul(option_data, &endptr, 10);  
       if (*endptr != 0)  
2022          {          {
2023          if (longop)          char *equals = strchr(op->long_name, '=');
2024            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%s\n",          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2025              option_data, op->long_name);            equals - op->long_name;
2026          else          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2027            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",            option_data, nlen, op->long_name);
             option_data, op->one_char);  
         exit(usage(2));  
2028          }          }
2029        *((int *)op->dataptr) = n;        else
2030            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2031              option_data, op->one_char);
2032          exit(usage(2));
2033        }        }
2034        *((int *)op->dataptr) = n;
2035      }      }
2036    }    }
2037    
# Line 1001  if (both_context > 0) Line 2044  if (both_context > 0)
2044    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2045    }    }
2046    
2047  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2048  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  However, the latter two set the only_matching flag. */
2049    
2050  if (pattern_list == NULL || hints_list == NULL)  if ((only_matching && (file_offsets || line_offsets)) ||
2051        (file_offsets && line_offsets))
2052    {    {
2053    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2054    return 2;      "and/or --line-offsets\n");
2055      exit(usage(2));
2056    }    }
2057    
2058  /* Compile the regular expression(s). */  if (file_offsets || line_offsets) only_matching = TRUE;
2059    
2060  if (pattern_filename != NULL)  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2061    LC_ALL environment variable is set, and if so, use it. */
2062    
2063    if (locale == NULL)
2064      {
2065      locale = getenv("LC_ALL");
2066      locale_from = "LCC_ALL";
2067      }
2068    
2069    if (locale == NULL)
2070    {    {
2071    FILE *f = fopen(pattern_filename, "r");    locale = getenv("LC_CTYPE");
2072    char buffer[MBUFTHIRD + 16];    locale_from = "LC_CTYPE";
2073    char *rdstart;    }
   int adjust = 0;  
2074    
2075    if (f == NULL)  /* If a locale has been provided, set it, and generate the tables the PCRE
2076    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2077    
2078    if (locale != NULL)
2079      {
2080      if (setlocale(LC_CTYPE, locale) == NULL)
2081      {      {
2082      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2083        strerror(errno));        locale, locale_from);
2084      return 2;      return 2;
2085      }      }
2086      pcretables = pcre_maketables();
2087      }
2088    
2089    /* Sort out colouring */
2090    
2091    if (whole_lines)  if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2092      {
2093      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2094      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2095      else
2096      {      {
2097      strcpy(buffer, "^(?:");      fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2098      adjust = 4;        colour_option);
2099        return 2;
2100        }
2101      if (do_colour)
2102        {
2103        char *cs = getenv("PCREGREP_COLOUR");
2104        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2105        if (cs != NULL) colour_string = cs;
2106      }      }
2107    else if (word_match)    }
2108    
2109    /* Interpret the newline type; the default settings are Unix-like. */
2110    
2111    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2112      {
2113      pcre_options |= PCRE_NEWLINE_CR;
2114      endlinetype = EL_CR;
2115      }
2116    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2117      {
2118      pcre_options |= PCRE_NEWLINE_LF;
2119      endlinetype = EL_LF;
2120      }
2121    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2122      {
2123      pcre_options |= PCRE_NEWLINE_CRLF;
2124      endlinetype = EL_CRLF;
2125      }
2126    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2127      {
2128      pcre_options |= PCRE_NEWLINE_ANY;
2129      endlinetype = EL_ANY;
2130      }
2131    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2132      {
2133      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2134      endlinetype = EL_ANYCRLF;
2135      }
2136    else
2137      {
2138      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2139      return 2;
2140      }
2141    
2142    /* Interpret the text values for -d and -D */
2143    
2144    if (dee_option != NULL)
2145      {
2146      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2147      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2148      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2149      else
2150      {      {
2151      strcpy(buffer, "\\b");      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2152      adjust = 2;      return 2;
2153      }      }
2154      }
2155    
2156    rdstart = buffer + adjust;  if (DEE_option != NULL)
2157    while (fgets(rdstart, MBUFTHIRD, f) != NULL)    {
2158      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2159      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2160      else
2161      {      {
2162      char *s = rdstart + (int)strlen(rdstart);      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2163      if (pattern_count >= MAX_PATTERN_COUNT)      return 2;
       {  
       fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",  
         MAX_PATTERN_COUNT);  
       return 2;  
       }  
     while (s > rdstart && isspace((unsigned char)(s[-1]))) s--;  
     if (s == rdstart) continue;  
     if (whole_lines) strcpy(s, ")$");  
       else if (word_match)strcpy(s, "\\b");  
         else *s = 0;  
     pattern_list[pattern_count] = pcre_compile(buffer, options, &error,  
       &errptr, NULL);  
     if (pattern_list[pattern_count++] == NULL)  
       {  
       fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",  
         pattern_count, errptr - adjust, error);  
       return 2;  
       }  
2164      }      }
   fclose(f);  
2165    }    }
2166    
2167  /* If no file name, a single regex must be given inline. */  /* Check the values for Jeffrey Friedl's debugging options. */
2168    
2169  else  #ifdef JFRIEDL_DEBUG
2170    if (S_arg > 9)
2171    {    {
2172    char buffer[MBUFTHIRD + 16];    fprintf(stderr, "pcregrep: bad value for -S option\n");
2173    char *pat;    return 2;
2174    int adjust = 0;    }
2175    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2176      {
2177      if (jfriedl_XT == 0) jfriedl_XT = 1;
2178      if (jfriedl_XR == 0) jfriedl_XR = 1;
2179      }
2180    #endif
2181    
2182    /* Get memory to store the pattern and hints lists. */
2183    
2184    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2185    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2186    
2187    if (pattern_list == NULL || hints_list == NULL)
2188      {
2189      fprintf(stderr, "pcregrep: malloc failed\n");
2190      goto EXIT2;
2191      }
2192    
2193    /* If no patterns were provided by -e, and there is no file provided by -f,
2194    the first argument is the one and only pattern, and it must exist. */
2195    
2196    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2197      {
2198    if (i >= argc) return usage(2);    if (i >= argc) return usage(2);
2199      patterns[cmd_pattern_count++] = argv[i++];
2200      }
2201    
2202    if (whole_lines)  /* Compile the patterns that were provided on the command line, either by
2203    multiple uses of -e or as a single unkeyed pattern. */
2204    
2205    for (j = 0; j < cmd_pattern_count; j++)
2206      {
2207      if (!compile_pattern(patterns[j], pcre_options, NULL,
2208           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2209        goto EXIT2;
2210      }
2211    
2212    /* Compile the regular expressions that are provided in a file. */
2213    
2214    if (pattern_filename != NULL)
2215      {
2216      int linenumber = 0;
2217      FILE *f;
2218      char *filename;
2219      char buffer[MBUFTHIRD];
2220    
2221      if (strcmp(pattern_filename, "-") == 0)
2222      {      {
2223      sprintf(buffer, "^(?:%.*s)$", MBUFTHIRD, argv[i++]);      f = stdin;
2224      pat = buffer;      filename = stdin_name;
     adjust = 4;  
2225      }      }
2226    else if (word_match)    else
2227      {      {
2228      sprintf(buffer, "\\b%.*s\\b", MBUFTHIRD, argv[i++]);      f = fopen(pattern_filename, "r");
2229      pat = buffer;      if (f == NULL)
2230      adjust = 2;        {
2231          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2232            strerror(errno));
2233          goto EXIT2;
2234          }
2235        filename = pattern_filename;
2236      }      }
   else pat = argv[i++];  
   
   pattern_list[0] = pcre_compile(pat, options, &error, &errptr, NULL);  
2237    
2238    if (pattern_list[0] == NULL)    while (fgets(buffer, MBUFTHIRD, f) != NULL)
2239      {      {
2240      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n",      char *s = buffer + (int)strlen(buffer);
2241        errptr - adjust, error);      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2242      return 2;      *s = 0;
2243        linenumber++;
2244        if (buffer[0] == 0) continue;   /* Skip blank lines */
2245        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2246          goto EXIT2;
2247      }      }
2248    pattern_count++;  
2249      if (f != stdin) fclose(f);
2250    }    }
2251    
2252  /* Study the regular expressions, as we will be running them many times */  /* Study the regular expressions, as we will be running them many times */
# Line 1109  for (j = 0; j < pattern_count; j++) Line 2259  for (j = 0; j < pattern_count; j++)
2259      char s[16];      char s[16];
2260      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2261      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2262      return 2;      goto EXIT2;
2263      }      }
2264      hint_count++;
2265    }    }
2266    
2267  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
2268    
2269  if (exclude_pattern != NULL)  if (exclude_pattern != NULL)
2270    {    {
2271    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, NULL);    exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2272        pcretables);
2273    if (exclude_compiled == NULL)    if (exclude_compiled == NULL)
2274      {      {
2275      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2276        errptr, error);        errptr, error);
2277      return 2;      goto EXIT2;
2278      }      }
2279    }    }
2280    
2281  if (include_pattern != NULL)  if (include_pattern != NULL)
2282    {    {
2283    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, NULL);    include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2284        pcretables);
2285    if (include_compiled == NULL)    if (include_compiled == NULL)
2286      {      {
2287      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2288        errptr, error);        errptr, error);
2289      return 2;      goto EXIT2;
2290      }      }
2291    }    }
2292    
2293  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
2294    
2295  if (i >= argc) return pcregrep(stdin,  if (i >= argc)
2296    (filenames_only || filenames_nomatch_only)? stdin_name : NULL);    {
2297      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2298      goto EXIT;
2299      }
2300    
2301  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2302  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
2303  the file name if the argument is not a directory and filenames_only is not set.  the file name if the argument is not a directory and filenames are not
2304  */  otherwise forced. */
2305    
2306  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
2307    
2308  for (; i < argc; i++)  for (; i < argc; i++)
2309    {    {
2310    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2311        only_one_at_top);
2312    if (frc > 1) rc = frc;    if (frc > 1) rc = frc;
2313      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2314    }    }
2315    
2316    EXIT:
2317    if (pattern_list != NULL)
2318      {
2319      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2320      free(pattern_list);
2321      }
2322    if (hints_list != NULL)
2323      {
2324      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2325      free(hints_list);
2326      }
2327  return rc;  return rc;
2328    
2329    EXIT2:
2330    rc = 2;
2331    goto EXIT;
2332  }  }
2333    
2334  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.77  
changed lines
  Added in v.286

  ViewVC Help
Powered by ViewVC 1.1.5