/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 49 by nigel, Sat Feb 24 21:39:33 2007 UTC revision 564 by ph10, Sun Oct 31 16:07:24 2010 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2010 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44    #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55    #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 17  its pattern matching. */ Line 70  its pattern matching. */
70    
71  typedef int BOOL;  typedef int BOOL;
72    
73    #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76    #if BUFSIZ > 8192
77    #define MBUFTHIRD BUFSIZ
78    #else
79    #define MBUFTHIRD 8192
80    #endif
81    
82    /* Values for the "filenames" variable, which specifies options for file name
83    output. The order is important; it is assumed that a file name is wanted for
84    all values greater than FN_DEFAULT. */
85    
86    enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92    /* Actions for the -d and -D options */
93    
94    enum { dee_READ, dee_SKIP, dee_RECURSE };
95    enum { DEE_READ, DEE_SKIP };
96    
97    /* Actions for special processing options (flag bits) */
98    
99    #define PO_WORD_MATCH     0x0001
100    #define PO_LINE_MATCH     0x0002
101    #define PO_FIXED_STRINGS  0x0004
102    
103    /* Line ending types */
104    
105    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
113    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
114    
115    
116    
117  /*************************************************  /*************************************************
118  *               Global variables                 *  *               Global variables                 *
119  *************************************************/  *************************************************/
120    
121  static pcre *pattern;  /* Jeffrey Friedl has some debugging requirements that are not part of the
122  static pcre_extra *hints;  regular code. */
123    
124    #ifdef JFRIEDL_DEBUG
125    static int S_arg = -1;
126    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128    static const char *jfriedl_prefix = "";
129    static const char *jfriedl_postfix = "";
130    #endif
131    
132    static int  endlinetype;
133    
134    static char *colour_string = (char *)"1;31";
135    static char *colour_option = NULL;
136    static char *dee_option = NULL;
137    static char *DEE_option = NULL;
138    static char *newline = NULL;
139    static char *pattern_filename = NULL;
140    static char *stdin_name = (char *)"(standard input)";
141    static char *locale = NULL;
142    
143    static const unsigned char *pcretables = NULL;
144    
145    static int  pattern_count = 0;
146    static pcre **pattern_list = NULL;
147    static pcre_extra **hints_list = NULL;
148    
149    static char *include_pattern = NULL;
150    static char *exclude_pattern = NULL;
151    static char *include_dir_pattern = NULL;
152    static char *exclude_dir_pattern = NULL;
153    
154    static pcre *include_compiled = NULL;
155    static pcre *exclude_compiled = NULL;
156    static pcre *include_dir_compiled = NULL;
157    static pcre *exclude_dir_compiled = NULL;
158    
159    static int after_context = 0;
160    static int before_context = 0;
161    static int both_context = 0;
162    static int dee_action = dee_READ;
163    static int DEE_action = DEE_READ;
164    static int error_count = 0;
165    static int filenames = FN_DEFAULT;
166    static int process_options = 0;
167    
168    static unsigned long int match_limit = 0;
169    static unsigned long int match_limit_recursion = 0;
170    
171  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
172  static BOOL filenames_only = FALSE;  static BOOL do_colour = FALSE;
173    static BOOL file_offsets = FALSE;
174    static BOOL hyphenpending = FALSE;
175  static BOOL invert = FALSE;  static BOOL invert = FALSE;
176    static BOOL line_buffered = FALSE;
177    static BOOL line_offsets = FALSE;
178    static BOOL multiline = FALSE;
179  static BOOL number = FALSE;  static BOOL number = FALSE;
180    static BOOL omit_zero_count = FALSE;
181    static BOOL only_matching = FALSE;
182    static BOOL resource_error = FALSE;
183    static BOOL quiet = FALSE;
184  static BOOL silent = FALSE;  static BOOL silent = FALSE;
185  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
186    
187    /* Structure for options and list of them */
188    
189    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
190           OP_PATLIST };
191    
192    typedef struct option_item {
193      int type;
194      int one_char;
195      void *dataptr;
196      const char *long_name;
197      const char *help_text;
198    } option_item;
199    
200    /* Options without a single-letter equivalent get a negative value. This can be
201    used to identify them. */
202    
203    #define N_COLOUR       (-1)
204    #define N_EXCLUDE      (-2)
205    #define N_EXCLUDE_DIR  (-3)
206    #define N_HELP         (-4)
207    #define N_INCLUDE      (-5)
208    #define N_INCLUDE_DIR  (-6)
209    #define N_LABEL        (-7)
210    #define N_LOCALE       (-8)
211    #define N_NULL         (-9)
212    #define N_LOFFSETS     (-10)
213    #define N_FOFFSETS     (-11)
214    #define N_LBUFFER      (-12)
215    #define N_M_LIMIT      (-13)
216    #define N_M_LIMIT_REC  (-14)
217    
218    static option_item optionlist[] = {
219      { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
220      { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
221      { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
222      { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
223      { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
224      { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
225      { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
226      { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
227      { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
228      { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
229      { OP_PATLIST,   'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
230      { OP_NODATA,    'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
231      { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
232      { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
233      { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
234      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
235      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
236      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
237      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
238      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
239      { OP_NODATA,    N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
240      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
241      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
242      { OP_NUMBER,    N_M_LIMIT,&match_limit,      "match-limit=number", "set PCRE match limit option" },
243      { OP_NUMBER,    N_M_LIMIT_REC,&match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
245      { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
247      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
248      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
249      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
250      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
251      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
252      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
253      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
254    #ifdef JFRIEDL_DEBUG
255      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
256    #endif
257      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
258      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
259      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
260      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
261      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
262      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
263      { OP_NODATA,    0,        NULL,               NULL,            NULL }
264    };
265    
266    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
267    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
268    that the combination of -w and -x has the same effect as -x on its own, so we
269    can treat them as the same. */
270    
271    static const char *prefix[] = {
272      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
273    
274    static const char *suffix[] = {
275      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
276    
277    /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
278    
279    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
280    
281    const char utf8_table4[] = {
282      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
283      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
284      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
285      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
286    
287    
288    
289    /*************************************************
290    *            OS-specific functions               *
291    *************************************************/
292    
293    /* These functions are defined so that they can be made system specific,
294    although at present the only ones are for Unix, Win32, and for "no support". */
295    
296    
297    /************* Directory scanning in Unix ***********/
298    
299    #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
300    #include <sys/types.h>
301    #include <sys/stat.h>
302    #include <dirent.h>
303    
304    typedef DIR directory_type;
305    
306    static int
307    isdirectory(char *filename)
308    {
309    struct stat statbuf;
310    if (stat(filename, &statbuf) < 0)
311      return 0;        /* In the expectation that opening as a file will fail */
312    return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
313    }
314    
315    static directory_type *
316    opendirectory(char *filename)
317    {
318    return opendir(filename);
319    }
320    
321    static char *
322    readdirectory(directory_type *dir)
323    {
324    for (;;)
325      {
326      struct dirent *dent = readdir(dir);
327      if (dent == NULL) return NULL;
328      if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
329        return dent->d_name;
330      }
331    /* Control never reaches here */
332    }
333    
334    static void
335    closedirectory(directory_type *dir)
336    {
337    closedir(dir);
338    }
339    
340    
341    /************* Test for regular file in Unix **********/
342    
343    static int
344    isregfile(char *filename)
345    {
346    struct stat statbuf;
347    if (stat(filename, &statbuf) < 0)
348      return 1;        /* In the expectation that opening as a file will fail */
349    return (statbuf.st_mode & S_IFMT) == S_IFREG;
350    }
351    
352    
353    /************* Test for a terminal in Unix **********/
354    
355    static BOOL
356    is_stdout_tty(void)
357    {
358    return isatty(fileno(stdout));
359    }
360    
361    static BOOL
362    is_file_tty(FILE *f)
363    {
364    return isatty(fileno(f));
365    }
366    
367    
368    /************* Directory scanning in Win32 ***********/
369    
370    /* I (Philip Hazel) have no means of testing this code. It was contributed by
371    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
372    when it did not exist. David Byron added a patch that moved the #include of
373    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
374    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
375    undefined when it is indeed undefined. */
376    
377    #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
378    
379    #ifndef STRICT
380    # define STRICT
381    #endif
382    #ifndef WIN32_LEAN_AND_MEAN
383    # define WIN32_LEAN_AND_MEAN
384    #endif
385    
386    #include <windows.h>
387    
388    #ifndef INVALID_FILE_ATTRIBUTES
389    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
390    #endif
391    
392    typedef struct directory_type
393    {
394    HANDLE handle;
395    BOOL first;
396    WIN32_FIND_DATA data;
397    } directory_type;
398    
399    int
400    isdirectory(char *filename)
401    {
402    DWORD attr = GetFileAttributes(filename);
403    if (attr == INVALID_FILE_ATTRIBUTES)
404      return 0;
405    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
406    }
407    
408    directory_type *
409    opendirectory(char *filename)
410    {
411    size_t len;
412    char *pattern;
413    directory_type *dir;
414    DWORD err;
415    len = strlen(filename);
416    pattern = (char *) malloc(len + 3);
417    dir = (directory_type *) malloc(sizeof(*dir));
418    if ((pattern == NULL) || (dir == NULL))
419      {
420      fprintf(stderr, "pcregrep: malloc failed\n");
421      pcregrep_exit(2);
422      }
423    memcpy(pattern, filename, len);
424    memcpy(&(pattern[len]), "\\*", 3);
425    dir->handle = FindFirstFile(pattern, &(dir->data));
426    if (dir->handle != INVALID_HANDLE_VALUE)
427      {
428      free(pattern);
429      dir->first = TRUE;
430      return dir;
431      }
432    err = GetLastError();
433    free(pattern);
434    free(dir);
435    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
436    return NULL;
437    }
438    
439    char *
440    readdirectory(directory_type *dir)
441    {
442    for (;;)
443      {
444      if (!dir->first)
445        {
446        if (!FindNextFile(dir->handle, &(dir->data)))
447          return NULL;
448        }
449      else
450        {
451        dir->first = FALSE;
452        }
453      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
454        return dir->data.cFileName;
455      }
456    #ifndef _MSC_VER
457    return NULL;   /* Keep compiler happy; never executed */
458    #endif
459    }
460    
461    void
462    closedirectory(directory_type *dir)
463    {
464    FindClose(dir->handle);
465    free(dir);
466    }
467    
468    
469    /************* Test for regular file in Win32 **********/
470    
471    /* I don't know how to do this, or if it can be done; assume all paths are
472    regular if they are not directories. */
473    
474    int isregfile(char *filename)
475    {
476    return !isdirectory(filename);
477    }
478    
479    
480    /************* Test for a terminal in Win32 **********/
481    
482    /* I don't know how to do this; assume never */
483    
484    static BOOL
485    is_stdout_tty(void)
486    {
487    return FALSE;
488    }
489    
490    static BOOL
491    is_file_tty(FILE *f)
492    {
493    return FALSE;
494    }
495    
496    
497    /************* Directory scanning when we can't do it ***********/
498    
499    /* The type is void, and apart from isdirectory(), the functions do nothing. */
500    
501    #else
502    
503    typedef void directory_type;
504    
505    int isdirectory(char *filename) { return 0; }
506    directory_type * opendirectory(char *filename) { return (directory_type*)0;}
507    char *readdirectory(directory_type *dir) { return (char*)0;}
508    void closedirectory(directory_type *dir) {}
509    
510    
511    /************* Test for regular when we can't do it **********/
512    
513    /* Assume all files are regular. */
514    
515    int isregfile(char *filename) { return 1; }
516    
517    
518    /************* Test for a terminal when we can't do it **********/
519    
520    static BOOL
521    is_stdout_tty(void)
522    {
523    return FALSE;
524    }
525    
526    static BOOL
527    is_file_tty(FILE *f)
528    {
529    return FALSE;
530    }
531    
532    #endif
533    
534    
535    
536  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
537  /*************************************************  /*************************************************
538  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
539  *************************************************/  *************************************************/
# Line 58  return sys_errlist[n]; Line 556  return sys_errlist[n];
556    
557    
558  /*************************************************  /*************************************************
559  *              Grep an individual file           *  *         Exit from the program                  *
560  *************************************************/  *************************************************/
561    
562  static int  /* If there has been a resource error, give a suitable message.
563  pcregrep(FILE *in, char *name)  
564    Argument:  the return code
565    Returns:   does not return
566    */
567    
568    static void
569    pcregrep_exit(int rc)
570  {  {
571  int rc = 1;  if (resource_error)
572  int linenumber = 0;    {
573  int count = 0;    fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
574  int offsets[99];      "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
575  char buffer[BUFSIZ];    fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
576      }
577    
578    exit(rc);
579    }
580    
581    
582    
583    /*************************************************
584    *            Read one line of input              *
585    *************************************************/
586    
587  while (fgets(buffer, sizeof(buffer), in) != NULL)  /* Normally, input is read using fread() into a large buffer, so many lines may
588    be read at once. However, doing this for tty input means that no output appears
589    until a lot of input has been typed. Instead, tty input is handled line by
590    line. We cannot use fgets() for this, because it does not stop at a binary
591    zero: when binary zeros are embedded in the data, there is no way of telling
592    how many characters it has read.
593    
594    Arguments:
595      buffer     the buffer to read into
596      length     the maximum number of characters to read
597      f          the file
598    
599    Returns:     the number of characters read, zero at end of file
600    */
601    
602    static int
603    read_one_line(char *buffer, int length, FILE *f)
604    {
605    int c;
606    int yield = 0;
607    while ((c = fgetc(f)) != EOF)
608    {    {
609    BOOL match;    buffer[yield++] = c;
610    int length = (int)strlen(buffer);    if (c == '\n' || yield >= length) break;
611    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    }
612    linenumber++;  return yield;
613    }
614    
   match = pcre_exec(pattern, hints, buffer, length, 0, 0, offsets, 99) >= 0;  
   if (match && whole_lines && offsets[1] != length) match = FALSE;  
615    
616    if (match != invert)  
617    /*************************************************
618    *             Find end of line                   *
619    *************************************************/
620    
621    /* The length of the endline sequence that is found is set via lenptr. This may
622    be zero at the very end of the file if there is no line-ending sequence there.
623    
624    Arguments:
625      p         current position in line
626      endptr    end of available data
627      lenptr    where to put the length of the eol sequence
628    
629    Returns:    pointer just past the end of the line, including its newline byte(s)
630    */
631    
632    static char *
633    end_of_line(char *p, char *endptr, int *lenptr)
634    {
635    switch(endlinetype)
636      {
637      default:      /* Just in case */
638      case EL_LF:
639      while (p < endptr && *p != '\n') p++;
640      if (p < endptr)
641      {      {
642      if (count_only) count++;      *lenptr = 1;
643        return p + 1;
644        }
645      *lenptr = 0;
646      return endptr;
647    
648      else if (filenames_only)    case EL_CR:
649      while (p < endptr && *p != '\r') p++;
650      if (p < endptr)
651        {
652        *lenptr = 1;
653        return p + 1;
654        }
655      *lenptr = 0;
656      return endptr;
657    
658      case EL_CRLF:
659      for (;;)
660        {
661        while (p < endptr && *p != '\r') p++;
662        if (++p >= endptr)
663        {        {
664        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        *lenptr = 0;
665        return 0;        return endptr;
666          }
667        if (*p == '\n')
668          {
669          *lenptr = 2;
670          return p + 1;
671        }        }
672        }
673      break;
674    
675      else if (silent) return 0;    case EL_ANYCRLF:
676      while (p < endptr)
677        {
678        int extra = 0;
679        register int c = *((unsigned char *)p);
680    
681      else      if (utf8 && c >= 0xc0)
682        {        {
683        if (name != NULL) fprintf(stdout, "%s:", name);        int gcii, gcss;
684        if (number) fprintf(stdout, "%d:", linenumber);        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
685        fprintf(stdout, "%s\n", buffer);        gcss = 6*extra;
686          c = (c & utf8_table3[extra]) << gcss;
687          for (gcii = 1; gcii <= extra; gcii++)
688            {
689            gcss -= 6;
690            c |= (p[gcii] & 0x3f) << gcss;
691            }
692        }        }
693    
694      rc = 0;      p += 1 + extra;
     }  
   }  
695    
696  if (count_only)      switch (c)
697    {        {
698    if (name != NULL) fprintf(stdout, "%s:", name);        case 0x0a:    /* LF */
699    fprintf(stdout, "%d\n", count);        *lenptr = 1;
700    }        return p;
701    
702          case 0x0d:    /* CR */
703          if (p < endptr && *p == 0x0a)
704            {
705            *lenptr = 2;
706            p++;
707            }
708          else *lenptr = 1;
709          return p;
710    
711  return rc;        default:
712  }        break;
713          }
714        }   /* End of loop for ANYCRLF case */
715    
716      *lenptr = 0;  /* Must have hit the end */
717      return endptr;
718    
719      case EL_ANY:
720      while (p < endptr)
721        {
722        int extra = 0;
723        register int c = *((unsigned char *)p);
724    
725        if (utf8 && c >= 0xc0)
726          {
727          int gcii, gcss;
728          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
729          gcss = 6*extra;
730          c = (c & utf8_table3[extra]) << gcss;
731          for (gcii = 1; gcii <= extra; gcii++)
732            {
733            gcss -= 6;
734            c |= (p[gcii] & 0x3f) << gcss;
735            }
736          }
737    
738  /*************************************************      p += 1 + extra;
 *                Usage function                  *  
 *************************************************/  
739    
740  static int      switch (c)
741  usage(int rc)        {
742  {        case 0x0a:    /* LF */
743  fprintf(stderr, "Usage: pcregrep [-Vchilnsvx] pattern [file] ...\n");        case 0x0b:    /* VT */
744  return rc;        case 0x0c:    /* FF */
745  }        *lenptr = 1;
746          return p;
747    
748          case 0x0d:    /* CR */
749          if (p < endptr && *p == 0x0a)
750            {
751            *lenptr = 2;
752            p++;
753            }
754          else *lenptr = 1;
755          return p;
756    
757          case 0x85:    /* NEL */
758          *lenptr = utf8? 2 : 1;
759          return p;
760    
761          case 0x2028:  /* LS */
762          case 0x2029:  /* PS */
763          *lenptr = 3;
764          return p;
765    
766          default:
767          break;
768          }
769        }   /* End of loop for ANY case */
770    
771      *lenptr = 0;  /* Must have hit the end */
772      return endptr;
773      }     /* End of overall switch */
774    }
775    
776    
777    
778  /*************************************************  /*************************************************
779  *                Main program                    *  *         Find start of previous line            *
780  *************************************************/  *************************************************/
781    
782  int  /* This is called when looking back for before lines to print.
 main(int argc, char **argv)  
 {  
 int i;  
 int rc = 1;  
 int options = 0;  
 int errptr;  
 const char *error;  
 BOOL filenames = TRUE;  
783    
784  /* Process the options */  Arguments:
785      p         start of the subsequent line
786      startptr  start of available data
787    
788  for (i = 1; i < argc; i++)  Returns:    pointer to the start of the previous line
789    */
790    
791    static char *
792    previous_line(char *p, char *startptr)
793    {
794    switch(endlinetype)
795    {    {
796    char *s;    default:      /* Just in case */
797    if (argv[i][0] != '-') break;    case EL_LF:
798    s = argv[i] + 1;    p--;
799    while (*s != 0)    while (p > startptr && p[-1] != '\n') p--;
800      return p;
801    
802      case EL_CR:
803      p--;
804      while (p > startptr && p[-1] != '\n') p--;
805      return p;
806    
807      case EL_CRLF:
808      for (;;)
809        {
810        p -= 2;
811        while (p > startptr && p[-1] != '\n') p--;
812        if (p <= startptr + 1 || p[-2] == '\r') return p;
813        }
814      return p;   /* But control should never get here */
815    
816      case EL_ANY:
817      case EL_ANYCRLF:
818      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
819      if (utf8) while ((*p & 0xc0) == 0x80) p--;
820    
821      while (p > startptr)
822      {      {
823      switch (*s++)      register int c;
824        char *pp = p - 1;
825    
826        if (utf8)
827          {
828          int extra = 0;
829          while ((*pp & 0xc0) == 0x80) pp--;
830          c = *((unsigned char *)pp);
831          if (c >= 0xc0)
832            {
833            int gcii, gcss;
834            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
835            gcss = 6*extra;
836            c = (c & utf8_table3[extra]) << gcss;
837            for (gcii = 1; gcii <= extra; gcii++)
838              {
839              gcss -= 6;
840              c |= (pp[gcii] & 0x3f) << gcss;
841              }
842            }
843          }
844        else c = *((unsigned char *)pp);
845    
846        if (endlinetype == EL_ANYCRLF) switch (c)
847        {        {
848        case 'c': count_only = TRUE; break;        case 0x0a:    /* LF */
849        case 'h': filenames = FALSE; break;        case 0x0d:    /* CR */
850        case 'i': options |= PCRE_CASELESS; break;        return p;
       case 'l': filenames_only = TRUE;  
       case 'n': number = TRUE; break;  
       case 's': silent = TRUE; break;  
       case 'v': invert = TRUE; break;  
       case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;  
851    
852        case 'V':        default:
       fprintf(stderr, "PCRE version %s\n", pcre_version());  
853        break;        break;
854          }
855    
856        else switch (c)
857          {
858          case 0x0a:    /* LF */
859          case 0x0b:    /* VT */
860          case 0x0c:    /* FF */
861          case 0x0d:    /* CR */
862          case 0x85:    /* NEL */
863          case 0x2028:  /* LS */
864          case 0x2029:  /* PS */
865          return p;
866    
867        default:        default:
868        fprintf(stderr, "pcregrep: unknown option %c\n", s[-1]);        break;
       return usage(2);  
869        }        }
     }  
   }  
870    
871  /* There must be at least a regexp argument */      p = pp;  /* Back one character */
872        }        /* End of loop for ANY case */
873    
874  if (i >= argc) return usage(0);    return startptr;  /* Hit start of data */
875      }     /* End of overall switch */
876    }
877    
 /* Compile the regular expression. */  
878    
 pattern = pcre_compile(argv[i++], options, &error, &errptr, NULL);  
 if (pattern == NULL)  
   {  
   fprintf(stderr, "pcregrep: error in regex at offset %d: %s\n", errptr, error);  
   return 2;  
   }  
879    
 /* Study the regular expression, as we will be running it may times */  
880    
881  hints = pcre_study(pattern, 0, &error);  
882  if (error != NULL)  /*************************************************
883    *       Print the previous "after" lines         *
884    *************************************************/
885    
886    /* This is called if we are about to lose said lines because of buffer filling,
887    and at the end of the file. The data in the line is written using fwrite() so
888    that a binary zero does not terminate it.
889    
890    Arguments:
891      lastmatchnumber   the number of the last matching line, plus one
892      lastmatchrestart  where we restarted after the last match
893      endptr            end of available data
894      printname         filename for printing
895    
896    Returns:            nothing
897    */
898    
899    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
900      char *endptr, char *printname)
901    {
902    if (after_context > 0 && lastmatchnumber > 0)
903    {    {
904    fprintf(stderr, "pcregrep: error while studing regex: %s\n", error);    int count = 0;
905    return 2;    while (lastmatchrestart < endptr && count++ < after_context)
906        {
907        int ellength;
908        char *pp = lastmatchrestart;
909        if (printname != NULL) fprintf(stdout, "%s-", printname);
910        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
911        pp = end_of_line(pp, endptr, &ellength);
912        FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
913        lastmatchrestart = pp;
914        }
915      hyphenpending = TRUE;
916    }    }
917    }
918    
 /* If there are no further arguments, do the business on stdin and exit */  
919    
 if (i >= argc) return pcregrep(stdin, NULL);  
920    
921  /* Otherwise, work through the remaining arguments as files. If there is only  /*************************************************
922  one, don't give its name on the output. */  *   Apply patterns to subject till one matches   *
923    *************************************************/
924    
925  if (i == argc - 1) filenames = FALSE;  /* This function is called to run through all patterns, looking for a match. It
926  if (filenames_only) filenames = TRUE;  is used multiple times for the same subject when colouring is enabled, in order
927    to find all possible matches.
928    
929    Arguments:
930      matchptr    the start of the subject
931      length      the length of the subject to match
932      offsets     the offsets vector to fill in
933      mrc         address of where to put the result of pcre_exec()
934    
935    Returns:      TRUE if there was a match
936                  FALSE if there was no match
937                  invert if there was a non-fatal error
938    */
939    
940  for (; i < argc; i++)  static BOOL
941    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
942    {
943    int i;
944    size_t slen = length;
945    const char *msg = "this text:\n\n";
946    if (slen > 200)
947    {    {
948    FILE *in = fopen(argv[i], "r");    slen = 200;
949    if (in == NULL)    msg = "text that starts:\n\n";
950      {    }
951      fprintf(stderr, "%s: failed to open: %s\n", argv[i], strerror(errno));  for (i = 0; i < pattern_count; i++)
952      rc = 2;    {
953      }    *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
954    else      PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
955      if (*mrc >= 0) return TRUE;
956      if (*mrc == PCRE_ERROR_NOMATCH) continue;
957      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
958      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
959      fprintf(stderr, "%s", msg);
960      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
961      fprintf(stderr, "\n\n");
962      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
963        resource_error = TRUE;
964      if (error_count++ > 20)
965      {      {
966      int frc = pcregrep(in, filenames? argv[i] : NULL);      fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
967      if (frc == 0 && rc == 1) rc = 0;      pcregrep_exit(2);
     fclose(in);  
968      }      }
969      return invert;    /* No more matching; don't show the line again */
970    }    }
971    
972    return FALSE;  /* No match, no errors */
973    }
974    
975    
976    
977    /*************************************************
978    *            Grep an individual file             *
979    *************************************************/
980    
981    /* This is called from grep_or_recurse() below. It uses a buffer that is three
982    times the value of MBUFTHIRD. The matching point is never allowed to stray into
983    the top third of the buffer, thus keeping more of the file available for
984    context printing or for multiline scanning. For large files, the pointer will
985    be in the middle third most of the time, so the bottom third is available for
986    "before" context printing.
987    
988    Arguments:
989      handle       the fopened FILE stream for a normal file
990                   the gzFile pointer when reading is via libz
991                   the BZFILE pointer when reading is via libbz2
992      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
993      printname    the file name if it is to be printed for each match
994                   or NULL if the file name is not to be printed
995                   it cannot be NULL if filenames[_nomatch]_only is set
996    
997    Returns:       0 if there was at least one match
998                   1 otherwise (no matches)
999                   2 if there is a read error on a .bz2 file
1000    */
1001    
1002    static int
1003    pcregrep(void *handle, int frtype, char *printname)
1004    {
1005    int rc = 1;
1006    int linenumber = 1;
1007    int lastmatchnumber = 0;
1008    int count = 0;
1009    int filepos = 0;
1010    int offsets[OFFSET_SIZE];
1011    char *lastmatchrestart = NULL;
1012    char buffer[3*MBUFTHIRD];
1013    char *ptr = buffer;
1014    char *endptr;
1015    size_t bufflength;
1016    BOOL endhyphenpending = FALSE;
1017    BOOL input_line_buffered = line_buffered;
1018    FILE *in = NULL;                    /* Ensure initialized */
1019    
1020    #ifdef SUPPORT_LIBZ
1021    gzFile ingz = NULL;
1022    #endif
1023    
1024    #ifdef SUPPORT_LIBBZ2
1025    BZFILE *inbz2 = NULL;
1026    #endif
1027    
1028    
1029    /* Do the first read into the start of the buffer and set up the pointer to end
1030    of what we have. In the case of libz, a non-zipped .gz file will be read as a
1031    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1032    fail. */
1033    
1034    #ifdef SUPPORT_LIBZ
1035    if (frtype == FR_LIBZ)
1036      {
1037      ingz = (gzFile)handle;
1038      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1039      }
1040    else
1041    #endif
1042    
1043    #ifdef SUPPORT_LIBBZ2
1044    if (frtype == FR_LIBBZ2)
1045      {
1046      inbz2 = (BZFILE *)handle;
1047      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1048      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1049      }                                    /* without the cast it is unsigned. */
1050    else
1051    #endif
1052    
1053      {
1054      in = (FILE *)handle;
1055      if (is_file_tty(in)) input_line_buffered = TRUE;
1056      bufflength = input_line_buffered?
1057        read_one_line(buffer, 3*MBUFTHIRD, in) :
1058        fread(buffer, 1, 3*MBUFTHIRD, in);
1059      }
1060    
1061    endptr = buffer + bufflength;
1062    
1063    /* Loop while the current pointer is not at the end of the file. For large
1064    files, endptr will be at the end of the buffer when we are in the middle of the
1065    file, but ptr will never get there, because as soon as it gets over 2/3 of the
1066    way, the buffer is shifted left and re-filled. */
1067    
1068    while (ptr < endptr)
1069      {
1070      int endlinelength;
1071      int mrc = 0;
1072      BOOL match;
1073      char *matchptr = ptr;
1074      char *t = ptr;
1075      size_t length, linelength;
1076    
1077      /* At this point, ptr is at the start of a line. We need to find the length
1078      of the subject string to pass to pcre_exec(). In multiline mode, it is the
1079      length remainder of the data in the buffer. Otherwise, it is the length of
1080      the next line, excluding the terminating newline. After matching, we always
1081      advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1082      option is used for compiling, so that any match is constrained to be in the
1083      first line. */
1084    
1085      t = end_of_line(t, endptr, &endlinelength);
1086      linelength = t - ptr - endlinelength;
1087      length = multiline? (size_t)(endptr - ptr) : linelength;
1088    
1089      /* Extra processing for Jeffrey Friedl's debugging. */
1090    
1091    #ifdef JFRIEDL_DEBUG
1092      if (jfriedl_XT || jfriedl_XR)
1093      {
1094          #include <sys/time.h>
1095          #include <time.h>
1096          struct timeval start_time, end_time;
1097          struct timezone dummy;
1098          int i;
1099    
1100          if (jfriedl_XT)
1101          {
1102              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1103              const char *orig = ptr;
1104              ptr = malloc(newlen + 1);
1105              if (!ptr) {
1106                      printf("out of memory");
1107                      pcregrep_exit(2);
1108              }
1109              endptr = ptr;
1110              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1111              for (i = 0; i < jfriedl_XT; i++) {
1112                      strncpy(endptr, orig,  length);
1113                      endptr += length;
1114              }
1115              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1116              length = newlen;
1117          }
1118    
1119          if (gettimeofday(&start_time, &dummy) != 0)
1120                  perror("bad gettimeofday");
1121    
1122    
1123          for (i = 0; i < jfriedl_XR; i++)
1124              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1125                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1126    
1127          if (gettimeofday(&end_time, &dummy) != 0)
1128                  perror("bad gettimeofday");
1129    
1130          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1131                          -
1132                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1133    
1134          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1135          return 0;
1136      }
1137    #endif
1138    
1139      /* We come back here after a match when the -o option (only_matching) is set,
1140      in order to find any further matches in the same line. */
1141    
1142      ONLY_MATCHING_RESTART:
1143    
1144      /* Run through all the patterns until one matches or there is an error other
1145      than NOMATCH. This code is in a subroutine so that it can be re-used for
1146      finding subsequent matches when colouring matched lines. */
1147    
1148      match = match_patterns(matchptr, length, offsets, &mrc);
1149    
1150      /* If it's a match or a not-match (as required), do what's wanted. */
1151    
1152      if (match != invert)
1153        {
1154        BOOL hyphenprinted = FALSE;
1155    
1156        /* We've failed if we want a file that doesn't have any matches. */
1157    
1158        if (filenames == FN_NOMATCH_ONLY) return 1;
1159    
1160        /* Just count if just counting is wanted. */
1161    
1162        if (count_only) count++;
1163    
1164        /* If all we want is a file name, there is no need to scan any more lines
1165        in the file. */
1166    
1167        else if (filenames == FN_MATCH_ONLY)
1168          {
1169          fprintf(stdout, "%s\n", printname);
1170          return 0;
1171          }
1172    
1173        /* Likewise, if all we want is a yes/no answer. */
1174    
1175        else if (quiet) return 0;
1176    
1177        /* The --only-matching option prints just the substring that matched, and
1178        the --file-offsets and --line-offsets options output offsets for the
1179        matching substring (they both force --only-matching). None of these options
1180        prints any context. Afterwards, adjust the start and length, and then jump
1181        back to look for further matches in the same line. If we are in invert
1182        mode, however, nothing is printed - this could be still useful because the
1183        return code is set. */
1184    
1185        else if (only_matching)
1186          {
1187          if (!invert)
1188            {
1189            if (printname != NULL) fprintf(stdout, "%s:", printname);
1190            if (number) fprintf(stdout, "%d:", linenumber);
1191            if (line_offsets)
1192              fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1193                offsets[1] - offsets[0]);
1194            else if (file_offsets)
1195              fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1196                offsets[1] - offsets[0]);
1197            else
1198              {
1199              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1200              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1201              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1202              }
1203            fprintf(stdout, "\n");
1204            matchptr += offsets[1];
1205            length -= offsets[1];
1206            match = FALSE;
1207            if (line_buffered) fflush(stdout);
1208            rc = 0;    /* Had some success */
1209            goto ONLY_MATCHING_RESTART;
1210            }
1211          }
1212    
1213        /* This is the default case when none of the above options is set. We print
1214        the matching lines(s), possibly preceded and/or followed by other lines of
1215        context. */
1216    
1217        else
1218          {
1219          /* See if there is a requirement to print some "after" lines from a
1220          previous match. We never print any overlaps. */
1221    
1222          if (after_context > 0 && lastmatchnumber > 0)
1223            {
1224            int ellength;
1225            int linecount = 0;
1226            char *p = lastmatchrestart;
1227    
1228            while (p < ptr && linecount < after_context)
1229              {
1230              p = end_of_line(p, ptr, &ellength);
1231              linecount++;
1232              }
1233    
1234            /* It is important to advance lastmatchrestart during this printing so
1235            that it interacts correctly with any "before" printing below. Print
1236            each line's data using fwrite() in case there are binary zeroes. */
1237    
1238            while (lastmatchrestart < p)
1239              {
1240              char *pp = lastmatchrestart;
1241              if (printname != NULL) fprintf(stdout, "%s-", printname);
1242              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1243              pp = end_of_line(pp, endptr, &ellength);
1244              FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1245              lastmatchrestart = pp;
1246              }
1247            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1248            }
1249    
1250          /* If there were non-contiguous lines printed above, insert hyphens. */
1251    
1252          if (hyphenpending)
1253            {
1254            fprintf(stdout, "--\n");
1255            hyphenpending = FALSE;
1256            hyphenprinted = TRUE;
1257            }
1258    
1259          /* See if there is a requirement to print some "before" lines for this
1260          match. Again, don't print overlaps. */
1261    
1262          if (before_context > 0)
1263            {
1264            int linecount = 0;
1265            char *p = ptr;
1266    
1267            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1268                   linecount < before_context)
1269              {
1270              linecount++;
1271              p = previous_line(p, buffer);
1272              }
1273    
1274            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1275              fprintf(stdout, "--\n");
1276    
1277            while (p < ptr)
1278              {
1279              int ellength;
1280              char *pp = p;
1281              if (printname != NULL) fprintf(stdout, "%s-", printname);
1282              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1283              pp = end_of_line(pp, endptr, &ellength);
1284              FWRITE(p, 1, pp - p, stdout);
1285              p = pp;
1286              }
1287            }
1288    
1289          /* Now print the matching line(s); ensure we set hyphenpending at the end
1290          of the file if any context lines are being output. */
1291    
1292          if (after_context > 0 || before_context > 0)
1293            endhyphenpending = TRUE;
1294    
1295          if (printname != NULL) fprintf(stdout, "%s:", printname);
1296          if (number) fprintf(stdout, "%d:", linenumber);
1297    
1298          /* In multiline mode, we want to print to the end of the line in which
1299          the end of the matched string is found, so we adjust linelength and the
1300          line number appropriately, but only when there actually was a match
1301          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1302          the match will always be before the first newline sequence. */
1303    
1304          if (multiline)
1305            {
1306            int ellength;
1307            char *endmatch = ptr;
1308            if (!invert)
1309              {
1310              endmatch += offsets[1];
1311              t = ptr;
1312              while (t < endmatch)
1313                {
1314                t = end_of_line(t, endptr, &ellength);
1315                if (t <= endmatch) linenumber++; else break;
1316                }
1317              }
1318            endmatch = end_of_line(endmatch, endptr, &ellength);
1319            linelength = endmatch - ptr - ellength;
1320            }
1321    
1322          /*** NOTE: Use only fwrite() to output the data line, so that binary
1323          zeroes are treated as just another data character. */
1324    
1325          /* This extra option, for Jeffrey Friedl's debugging requirements,
1326          replaces the matched string, or a specific captured string if it exists,
1327          with X. When this happens, colouring is ignored. */
1328    
1329    #ifdef JFRIEDL_DEBUG
1330          if (S_arg >= 0 && S_arg < mrc)
1331            {
1332            int first = S_arg * 2;
1333            int last  = first + 1;
1334            FWRITE(ptr, 1, offsets[first], stdout);
1335            fprintf(stdout, "X");
1336            FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1337            }
1338          else
1339    #endif
1340    
1341          /* We have to split the line(s) up if colouring, and search for further
1342          matches. */
1343    
1344          if (do_colour)
1345            {
1346            int last_offset = 0;
1347            FWRITE(ptr, 1, offsets[0], stdout);
1348            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1349            FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1350            fprintf(stdout, "%c[00m", 0x1b);
1351            for (;;)
1352              {
1353              last_offset += offsets[1];
1354              matchptr += offsets[1];
1355              length -= offsets[1];
1356              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1357              FWRITE(matchptr, 1, offsets[0], stdout);
1358              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1359              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1360              fprintf(stdout, "%c[00m", 0x1b);
1361              }
1362            FWRITE(ptr + last_offset, 1,
1363              (linelength + endlinelength) - last_offset, stdout);
1364            }
1365    
1366          /* Not colouring; no need to search for further matches */
1367    
1368          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1369          }
1370    
1371        /* End of doing what has to be done for a match. If --line-buffered was
1372        given, flush the output. */
1373    
1374        if (line_buffered) fflush(stdout);
1375        rc = 0;    /* Had some success */
1376    
1377        /* Remember where the last match happened for after_context. We remember
1378        where we are about to restart, and that line's number. */
1379    
1380        lastmatchrestart = ptr + linelength + endlinelength;
1381        lastmatchnumber = linenumber + 1;
1382        }
1383    
1384      /* For a match in multiline inverted mode (which of course did not cause
1385      anything to be printed), we have to move on to the end of the match before
1386      proceeding. */
1387    
1388      if (multiline && invert && match)
1389        {
1390        int ellength;
1391        char *endmatch = ptr + offsets[1];
1392        t = ptr;
1393        while (t < endmatch)
1394          {
1395          t = end_of_line(t, endptr, &ellength);
1396          if (t <= endmatch) linenumber++; else break;
1397          }
1398        endmatch = end_of_line(endmatch, endptr, &ellength);
1399        linelength = endmatch - ptr - ellength;
1400        }
1401    
1402      /* Advance to after the newline and increment the line number. The file
1403      offset to the current line is maintained in filepos. */
1404    
1405      ptr += linelength + endlinelength;
1406      filepos += (int)(linelength + endlinelength);
1407      linenumber++;
1408    
1409      /* If input is line buffered, and the buffer is not yet full, read another
1410      line and add it into the buffer. */
1411    
1412      if (input_line_buffered && bufflength < sizeof(buffer))
1413        {
1414        int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1415        bufflength += add;
1416        endptr += add;
1417        }
1418    
1419      /* If we haven't yet reached the end of the file (the buffer is full), and
1420      the current point is in the top 1/3 of the buffer, slide the buffer down by
1421      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1422      about to be lost, print them. */
1423    
1424      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1425        {
1426        if (after_context > 0 &&
1427            lastmatchnumber > 0 &&
1428            lastmatchrestart < buffer + MBUFTHIRD)
1429          {
1430          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1431          lastmatchnumber = 0;
1432          }
1433    
1434        /* Now do the shuffle */
1435    
1436        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1437        ptr -= MBUFTHIRD;
1438    
1439    #ifdef SUPPORT_LIBZ
1440        if (frtype == FR_LIBZ)
1441          bufflength = 2*MBUFTHIRD +
1442            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1443        else
1444    #endif
1445    
1446    #ifdef SUPPORT_LIBBZ2
1447        if (frtype == FR_LIBBZ2)
1448          bufflength = 2*MBUFTHIRD +
1449            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1450        else
1451    #endif
1452    
1453        bufflength = 2*MBUFTHIRD +
1454          (input_line_buffered?
1455           read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1456           fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1457        endptr = buffer + bufflength;
1458    
1459        /* Adjust any last match point */
1460    
1461        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1462        }
1463      }     /* Loop through the whole file */
1464    
1465    /* End of file; print final "after" lines if wanted; do_after_lines sets
1466    hyphenpending if it prints something. */
1467    
1468    if (!only_matching && !count_only)
1469      {
1470      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1471      hyphenpending |= endhyphenpending;
1472      }
1473    
1474    /* Print the file name if we are looking for those without matches and there
1475    were none. If we found a match, we won't have got this far. */
1476    
1477    if (filenames == FN_NOMATCH_ONLY)
1478      {
1479      fprintf(stdout, "%s\n", printname);
1480      return 0;
1481      }
1482    
1483    /* Print the match count if wanted */
1484    
1485    if (count_only)
1486      {
1487      if (count > 0 || !omit_zero_count)
1488        {
1489        if (printname != NULL && filenames != FN_NONE)
1490          fprintf(stdout, "%s:", printname);
1491        fprintf(stdout, "%d\n", count);
1492        }
1493      }
1494    
1495    return rc;
1496    }
1497    
1498    
1499    
1500    /*************************************************
1501    *     Grep a file or recurse into a directory    *
1502    *************************************************/
1503    
1504    /* Given a path name, if it's a directory, scan all the files if we are
1505    recursing; if it's a file, grep it.
1506    
1507    Arguments:
1508      pathname          the path to investigate
1509      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1510      only_one_at_top   TRUE if the path is the only one at toplevel
1511    
1512    Returns:   0 if there was at least one match
1513               1 if there were no matches
1514               2 if there was some kind of error
1515    
1516    However, file opening failures are suppressed if "silent" is set.
1517    */
1518    
1519    static int
1520    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1521    {
1522    int rc = 1;
1523    int sep;
1524    int frtype;
1525    int pathlen;
1526    void *handle;
1527    FILE *in = NULL;           /* Ensure initialized */
1528    
1529    #ifdef SUPPORT_LIBZ
1530    gzFile ingz = NULL;
1531    #endif
1532    
1533    #ifdef SUPPORT_LIBBZ2
1534    BZFILE *inbz2 = NULL;
1535    #endif
1536    
1537    /* If the file name is "-" we scan stdin */
1538    
1539    if (strcmp(pathname, "-") == 0)
1540      {
1541      return pcregrep(stdin, FR_PLAIN,
1542        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1543          stdin_name : NULL);
1544      }
1545    
1546    /* If the file is a directory, skip if skipping or if we are recursing, scan
1547    each file and directory within it, subject to any include or exclude patterns
1548    that were set. The scanning code is localized so it can be made
1549    system-specific. */
1550    
1551    if ((sep = isdirectory(pathname)) != 0)
1552      {
1553      if (dee_action == dee_SKIP) return 1;
1554      if (dee_action == dee_RECURSE)
1555        {
1556        char buffer[1024];
1557        char *nextfile;
1558        directory_type *dir = opendirectory(pathname);
1559    
1560        if (dir == NULL)
1561          {
1562          if (!silent)
1563            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1564              strerror(errno));
1565          return 2;
1566          }
1567    
1568        while ((nextfile = readdirectory(dir)) != NULL)
1569          {
1570          int frc, nflen;
1571          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1572          nflen = (int)(strlen(nextfile));
1573    
1574          if (isdirectory(buffer))
1575            {
1576            if (exclude_dir_compiled != NULL &&
1577                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1578              continue;
1579    
1580            if (include_dir_compiled != NULL &&
1581                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1582              continue;
1583            }
1584          else
1585            {
1586            if (exclude_compiled != NULL &&
1587                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1588              continue;
1589    
1590            if (include_compiled != NULL &&
1591                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1592              continue;
1593            }
1594    
1595          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1596          if (frc > 1) rc = frc;
1597           else if (frc == 0 && rc == 1) rc = 0;
1598          }
1599    
1600        closedirectory(dir);
1601        return rc;
1602        }
1603      }
1604    
1605    /* If the file is not a directory and not a regular file, skip it if that's
1606    been requested. */
1607    
1608    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1609    
1610    /* Control reaches here if we have a regular file, or if we have a directory
1611    and recursion or skipping was not requested, or if we have anything else and
1612    skipping was not requested. The scan proceeds. If this is the first and only
1613    argument at top level, we don't show the file name, unless we are only showing
1614    the file name, or the filename was forced (-H). */
1615    
1616    pathlen = (int)(strlen(pathname));
1617    
1618    /* Open using zlib if it is supported and the file name ends with .gz. */
1619    
1620    #ifdef SUPPORT_LIBZ
1621    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1622      {
1623      ingz = gzopen(pathname, "rb");
1624      if (ingz == NULL)
1625        {
1626        if (!silent)
1627          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1628            strerror(errno));
1629        return 2;
1630        }
1631      handle = (void *)ingz;
1632      frtype = FR_LIBZ;
1633      }
1634    else
1635    #endif
1636    
1637    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1638    
1639    #ifdef SUPPORT_LIBBZ2
1640    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1641      {
1642      inbz2 = BZ2_bzopen(pathname, "rb");
1643      handle = (void *)inbz2;
1644      frtype = FR_LIBBZ2;
1645      }
1646    else
1647    #endif
1648    
1649    /* Otherwise use plain fopen(). The label is so that we can come back here if
1650    an attempt to read a .bz2 file indicates that it really is a plain file. */
1651    
1652    #ifdef SUPPORT_LIBBZ2
1653    PLAIN_FILE:
1654    #endif
1655      {
1656      in = fopen(pathname, "rb");
1657      handle = (void *)in;
1658      frtype = FR_PLAIN;
1659      }
1660    
1661    /* All the opening methods set errno when they fail. */
1662    
1663    if (handle == NULL)
1664      {
1665      if (!silent)
1666        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1667          strerror(errno));
1668      return 2;
1669      }
1670    
1671    /* Now grep the file */
1672    
1673    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1674      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1675    
1676    /* Close in an appropriate manner. */
1677    
1678    #ifdef SUPPORT_LIBZ
1679    if (frtype == FR_LIBZ)
1680      gzclose(ingz);
1681    else
1682    #endif
1683    
1684    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1685    read failed. If the error indicates that the file isn't in fact bzipped, try
1686    again as a normal file. */
1687    
1688    #ifdef SUPPORT_LIBBZ2
1689    if (frtype == FR_LIBBZ2)
1690      {
1691      if (rc == 2)
1692        {
1693        int errnum;
1694        const char *err = BZ2_bzerror(inbz2, &errnum);
1695        if (errnum == BZ_DATA_ERROR_MAGIC)
1696          {
1697          BZ2_bzclose(inbz2);
1698          goto PLAIN_FILE;
1699          }
1700        else if (!silent)
1701          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1702            pathname, err);
1703        }
1704      BZ2_bzclose(inbz2);
1705      }
1706    else
1707    #endif
1708    
1709    /* Normal file close */
1710    
1711    fclose(in);
1712    
1713    /* Pass back the yield from pcregrep(). */
1714    
1715    return rc;
1716    }
1717    
1718    
1719    
1720    
1721    /*************************************************
1722    *                Usage function                  *
1723    *************************************************/
1724    
1725    static int
1726    usage(int rc)
1727    {
1728    option_item *op;
1729    fprintf(stderr, "Usage: pcregrep [-");
1730    for (op = optionlist; op->one_char != 0; op++)
1731      {
1732      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1733      }
1734    fprintf(stderr, "] [long options] [pattern] [files]\n");
1735    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1736      "options.\n");
1737  return rc;  return rc;
1738  }  }
1739    
1740  /* End */  
1741    
1742    
1743    /*************************************************
1744    *                Help function                   *
1745    *************************************************/
1746    
1747    static void
1748    help(void)
1749    {
1750    option_item *op;
1751    
1752    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1753    printf("Search for PATTERN in each FILE or standard input.\n");
1754    printf("PATTERN must be present if neither -e nor -f is used.\n");
1755    printf("\"-\" can be used as a file name to mean STDIN.\n");
1756    
1757    #ifdef SUPPORT_LIBZ
1758    printf("Files whose names end in .gz are read using zlib.\n");
1759    #endif
1760    
1761    #ifdef SUPPORT_LIBBZ2
1762    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1763    #endif
1764    
1765    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1766    printf("Other files and the standard input are read as plain files.\n\n");
1767    #else
1768    printf("All files are read as plain files, without any interpretation.\n\n");
1769    #endif
1770    
1771    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1772    printf("Options:\n");
1773    
1774    for (op = optionlist; op->one_char != 0; op++)
1775      {
1776      int n;
1777      char s[4];
1778      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1779      n = 30 - printf("  %s --%s", s, op->long_name);
1780      if (n < 1) n = 1;
1781      printf("%.*s%s\n", n, "                    ", op->help_text);
1782      }
1783    
1784    printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1785    printf("trailing white space is removed and blank lines are ignored.\n");
1786    printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1787    
1788    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1789    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1790    }
1791    
1792    
1793    
1794    
1795    /*************************************************
1796    *    Handle a single-letter, no data option      *
1797    *************************************************/
1798    
1799    static int
1800    handle_option(int letter, int options)
1801    {
1802    switch(letter)
1803      {
1804      case N_FOFFSETS: file_offsets = TRUE; break;
1805      case N_HELP: help(); pcregrep_exit(0);
1806      case N_LOFFSETS: line_offsets = number = TRUE; break;
1807      case N_LBUFFER: line_buffered = TRUE; break;
1808      case 'c': count_only = TRUE; break;
1809      case 'F': process_options |= PO_FIXED_STRINGS; break;
1810      case 'H': filenames = FN_FORCE; break;
1811      case 'h': filenames = FN_NONE; break;
1812      case 'i': options |= PCRE_CASELESS; break;
1813      case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1814      case 'L': filenames = FN_NOMATCH_ONLY; break;
1815      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1816      case 'n': number = TRUE; break;
1817      case 'o': only_matching = TRUE; break;
1818      case 'q': quiet = TRUE; break;
1819      case 'r': dee_action = dee_RECURSE; break;
1820      case 's': silent = TRUE; break;
1821      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1822      case 'v': invert = TRUE; break;
1823      case 'w': process_options |= PO_WORD_MATCH; break;
1824      case 'x': process_options |= PO_LINE_MATCH; break;
1825    
1826      case 'V':
1827      fprintf(stderr, "pcregrep version %s\n", pcre_version());
1828      pcregrep_exit(0);
1829      break;
1830    
1831      default:
1832      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1833      pcregrep_exit(usage(2));
1834      }
1835    
1836    return options;
1837    }
1838    
1839    
1840    
1841    
1842    /*************************************************
1843    *          Construct printed ordinal             *
1844    *************************************************/
1845    
/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12 or 13 take "th" ("11th", "112th"), as does every number that does
not end in 1, 2 or 3. The text is built in a static buffer, so it is
overwritten by the next call.

Argument:   n    the number
Returns:    pointer to the static buffer holding the ordinal text
*/

static char *
ordin(int n)
{
/* Three characters per byte of an int are enough for its sign and decimal
digits; the remaining four bytes cover the two-letter ending and the
terminating zero. */

static char buffer[sizeof(int) * 3 + 4];
const char *ending = "th";
int lasttwo = n % 100;

/* 11, 12 and 13 keep the default "th" even though they end in 1, 2 or 3. */

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1864    
1865    
1866    
1867    /*************************************************
1868    *          Compile a single pattern              *
1869    *************************************************/
1870    
1871    /* When the -F option has been used, this is called for each substring.
1872    Otherwise it's called for each supplied pattern.
1873    
1874    Arguments:
1875      pattern        the pattern string
1876      options        the PCRE options
1877      filename       the file name, or NULL for a command-line pattern
1878      count          0 if this is the only command line pattern, or
1879                     number of the command line pattern, or
1880                     linenumber for a pattern from a file
1881    
1882    Returns:         TRUE on success, FALSE after an error
1883    */
1884    
1885    static BOOL
1886    compile_single_pattern(char *pattern, int options, char *filename, int count)
1887    {
1888    char buffer[MBUFTHIRD + 16];
1889    const char *error;
1890    int errptr;
1891    
1892    if (pattern_count >= MAX_PATTERN_COUNT)
1893      {
1894      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1895        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1896      return FALSE;
1897      }
1898    
1899    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1900      suffix[process_options]);
1901    pattern_list[pattern_count] =
1902      pcre_compile(buffer, options, &error, &errptr, pcretables);
1903    if (pattern_list[pattern_count] != NULL)
1904      {
1905      pattern_count++;
1906      return TRUE;
1907      }
1908    
1909    /* Handle compile errors */
1910    
1911    errptr -= (int)strlen(prefix[process_options]);
1912    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1913    
1914    if (filename == NULL)
1915      {
1916      if (count == 0)
1917        fprintf(stderr, "pcregrep: Error in command-line regex "
1918          "at offset %d: %s\n", errptr, error);
1919      else
1920        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1921          "at offset %d: %s\n", ordin(count), errptr, error);
1922      }
1923    else
1924      {
1925      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1926        "at offset %d: %s\n", count, filename, errptr, error);
1927      }
1928    
1929    return FALSE;
1930    }
1931    
1932    
1933    
1934    /*************************************************
1935    *           Compile one supplied pattern         *
1936    *************************************************/
1937    
1938    /* When the -F option has been used, each string may be a list of strings,
1939    separated by line breaks. They will be matched literally.
1940    
1941    Arguments:
1942      pattern        the pattern string
1943      options        the PCRE options
1944      filename       the file name, or NULL for a command-line pattern
1945      count          0 if this is the only command line pattern, or
1946                     number of the command line pattern, or
1947                     linenumber for a pattern from a file
1948    
1949    Returns:         TRUE on success, FALSE after an error
1950    */
1951    
1952    static BOOL
1953    compile_pattern(char *pattern, int options, char *filename, int count)
1954    {
1955    if ((process_options & PO_FIXED_STRINGS) != 0)
1956      {
1957      char *eop = pattern + strlen(pattern);
1958      char buffer[MBUFTHIRD];
1959      for(;;)
1960        {
1961        int ellength;
1962        char *p = end_of_line(pattern, eop, &ellength);
1963        if (ellength == 0)
1964          return compile_single_pattern(pattern, options, filename, count);
1965        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1966        pattern = p;
1967        if (!compile_single_pattern(buffer, options, filename, count))
1968          return FALSE;
1969        }
1970      }
1971    else return compile_single_pattern(pattern, options, filename, count);
1972    }
1973    
1974    
1975    
1976    /*************************************************
1977    *                Main program                    *
1978    *************************************************/
1979    
1980    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1981    
1982    int
1983    main(int argc, char **argv)
1984    {
1985    int i, j;
1986    int rc = 1;
1987    int pcre_options = 0;
1988    int cmd_pattern_count = 0;
1989    int hint_count = 0;
1990    int errptr;
1991    BOOL only_one_at_top;
1992    char *patterns[MAX_PATTERN_COUNT];
1993    const char *locale_from = "--locale";
1994    const char *error;
1995    
1996    /* Set the default line ending value from the default in the PCRE library;
1997    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1998    Note that the return values from pcre_config(), though derived from the ASCII
1999    codes, are the same in EBCDIC environments, so we must use the actual values
2000    rather than escapes such as '\r'. */
2001    
2002    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2003    switch(i)
2004      {
2005      default:               newline = (char *)"lf"; break;
2006      case 13:               newline = (char *)"cr"; break;
2007      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2008      case -1:               newline = (char *)"any"; break;
2009      case -2:               newline = (char *)"anycrlf"; break;
2010      }
2011    
2012    /* Process the options */
2013    
2014    for (i = 1; i < argc; i++)
2015      {
2016      option_item *op = NULL;
2017      char *option_data = (char *)"";    /* default to keep compiler happy */
2018      BOOL longop;
2019      BOOL longopwasequals = FALSE;
2020    
2021      if (argv[i][0] != '-') break;
2022    
2023      /* If we hit an argument that is just "-", it may be a reference to STDIN,
2024      but only if we have previously had -e or -f to define the patterns. */
2025    
2026      if (argv[i][1] == 0)
2027        {
2028        if (pattern_filename != NULL || pattern_count > 0) break;
2029          else pcregrep_exit(usage(2));
2030        }
2031    
2032      /* Handle a long name option, or -- to terminate the options */
2033    
2034      if (argv[i][1] == '-')
2035        {
2036        char *arg = argv[i] + 2;
2037        char *argequals = strchr(arg, '=');
2038    
2039        if (*arg == 0)    /* -- terminates options */
2040          {
2041          i++;
2042          break;                /* out of the options-handling loop */
2043          }
2044    
2045        longop = TRUE;
2046    
2047        /* Some long options have data that follows after =, for example file=name.
2048        Some options have variations in the long name spelling: specifically, we
2049        allow "regexp" because GNU grep allows it, though I personally go along
2050        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2051        These options are entered in the table as "regex(p)". Options can be in
2052        both these categories. */
2053    
2054        for (op = optionlist; op->one_char != 0; op++)
2055          {
2056          char *opbra = strchr(op->long_name, '(');
2057          char *equals = strchr(op->long_name, '=');
2058    
2059          /* Handle options with only one spelling of the name */
2060    
2061          if (opbra == NULL)     /* Does not contain '(' */
2062            {
2063            if (equals == NULL)  /* Not thing=data case */
2064              {
2065              if (strcmp(arg, op->long_name) == 0) break;
2066              }
2067            else                 /* Special case xxx=data */
2068              {
2069              int oplen = (int)(equals - op->long_name);
2070              int arglen = (argequals == NULL)?
2071                (int)strlen(arg) : (int)(argequals - arg);
2072              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2073                {
2074                option_data = arg + arglen;
2075                if (*option_data == '=')
2076                  {
2077                  option_data++;
2078                  longopwasequals = TRUE;
2079                  }
2080                break;
2081                }
2082              }
2083            }
2084    
2085          /* Handle options with an alternate spelling of the name */
2086    
2087          else
2088            {
2089            char buff1[24];
2090            char buff2[24];
2091    
2092            int baselen = (int)(opbra - op->long_name);
2093            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2094            int arglen = (argequals == NULL || equals == NULL)?
2095              (int)strlen(arg) : (int)(argequals - arg);
2096    
2097            sprintf(buff1, "%.*s", baselen, op->long_name);
2098            sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2099    
2100            if (strncmp(arg, buff1, arglen) == 0 ||
2101               strncmp(arg, buff2, arglen) == 0)
2102              {
2103              if (equals != NULL && argequals != NULL)
2104                {
2105                option_data = argequals;
2106                if (*option_data == '=')
2107                  {
2108                  option_data++;
2109                  longopwasequals = TRUE;
2110                  }
2111                }
2112              break;
2113              }
2114            }
2115          }
2116    
2117        if (op->one_char == 0)
2118          {
2119          fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2120          pcregrep_exit(usage(2));
2121          }
2122        }
2123    
2124      /* Jeffrey Friedl's debugging harness uses these additional options which
2125      are not in the right form for putting in the option table because they use
2126      only one hyphen, yet are more than one character long. By putting them
2127      separately here, they will not get displayed as part of the help() output,
2128      but I don't think Jeffrey will care about that. */
2129    
2130    #ifdef JFRIEDL_DEBUG
2131      else if (strcmp(argv[i], "-pre") == 0) {
2132              jfriedl_prefix = argv[++i];
2133              continue;
2134      } else if (strcmp(argv[i], "-post") == 0) {
2135              jfriedl_postfix = argv[++i];
2136              continue;
2137      } else if (strcmp(argv[i], "-XT") == 0) {
2138              sscanf(argv[++i], "%d", &jfriedl_XT);
2139              continue;
2140      } else if (strcmp(argv[i], "-XR") == 0) {
2141              sscanf(argv[++i], "%d", &jfriedl_XR);
2142              continue;
2143      }
2144    #endif
2145    
2146    
2147      /* One-char options; many that have no data may be in a single argument; we
2148      continue till we hit the last one or one that needs data. */
2149    
2150      else
2151        {
2152        char *s = argv[i] + 1;
2153        longop = FALSE;
2154        while (*s != 0)
2155          {
2156          for (op = optionlist; op->one_char != 0; op++)
2157            { if (*s == op->one_char) break; }
2158          if (op->one_char == 0)
2159            {
2160            fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2161              *s, argv[i]);
2162            pcregrep_exit(usage(2));
2163            }
2164          if (op->type != OP_NODATA || s[1] == 0)
2165            {
2166            option_data = s+1;
2167            break;
2168            }
2169          pcre_options = handle_option(*s++, pcre_options);
2170          }
2171        }
2172    
2173      /* At this point we should have op pointing to a matched option. If the type
2174      is OP_NODATA, it means that there is no data, and the option might set
2175      something in the PCRE options. */
2176    
2177      if (op->type == OP_NODATA)
2178        {
2179        pcre_options = handle_option(op->one_char, pcre_options);
2180        continue;
2181        }
2182    
2183      /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2184      either has a value or defaults to something. It cannot have data in a
2185      separate item. At the moment, the only such options are "colo(u)r" and
2186      Jeffrey Friedl's special -S debugging option. */
2187    
2188      if (*option_data == 0 &&
2189          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2190        {
2191        switch (op->one_char)
2192          {
2193          case N_COLOUR:
2194          colour_option = (char *)"auto";
2195          break;
2196    #ifdef JFRIEDL_DEBUG
2197          case 'S':
2198          S_arg = 0;
2199          break;
2200    #endif
2201          }
2202        continue;
2203        }
2204    
2205      /* Otherwise, find the data string for the option. */
2206    
2207      if (*option_data == 0)
2208        {
2209        if (i >= argc - 1 || longopwasequals)
2210          {
2211          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2212          pcregrep_exit(usage(2));
2213          }
2214        option_data = argv[++i];
2215        }
2216    
2217      /* If the option type is OP_PATLIST, it's the -e option, which can be called
2218      multiple times to create a list of patterns. */
2219    
2220      if (op->type == OP_PATLIST)
2221        {
2222        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2223          {
2224          fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2225            MAX_PATTERN_COUNT);
2226          return 2;
2227          }
2228        patterns[cmd_pattern_count++] = option_data;
2229        }
2230    
2231      /* Otherwise, deal with single string or numeric data values. */
2232    
2233      else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2234        {
2235        *((char **)op->dataptr) = option_data;
2236        }
2237    
2238      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2239      only for unpicking arguments, so just keep it simple. */
2240    
2241      else
2242        {
2243        unsigned long int n = 0;
2244        char *endptr = option_data;
2245        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2246        while (isdigit((unsigned char)(*endptr)))
2247          n = n * 10 + (int)(*endptr++ - '0');
2248        if (*endptr != 0)
2249          {
2250          if (longop)
2251            {
2252            char *equals = strchr(op->long_name, '=');
2253            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2254              (int)(equals - op->long_name);
2255            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2256              option_data, nlen, op->long_name);
2257            }
2258          else
2259            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2260              option_data, op->one_char);
2261          pcregrep_exit(usage(2));
2262          }
2263        *((int *)op->dataptr) = n;
2264        }
2265      }
2266    
2267    /* Options have been decoded. If -C was used, its value is used as a default
2268    for -A and -B. */
2269    
2270    if (both_context > 0)
2271      {
2272      if (after_context == 0) after_context = both_context;
2273      if (before_context == 0) before_context = both_context;
2274      }
2275    
2276    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2277    However, the latter two set the only_matching flag. */
2278    
2279    if ((only_matching && (file_offsets || line_offsets)) ||
2280        (file_offsets && line_offsets))
2281      {
2282      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2283        "and/or --line-offsets\n");
2284      pcregrep_exit(usage(2));
2285      }
2286    
2287    if (file_offsets || line_offsets) only_matching = TRUE;
2288    
2289    /* If a locale has not been provided as an option, see if the LC_CTYPE or
2290    LC_ALL environment variable is set, and if so, use it. */
2291    
2292    if (locale == NULL)
2293      {
2294      locale = getenv("LC_ALL");
2295      locale_from = "LCC_ALL";
2296      }
2297    
2298    if (locale == NULL)
2299      {
2300      locale = getenv("LC_CTYPE");
2301      locale_from = "LC_CTYPE";
2302      }
2303    
2304    /* If a locale has been provided, set it, and generate the tables the PCRE
2305    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2306    
2307    if (locale != NULL)
2308      {
2309      if (setlocale(LC_CTYPE, locale) == NULL)
2310        {
2311        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2312          locale, locale_from);
2313        return 2;
2314        }
2315      pcretables = pcre_maketables();
2316      }
2317    
2318    /* Sort out colouring */
2319    
2320    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2321      {
2322      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2323      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2324      else
2325        {
2326        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2327          colour_option);
2328        return 2;
2329        }
2330      if (do_colour)
2331        {
2332        char *cs = getenv("PCREGREP_COLOUR");
2333        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2334        if (cs != NULL) colour_string = cs;
2335        }
2336      }
2337    
2338    /* Interpret the newline type; the default settings are Unix-like. */
2339    
2340    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2341      {
2342      pcre_options |= PCRE_NEWLINE_CR;
2343      endlinetype = EL_CR;
2344      }
2345    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2346      {
2347      pcre_options |= PCRE_NEWLINE_LF;
2348      endlinetype = EL_LF;
2349      }
2350    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2351      {
2352      pcre_options |= PCRE_NEWLINE_CRLF;
2353      endlinetype = EL_CRLF;
2354      }
2355    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2356      {
2357      pcre_options |= PCRE_NEWLINE_ANY;
2358      endlinetype = EL_ANY;
2359      }
2360    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2361      {
2362      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2363      endlinetype = EL_ANYCRLF;
2364      }
2365    else
2366      {
2367      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2368      return 2;
2369      }
2370    
2371    /* Interpret the text values for -d and -D */
2372    
2373    if (dee_option != NULL)
2374      {
2375      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2376      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2377      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2378      else
2379        {
2380        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2381        return 2;
2382        }
2383      }
2384    
2385    if (DEE_option != NULL)
2386      {
2387      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2388      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2389      else
2390        {
2391        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2392        return 2;
2393        }
2394      }
2395    
2396    /* Check the values for Jeffrey Friedl's debugging options. */
2397    
2398    #ifdef JFRIEDL_DEBUG
2399    if (S_arg > 9)
2400      {
2401      fprintf(stderr, "pcregrep: bad value for -S option\n");
2402      return 2;
2403      }
2404    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2405      {
2406      if (jfriedl_XT == 0) jfriedl_XT = 1;
2407      if (jfriedl_XR == 0) jfriedl_XR = 1;
2408      }
2409    #endif
2410    
2411    /* Get memory to store the pattern and hints lists. */
2412    
2413    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2414    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2415    
2416    if (pattern_list == NULL || hints_list == NULL)
2417      {
2418      fprintf(stderr, "pcregrep: malloc failed\n");
2419      goto EXIT2;
2420      }
2421    
2422    /* If no patterns were provided by -e, and there is no file provided by -f,
2423    the first argument is the one and only pattern, and it must exist. */
2424    
2425    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2426      {
2427      if (i >= argc) return usage(2);
2428      patterns[cmd_pattern_count++] = argv[i++];
2429      }
2430    
2431    /* Compile the patterns that were provided on the command line, either by
2432    multiple uses of -e or as a single unkeyed pattern. */
2433    
2434    for (j = 0; j < cmd_pattern_count; j++)
2435      {
2436      if (!compile_pattern(patterns[j], pcre_options, NULL,
2437           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2438        goto EXIT2;
2439      }
2440    
2441    /* Compile the regular expressions that are provided in a file. */
2442    
2443    if (pattern_filename != NULL)
2444      {
2445      int linenumber = 0;
2446      FILE *f;
2447      char *filename;
2448      char buffer[MBUFTHIRD];
2449    
2450      if (strcmp(pattern_filename, "-") == 0)
2451        {
2452        f = stdin;
2453        filename = stdin_name;
2454        }
2455      else
2456        {
2457        f = fopen(pattern_filename, "r");
2458        if (f == NULL)
2459          {
2460          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2461            strerror(errno));
2462          goto EXIT2;
2463          }
2464        filename = pattern_filename;
2465        }
2466    
2467      while (fgets(buffer, MBUFTHIRD, f) != NULL)
2468        {
2469        char *s = buffer + (int)strlen(buffer);
2470        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2471        *s = 0;
2472        linenumber++;
2473        if (buffer[0] == 0) continue;   /* Skip blank lines */
2474        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2475          goto EXIT2;
2476        }
2477    
2478      if (f != stdin) fclose(f);
2479      }
2480    
2481    /* Study the regular expressions, as we will be running them many times */
2482    
2483    for (j = 0; j < pattern_count; j++)
2484      {
2485      hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2486      if (error != NULL)
2487        {
2488        char s[16];
2489        if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2490        fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2491        goto EXIT2;
2492        }
2493      hint_count++;
2494      }
2495    
2496    /* If --match-limit or --recursion-limit was set, put the value(s) into the
2497    pcre_extra block for each pattern. */
2498    
2499    if (match_limit > 0 || match_limit_recursion > 0)
2500      {
2501      for (j = 0; j < pattern_count; j++)
2502        {
2503        if (hints_list[j] == NULL)
2504          {
2505          hints_list[j] = malloc(sizeof(pcre_extra));
2506          if (hints_list[j] == NULL)
2507            {
2508            fprintf(stderr, "pcregrep: malloc failed\n");
2509            pcregrep_exit(2);
2510            }
2511          }
2512        if (match_limit > 0)
2513          {
2514          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2515          hints_list[j]->match_limit = match_limit;
2516          }
2517        if (match_limit_recursion > 0)
2518          {
2519          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2520          hints_list[j]->match_limit_recursion = match_limit_recursion;
2521          }
2522        }
2523      }
2524    
2525    /* If there are include or exclude patterns, compile them. */
2526    
2527    if (exclude_pattern != NULL)
2528      {
2529      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2530        pcretables);
2531      if (exclude_compiled == NULL)
2532        {
2533        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2534          errptr, error);
2535        goto EXIT2;
2536        }
2537      }
2538    
2539    if (include_pattern != NULL)
2540      {
2541      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2542        pcretables);
2543      if (include_compiled == NULL)
2544        {
2545        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2546          errptr, error);
2547        goto EXIT2;
2548        }
2549      }
2550    
2551    if (exclude_dir_pattern != NULL)
2552      {
2553      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2554        pcretables);
2555      if (exclude_dir_compiled == NULL)
2556        {
2557        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2558          errptr, error);
2559        goto EXIT2;
2560        }
2561      }
2562    
2563    if (include_dir_pattern != NULL)
2564      {
2565      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2566        pcretables);
2567      if (include_dir_compiled == NULL)
2568        {
2569        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2570          errptr, error);
2571        goto EXIT2;
2572        }
2573      }
2574    
2575    /* If there are no further arguments, do the business on stdin and exit. */
2576    
2577    if (i >= argc)
2578      {
2579      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2580      goto EXIT;
2581      }
2582    
2583    /* Otherwise, work through the remaining arguments as files or directories.
2584    Pass in the fact that there is only one argument at top level - this suppresses
2585    the file name if the argument is not a directory and filenames are not
2586    otherwise forced. */
2587    
2588    only_one_at_top = i == argc - 1;   /* Catch initial value of i */
2589    
2590    for (; i < argc; i++)
2591      {
2592      int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2593        only_one_at_top);
2594      if (frc > 1) rc = frc;
2595        else if (frc == 0 && rc == 1) rc = 0;
2596      }
2597    
2598    EXIT:
2599    if (pattern_list != NULL)
2600      {
2601      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2602      free(pattern_list);
2603      }
2604    if (hints_list != NULL)
2605      {
2606      for (i = 0; i < hint_count; i++)
2607        {
2608        if (hints_list[i] != NULL) free(hints_list[i]);
2609        }
2610      free(hints_list);
2611      }
2612    pcregrep_exit(rc);
2613    
2614    EXIT2:
2615    rc = 2;
2616    goto EXIT;
2617    }
2618    
2619    /* End of pcregrep */

Legend:
Removed from v.49  
changed lines
  Added in v.564

  ViewVC Help
Powered by ViewVC 1.1.5