/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 49 by nigel, Sat Feb 24 21:39:33 2007 UTC revision 280 by ph10, Wed Dec 5 20:56:03 2007 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2007 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44    #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55    #include <unistd.h>
56    #endif
57    
58  #include "pcre.h"  #include "pcre.h"
59    
60  #define FALSE 0  #define FALSE 0
# Line 17  its pattern matching. */ Line 62  its pattern matching. */
62    
63  typedef int BOOL;  typedef int BOOL;
64    
65    #define MAX_PATTERN_COUNT 100
66    
67    #if BUFSIZ > 8192
68    #define MBUFTHIRD BUFSIZ
69    #else
70    #define MBUFTHIRD 8192
71    #endif
72    
73    /* Values for the "filenames" variable, which specifies options for file name
74    output. The order is important; it is assumed that a file name is wanted for
75    all values greater than FN_DEFAULT. */
76    
77    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
78    
79    /* Actions for the -d and -D options */
80    
81    enum { dee_READ, dee_SKIP, dee_RECURSE };
82    enum { DEE_READ, DEE_SKIP };
83    
84    /* Actions for special processing options (flag bits) */
85    
86    #define PO_WORD_MATCH     0x0001
87    #define PO_LINE_MATCH     0x0002
88    #define PO_FIXED_STRINGS  0x0004
89    
90    /* Line ending types */
91    
92    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
93    
94    
95    
96  /*************************************************  /*************************************************
97  *               Global variables                 *  *               Global variables                 *
98  *************************************************/  *************************************************/
99    
100  static pcre *pattern;  /* Jeffrey Friedl has some debugging requirements that are not part of the
101  static pcre_extra *hints;  regular code. */
102    
103    #ifdef JFRIEDL_DEBUG
104    static int S_arg = -1;
105    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
106    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
107    static const char *jfriedl_prefix = "";
108    static const char *jfriedl_postfix = "";
109    #endif
110    
111    static int  endlinetype;
112    
113    static char *colour_string = (char *)"1;31";
114    static char *colour_option = NULL;
115    static char *dee_option = NULL;
116    static char *DEE_option = NULL;
117    static char *newline = NULL;
118    static char *pattern_filename = NULL;
119    static char *stdin_name = (char *)"(standard input)";
120    static char *locale = NULL;
121    
122    static const unsigned char *pcretables = NULL;
123    
124    static int  pattern_count = 0;
125    static pcre **pattern_list = NULL;
126    static pcre_extra **hints_list = NULL;
127    
128    static char *include_pattern = NULL;
129    static char *exclude_pattern = NULL;
130    
131    static pcre *include_compiled = NULL;
132    static pcre *exclude_compiled = NULL;
133    
134    static int after_context = 0;
135    static int before_context = 0;
136    static int both_context = 0;
137    static int dee_action = dee_READ;
138    static int DEE_action = DEE_READ;
139    static int error_count = 0;
140    static int filenames = FN_DEFAULT;
141    static int process_options = 0;
142    
143  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
144  static BOOL filenames_only = FALSE;  static BOOL do_colour = FALSE;
145    static BOOL file_offsets = FALSE;
146    static BOOL hyphenpending = FALSE;
147  static BOOL invert = FALSE;  static BOOL invert = FALSE;
148    static BOOL line_offsets = FALSE;
149    static BOOL multiline = FALSE;
150  static BOOL number = FALSE;  static BOOL number = FALSE;
151    static BOOL only_matching = FALSE;
152    static BOOL quiet = FALSE;
153  static BOOL silent = FALSE;  static BOOL silent = FALSE;
154  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
155    
156    /* Structure for options and list of them */
157    
158    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
159           OP_PATLIST };
160    
161    typedef struct option_item {
162      int type;
163      int one_char;
164      void *dataptr;
165      const char *long_name;
166      const char *help_text;
167    } option_item;
168    
169    /* Options without a single-letter equivalent get a negative value. This can be
170    used to identify them. */
171    
172    #define N_COLOUR    (-1)
173    #define N_EXCLUDE   (-2)
174    #define N_HELP      (-3)
175    #define N_INCLUDE   (-4)
176    #define N_LABEL     (-5)
177    #define N_LOCALE    (-6)
178    #define N_NULL      (-7)
179    #define N_LOFFSETS  (-8)
180    #define N_FOFFSETS  (-9)
181    
182    static option_item optionlist[] = {
183      { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
184      { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
185      { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
186      { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
187      { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
188      { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
189      { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
190      { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
191      { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
192      { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
193      { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
194      { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
195      { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
196      { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
197      { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
198      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
199      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
200      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
201      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
202      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
203      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
204      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
205      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
206      { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
207      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
208      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
209      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
210      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
211      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
212      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
213    #ifdef JFRIEDL_DEBUG
214      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
215    #endif
216      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
217      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
218      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
219      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
220      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
221      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
222      { OP_NODATA,    0,        NULL,               NULL,            NULL }
223    };
224    
225    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
226    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
227    that the combination of -w and -x has the same effect as -x on its own, so we
228    can treat them as the same. */
229    
230    static const char *prefix[] = {
231      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
232    
233    static const char *suffix[] = {
234      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
235    
236    /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
237    
238    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
239    
240    const char utf8_table4[] = {
241      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
242      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
243      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
244      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
245    
246    
247    
248    /*************************************************
249    *            OS-specific functions               *
250    *************************************************/
251    
252    /* These functions are defined so that they can be made system specific,
253    although at present the only ones are for Unix, Win32, and for "no support". */
254    
255    
256    /************* Directory scanning in Unix ***********/
257    
258    #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
259    #include <sys/types.h>
260    #include <sys/stat.h>
261    #include <dirent.h>
262    
263    typedef DIR directory_type;
264    
265    static int
266    isdirectory(char *filename)
267    {
268    struct stat statbuf;
269    if (stat(filename, &statbuf) < 0)
270      return 0;        /* In the expectation that opening as a file will fail */
271    return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
272    }
273    
274    static directory_type *
275    opendirectory(char *filename)
276    {
277    return opendir(filename);
278    }
279    
280    static char *
281    readdirectory(directory_type *dir)
282    {
283    for (;;)
284      {
285      struct dirent *dent = readdir(dir);
286      if (dent == NULL) return NULL;
287      if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
288        return dent->d_name;
289      }
290    /* Control never reaches here */
291    }
292    
293    static void
294    closedirectory(directory_type *dir)
295    {
296    closedir(dir);
297    }
298    
299    
300    /************* Test for regular file in Unix **********/
301    
302    static int
303    isregfile(char *filename)
304    {
305    struct stat statbuf;
306    if (stat(filename, &statbuf) < 0)
307      return 1;        /* In the expectation that opening as a file will fail */
308    return (statbuf.st_mode & S_IFMT) == S_IFREG;
309    }
310    
311    
312    /************* Test stdout for being a terminal in Unix **********/
313    
314    static BOOL
315    is_stdout_tty(void)
316    {
317    return isatty(fileno(stdout));
318    }
319    
320    
321    /************* Directory scanning in Win32 ***********/
322    
323    /* I (Philip Hazel) have no means of testing this code. It was contributed by
324    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
325    when it did not exist. */
326    
327    
328    #elif HAVE_WINDOWS_H
329    
330    #ifndef STRICT
331    # define STRICT
332    #endif
333    #ifndef WIN32_LEAN_AND_MEAN
334    # define WIN32_LEAN_AND_MEAN
335    #endif
336    #ifndef INVALID_FILE_ATTRIBUTES
337    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
338    #endif
339    
340    #include <windows.h>
341    
342    typedef struct directory_type
343    {
344    HANDLE handle;
345    BOOL first;
346    WIN32_FIND_DATA data;
347    } directory_type;
348    
349    int
350    isdirectory(char *filename)
351    {
352    DWORD attr = GetFileAttributes(filename);
353    if (attr == INVALID_FILE_ATTRIBUTES)
354      return 0;
355    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
356    }
357    
358    directory_type *
359    opendirectory(char *filename)
360    {
361    size_t len;
362    char *pattern;
363    directory_type *dir;
364    DWORD err;
365    len = strlen(filename);
366    pattern = (char *) malloc(len + 3);
367    dir = (directory_type *) malloc(sizeof(*dir));
368    if ((pattern == NULL) || (dir == NULL))
369      {
370      fprintf(stderr, "pcregrep: malloc failed\n");
371      exit(2);
372      }
373    memcpy(pattern, filename, len);
374    memcpy(&(pattern[len]), "\\*", 3);
375    dir->handle = FindFirstFile(pattern, &(dir->data));
376    if (dir->handle != INVALID_HANDLE_VALUE)
377      {
378      free(pattern);
379      dir->first = TRUE;
380      return dir;
381      }
382    err = GetLastError();
383    free(pattern);
384    free(dir);
385    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
386    return NULL;
387    }
388    
389    char *
390    readdirectory(directory_type *dir)
391    {
392    for (;;)
393      {
394      if (!dir->first)
395        {
396        if (!FindNextFile(dir->handle, &(dir->data)))
397          return NULL;
398        }
399      else
400        {
401        dir->first = FALSE;
402        }
403      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
404        return dir->data.cFileName;
405      }
406    #ifndef _MSC_VER
407    return NULL;   /* Keep compiler happy; never executed */
408    #endif
409    }
410    
411    void
412    closedirectory(directory_type *dir)
413    {
414    FindClose(dir->handle);
415    free(dir);
416    }
417    
418    
419    /************* Test for regular file in Win32 **********/
420    
421    /* I don't know how to do this, or if it can be done; assume all paths are
422    regular if they are not directories. */
423    
424    int isregfile(char *filename)
425    {
426    return !isdirectory(filename);   /* anything that is not a directory is treated as regular */
427    }
428    
429  #if ! HAVE_STRERROR  
430    /************* Test stdout for being a terminal in Win32 **********/
431    
432    /* I don't know how to do this; assume never */
433    
434    static BOOL
435    is_stdout_tty(void)
436    {
437    return FALSE;   /* no terminal test available here; always report "not a tty" */
438    }
439    
440    
441    /************* Directory scanning when we can't do it ***********/
442    
443    /* The type is void, and apart from isdirectory(), the functions do nothing. */
444    
445    #else
446    
447    typedef void directory_type;
448    
449    int isdirectory(char *filename) { return 0; }
450    directory_type * opendirectory(char *filename) { return (directory_type*)0;}
451    char *readdirectory(directory_type *dir) { return (char*)0;}
452    void closedirectory(directory_type *dir) {}
453    
454    
455    /************* Test for regular when we can't do it **********/
456    
457    /* Assume all files are regular. */
458    
459    int isregfile(char *filename) { return 1; }
460    
461    
462    /************* Test stdout for being a terminal when we can't do it **********/
463    
464    static BOOL
465    is_stdout_tty(void)
466    {
467    return FALSE;
468    }
469    
470    
471    #endif
472    
473    
474    
475    #ifndef HAVE_STRERROR
476  /*************************************************  /*************************************************
477  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
478  *************************************************/  *************************************************/
# Line 58  return sys_errlist[n]; Line 495  return sys_errlist[n];
495    
496    
497  /*************************************************  /*************************************************
498  *              Grep an individual file           *  *             Find end of line                   *
499  *************************************************/  *************************************************/
500    
501  static int  /* The length of the endline sequence that is found is set via lenptr. This may
502  pcregrep(FILE *in, char *name)  be zero at the very end of the file if there is no line-ending sequence there.
 {  
 int rc = 1;  
 int linenumber = 0;  
 int count = 0;  
 int offsets[99];  
 char buffer[BUFSIZ];  
503    
504  while (fgets(buffer, sizeof(buffer), in) != NULL)  Arguments:
505    {    p         current position in line
506    BOOL match;    endptr    end of available data
507    int length = (int)strlen(buffer);    lenptr    where to put the length of the eol sequence
   if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;  
   linenumber++;  
508    
509    match = pcre_exec(pattern, hints, buffer, length, 0, 0, offsets, 99) >= 0;  Returns:    pointer just past the end-of-line sequence, or endptr if none was found
510    if (match && whole_lines && offsets[1] != length) match = FALSE;  */
511    
512    if (match != invert)  static char *
513    end_of_line(char *p, char *endptr, int *lenptr)
514    {
515    switch(endlinetype)
516      {
517      default:      /* Just in case */
518      case EL_LF:
519      while (p < endptr && *p != '\n') p++;
520      if (p < endptr)
521      {      {
522      if (count_only) count++;      *lenptr = 1;
523        return p + 1;
524        }
525      *lenptr = 0;
526      return endptr;
527    
528      case EL_CR:
529      while (p < endptr && *p != '\r') p++;
530      if (p < endptr)
531        {
532        *lenptr = 1;
533        return p + 1;
534        }
535      *lenptr = 0;
536      return endptr;
537    
538      else if (filenames_only)    case EL_CRLF:
539      for (;;)
540        {
541        while (p < endptr && *p != '\r') p++;
542        if (++p >= endptr)
543        {        {
544        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        *lenptr = 0;
545        return 0;        return endptr;
546          }
547        if (*p == '\n')
548          {
549          *lenptr = 2;
550          return p + 1;
551        }        }
552        }
553      break;
554    
555      else if (silent) return 0;    case EL_ANYCRLF:
556      while (p < endptr)
557        {
558        int extra = 0;
559        register int c = *((unsigned char *)p);
560    
561      else      if (utf8 && c >= 0xc0)
562        {        {
563        if (name != NULL) fprintf(stdout, "%s:", name);        int gcii, gcss;
564        if (number) fprintf(stdout, "%d:", linenumber);        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
565        fprintf(stdout, "%s\n", buffer);        gcss = 6*extra;
566          c = (c & utf8_table3[extra]) << gcss;
567          for (gcii = 1; gcii <= extra; gcii++)
568            {
569            gcss -= 6;
570            c |= (p[gcii] & 0x3f) << gcss;
571            }
572        }        }
573    
574      rc = 0;      p += 1 + extra;
     }  
   }  
575    
576  if (count_only)      switch (c)
577    {        {
578    if (name != NULL) fprintf(stdout, "%s:", name);        case 0x0a:    /* LF */
579    fprintf(stdout, "%d\n", count);        *lenptr = 1;
580    }        return p;
581    
582          case 0x0d:    /* CR */
583          if (p < endptr && *p == 0x0a)
584            {
585            *lenptr = 2;
586            p++;
587            }
588          else *lenptr = 1;
589          return p;
590    
591  return rc;        default:
592  }        break;
593          }
594        }   /* End of loop for ANYCRLF case */
595    
596      *lenptr = 0;  /* Must have hit the end */
597      return endptr;
598    
599      case EL_ANY:
600      while (p < endptr)
601        {
602        int extra = 0;
603        register int c = *((unsigned char *)p);
604    
605        if (utf8 && c >= 0xc0)
606          {
607          int gcii, gcss;
608          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
609          gcss = 6*extra;
610          c = (c & utf8_table3[extra]) << gcss;
611          for (gcii = 1; gcii <= extra; gcii++)
612            {
613            gcss -= 6;
614            c |= (p[gcii] & 0x3f) << gcss;
615            }
616          }
617    
618  /*************************************************      p += 1 + extra;
 *                Usage function                  *  
 *************************************************/  
619    
620  static int      switch (c)
621  usage(int rc)        {
622  {        case 0x0a:    /* LF */
623  fprintf(stderr, "Usage: pcregrep [-Vchilnsvx] pattern [file] ...\n");        case 0x0b:    /* VT */
624  return rc;        case 0x0c:    /* FF */
625  }        *lenptr = 1;
626          return p;
627    
628          case 0x0d:    /* CR */
629          if (p < endptr && *p == 0x0a)
630            {
631            *lenptr = 2;
632            p++;
633            }
634          else *lenptr = 1;
635          return p;
636    
637          case 0x85:    /* NEL */
638          *lenptr = utf8? 2 : 1;
639          return p;
640    
641          case 0x2028:  /* LS */
642          case 0x2029:  /* PS */
643          *lenptr = 3;
644          return p;
645    
646          default:
647          break;
648          }
649        }   /* End of loop for ANY case */
650    
651      *lenptr = 0;  /* Must have hit the end */
652      return endptr;
653      }     /* End of overall switch */
654    }
655    
656    
657    
658  /*************************************************  /*************************************************
659  *                Main program                    *  *         Find start of previous line            *
660  *************************************************/  *************************************************/
661    
662  int  /* This is called when looking back for before lines to print.
 main(int argc, char **argv)  
 {  
 int i;  
 int rc = 1;  
 int options = 0;  
 int errptr;  
 const char *error;  
 BOOL filenames = TRUE;  
663    
664  /* Process the options */  Arguments:
665      p         start of the subsequent line
666      startptr  start of available data
667    
668  for (i = 1; i < argc; i++)  Returns:    pointer to the start of the previous line
669    */
670    
671    static char *
672    previous_line(char *p, char *startptr)
673    {
674    switch(endlinetype)
675    {    {
676    char *s;    default:      /* Just in case */
677    if (argv[i][0] != '-') break;    case EL_LF:
678    s = argv[i] + 1;    p--;
679    while (*s != 0)    while (p > startptr && p[-1] != '\n') p--;
680      return p;
681    
682      case EL_CR:
683      p--;
684      while (p > startptr && p[-1] != '\r') p--;   /* lines end in CR for this newline type */
685      return p;
686    
687      case EL_CRLF:
688      for (;;)
689        {
690        p -= 2;
691        while (p > startptr && p[-1] != '\n') p--;
692        if (p <= startptr + 1 || p[-2] == '\r') return p;
693        }
694      return p;   /* But control should never get here */
695    
696      case EL_ANY:
697      case EL_ANYCRLF:
698      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
699      if (utf8) while ((*p & 0xc0) == 0x80) p--;
700    
701      while (p > startptr)
702      {      {
703      switch (*s++)      register int c;
704        char *pp = p - 1;
705    
706        if (utf8)
707          {
708          int extra = 0;
709          while ((*pp & 0xc0) == 0x80) pp--;
710          c = *((unsigned char *)pp);
711          if (c >= 0xc0)
712            {
713            int gcii, gcss;
714            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
715            gcss = 6*extra;
716            c = (c & utf8_table3[extra]) << gcss;
717            for (gcii = 1; gcii <= extra; gcii++)
718              {
719              gcss -= 6;
720              c |= (pp[gcii] & 0x3f) << gcss;
721              }
722            }
723          }
724        else c = *((unsigned char *)pp);
725    
726        if (endlinetype == EL_ANYCRLF) switch (c)
727        {        {
728        case 'c': count_only = TRUE; break;        case 0x0a:    /* LF */
729        case 'h': filenames = FALSE; break;        case 0x0d:    /* CR */
730        case 'i': options |= PCRE_CASELESS; break;        return p;
       case 'l': filenames_only = TRUE;  
       case 'n': number = TRUE; break;  
       case 's': silent = TRUE; break;  
       case 'v': invert = TRUE; break;  
       case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;  
731    
732        case 'V':        default:
       fprintf(stderr, "PCRE version %s\n", pcre_version());  
733        break;        break;
734          }
735    
736        else switch (c)
737          {
738          case 0x0a:    /* LF */
739          case 0x0b:    /* VT */
740          case 0x0c:    /* FF */
741          case 0x0d:    /* CR */
742          case 0x85:    /* NEL */
743          case 0x2028:  /* LS */
744          case 0x2029:  /* PS */
745          return p;
746    
747        default:        default:
748        fprintf(stderr, "pcregrep: unknown option %c\n", s[-1]);        break;
       return usage(2);  
749        }        }
750    
751        p = pp;  /* Back one character */
752        }        /* End of loop for ANY/ANYCRLF case */
753    
754      return startptr;  /* Hit start of data */
755      }     /* End of overall switch */
756    }
757    
758    
759    
760    
761    
762    /*************************************************
763    *       Print the previous "after" lines         *
764    *************************************************/
765    
766    /* This is called if we are about to lose said lines because of buffer filling,
767    and at the end of the file. The data in the line is written using fwrite() so
768    that a binary zero does not terminate it.
769    
770    Arguments:
771      lastmatchnumber   the number of the last matching line, plus one
772      lastmatchrestart  where we restarted after the last match
773      endptr            end of available data
774      printname         filename for printing
775    
776    Returns:            nothing
777    */
778    
779    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
780      char *endptr, char *printname)
781    {
782    if (after_context > 0 && lastmatchnumber > 0)
783      {
784      int count = 0;
785      while (lastmatchrestart < endptr && count++ < after_context)
786        {
787        int ellength;
788        char *pp = lastmatchrestart;
789        if (printname != NULL) fprintf(stdout, "%s-", printname);
790        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
791        pp = end_of_line(pp, endptr, &ellength);
792        fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
793        lastmatchrestart = pp;
794      }      }
795      hyphenpending = TRUE;
796    }    }
797    }
798    
 /* There must be at least a regexp argument */  
799    
 if (i >= argc) return usage(0);  
800    
801  /* Compile the regular expression. */  /*************************************************
802    *            Grep an individual file             *
803    *************************************************/
804    
805    /* This is called from grep_or_recurse() below. It uses a buffer that is three
806    times the value of MBUFTHIRD. The matching point is never allowed to stray into
807    the top third of the buffer, thus keeping more of the file available for
808    context printing or for multiline scanning. For large files, the pointer will
809    be in the middle third most of the time, so the bottom third is available for
810    "before" context printing.
811    
812    Arguments:
813      in           the fopened FILE stream
814      printname    the file name if it is to be printed for each match
815                   or NULL if the file name is not to be printed
816                   it cannot be NULL if filenames[_nomatch]_only is set
817    
818    Returns:       0 if there was at least one match
819                   1 otherwise (no matches)
820    */
821    
822    static int
823    pcregrep(FILE *in, char *printname)
824    {
825    int rc = 1;
826    int linenumber = 1;
827    int lastmatchnumber = 0;
828    int count = 0;
829    int filepos = 0;
830    int offsets[99];
831    char *lastmatchrestart = NULL;
832    char buffer[3*MBUFTHIRD];
833    char *ptr = buffer;
834    char *endptr;
835    size_t bufflength;
836    BOOL endhyphenpending = FALSE;
837    
838    /* Do the first read into the start of the buffer and set up the pointer to
839    end of what we have. */
840    
841    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
842    endptr = buffer + bufflength;
843    
844    /* Loop while the current pointer is not at the end of the file. For large
845    files, endptr will be at the end of the buffer when we are in the middle of the
846    file, but ptr will never get there, because as soon as it gets over 2/3 of the
847    way, the buffer is shifted left and re-filled. */
848    
849  pattern = pcre_compile(argv[i++], options, &error, &errptr, NULL);  while (ptr < endptr)
 if (pattern == NULL)  
850    {    {
851    fprintf(stderr, "pcregrep: error in regex at offset %d: %s\n", errptr, error);    int i, endlinelength;
852    return 2;    int mrc = 0;
853    }    BOOL match = FALSE;
854      char *matchptr = ptr;
855      char *t = ptr;
856      size_t length, linelength;
857    
858      /* At this point, ptr is at the start of a line. We need to find the length
859      of the subject string to pass to pcre_exec(). In multiline mode, it is the
860      length of the remainder of the data in the buffer. Otherwise, it is the length of
861      the next line. After matching, we always advance by the length of the next
862      line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
863      that any match is constrained to be in the first line. */
864    
865      t = end_of_line(t, endptr, &endlinelength);
866      linelength = t - ptr - endlinelength;
867      length = multiline? (size_t)(endptr - ptr) : linelength;
868    
869  /* Study the regular expression, as we will be running it many times */    /* Extra processing for Jeffrey Friedl's debugging. */
870    
871  hints = pcre_study(pattern, 0, &error);  #ifdef JFRIEDL_DEBUG
872  if (error != NULL)    if (jfriedl_XT || jfriedl_XR)
873    {    {
874    fprintf(stderr, "pcregrep: error while studing regex: %s\n", error);        #include <sys/time.h>
875    return 2;        #include <time.h>
876    }        struct timeval start_time, end_time;
877          struct timezone dummy;
878    
879  /* If there are no further arguments, do the business on stdin and exit */        if (jfriedl_XT)
880          {
881              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
882              const char *orig = ptr;
883              ptr = malloc(newlen + 1);
884              if (!ptr) {
885                      printf("out of memory");
886                      exit(2);
887              }
888              endptr = ptr;
889              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
890              for (i = 0; i < jfriedl_XT; i++) {
891                      strncpy(endptr, orig,  length);
892                      endptr += length;
893              }
894              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
895              length = newlen;
896          }
897    
898  if (i >= argc) return pcregrep(stdin, NULL);        if (gettimeofday(&start_time, &dummy) != 0)
899                  perror("bad gettimeofday");
900    
 /* Otherwise, work through the remaining arguments as files. If there is only  
 one, don't give its name on the output. */  
901    
902  if (i == argc - 1) filenames = FALSE;        for (i = 0; i < jfriedl_XR; i++)
903  if (filenames_only) filenames = TRUE;            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
904    
905  for (; i < argc; i++)        if (gettimeofday(&end_time, &dummy) != 0)
906    {                perror("bad gettimeofday");
907    FILE *in = fopen(argv[i], "r");  
908    if (in == NULL)        double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
909                          -
910                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
911    
912          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
913          return 0;
914      }
915    #endif
916    
917      /* We come back here after a match when the -o option (only_matching) is set,
918      in order to find any further matches in the same line. */
919    
920      ONLY_MATCHING_RESTART:
921    
922      /* Run through all the patterns until one matches. Note that we don't include
923      the final newline in the subject string. */
924    
925      for (i = 0; i < pattern_count; i++)
926      {      {
927      fprintf(stderr, "%s: failed to open: %s\n", argv[i], strerror(errno));      mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
928      rc = 2;        offsets, 99);
929        if (mrc >= 0) { match = TRUE; break; }
930        if (mrc != PCRE_ERROR_NOMATCH)
931          {
932          fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
933          if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
934          fprintf(stderr, "this line:\n");
935          fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */
936          fprintf(stderr, "\n");
937          if (error_count == 0 &&
938              (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
939            {
940            fprintf(stderr, "pcregrep: error %d means that a resource limit "
941              "was exceeded\n", mrc);
942            fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
943            }
944          if (error_count++ > 20)
945            {
946            fprintf(stderr, "pcregrep: too many errors - abandoned\n");
947            exit(2);
948            }
949          match = invert;    /* No more matching; don't show the line again */
950          break;
951          }
952      }      }
953    else  
954      /* If it's a match or a not-match (as required), do what's wanted. */
955    
956      if (match != invert)
957      {      {
958      int frc = pcregrep(in, filenames? argv[i] : NULL);      BOOL hyphenprinted = FALSE;
     if (frc == 0 && rc == 1) rc = 0;  
     fclose(in);  
     }  
   }  
959    
960  return rc;      /* We've failed if we want a file that doesn't have any matches. */
961    
962        if (filenames == FN_NOMATCH_ONLY) return 1;
963    
964        /* Just count if just counting is wanted. */
965    
966        if (count_only) count++;
967    
968        /* If all we want is a file name, there is no need to scan any more lines
969        in the file. */
970    
971        else if (filenames == FN_ONLY)
972          {
973          fprintf(stdout, "%s\n", printname);
974          return 0;
975          }
976    
977        /* Likewise, if all we want is a yes/no answer. */
978    
979        else if (quiet) return 0;
980    
981        /* The --only-matching option prints just the substring that matched, and
982        the --file-offsets and --line-offsets options output offsets for the
983        matching substring (they both force --only-matching). None of these options
984        prints any context. Afterwards, adjust the start and length, and then jump
985        back to look for further matches in the same line. If we are in invert
986        mode, however, nothing is printed - this could be still useful because the
987        return code is set. */
988    
989        else if (only_matching)
990          {
991          if (!invert)
992            {
993            if (printname != NULL) fprintf(stdout, "%s:", printname);
994            if (number) fprintf(stdout, "%d:", linenumber);
995            if (line_offsets)
996              fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,
997                offsets[1] - offsets[0]);
998            else if (file_offsets)
999              fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,
1000                offsets[1] - offsets[0]);
1001            else
1002              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1003            fprintf(stdout, "\n");
1004            matchptr += offsets[1];
1005            length -= offsets[1];
1006            match = FALSE;
1007            goto ONLY_MATCHING_RESTART;
1008            }
1009          }
1010    
1011        /* This is the default case when none of the above options is set. We print
1012        the matching lines(s), possibly preceded and/or followed by other lines of
1013        context. */
1014    
1015        else
1016          {
1017          /* See if there is a requirement to print some "after" lines from a
1018          previous match. We never print any overlaps. */
1019    
1020          if (after_context > 0 && lastmatchnumber > 0)
1021            {
1022            int ellength;
1023            int linecount = 0;
1024            char *p = lastmatchrestart;
1025    
1026            while (p < ptr && linecount < after_context)
1027              {
1028              p = end_of_line(p, ptr, &ellength);
1029              linecount++;
1030              }
1031    
1032            /* It is important to advance lastmatchrestart during this printing so
1033            that it interacts correctly with any "before" printing below. Print
1034            each line's data using fwrite() in case there are binary zeroes. */
1035    
1036            while (lastmatchrestart < p)
1037              {
1038              char *pp = lastmatchrestart;
1039              if (printname != NULL) fprintf(stdout, "%s-", printname);
1040              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1041              pp = end_of_line(pp, endptr, &ellength);
1042              fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1043              lastmatchrestart = pp;
1044              }
1045            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1046            }
1047    
1048          /* If there were non-contiguous lines printed above, insert hyphens. */
1049    
1050          if (hyphenpending)
1051            {
1052            fprintf(stdout, "--\n");
1053            hyphenpending = FALSE;
1054            hyphenprinted = TRUE;
1055            }
1056    
1057          /* See if there is a requirement to print some "before" lines for this
1058          match. Again, don't print overlaps. */
1059    
1060          if (before_context > 0)
1061            {
1062            int linecount = 0;
1063            char *p = ptr;
1064    
1065            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1066                   linecount < before_context)
1067              {
1068              linecount++;
1069              p = previous_line(p, buffer);
1070              }
1071    
1072            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1073              fprintf(stdout, "--\n");
1074    
1075            while (p < ptr)
1076              {
1077              int ellength;
1078              char *pp = p;
1079              if (printname != NULL) fprintf(stdout, "%s-", printname);
1080              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1081              pp = end_of_line(pp, endptr, &ellength);
1082              fwrite(p, 1, pp - p, stdout);
1083              p = pp;
1084              }
1085            }
1086    
1087          /* Now print the matching line(s); ensure we set hyphenpending at the end
1088          of the file if any context lines are being output. */
1089    
1090          if (after_context > 0 || before_context > 0)
1091            endhyphenpending = TRUE;
1092    
1093          if (printname != NULL) fprintf(stdout, "%s:", printname);
1094          if (number) fprintf(stdout, "%d:", linenumber);
1095    
1096          /* In multiline mode, we want to print to the end of the line in which
1097          the end of the matched string is found, so we adjust linelength and the
1098          line number appropriately, but only when there actually was a match
1099          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1100          the match will always be before the first newline sequence. */
1101    
1102          if (multiline)
1103            {
1104            int ellength;
1105            char *endmatch = ptr;
1106            if (!invert)
1107              {
1108              endmatch += offsets[1];
1109              t = ptr;
1110              while (t < endmatch)
1111                {
1112                t = end_of_line(t, endptr, &ellength);
1113                if (t <= endmatch) linenumber++; else break;
1114                }
1115              }
1116            endmatch = end_of_line(endmatch, endptr, &ellength);
1117            linelength = endmatch - ptr - ellength;
1118            }
1119    
1120          /*** NOTE: Use only fwrite() to output the data line, so that binary
1121          zeroes are treated as just another data character. */
1122    
1123          /* This extra option, for Jeffrey Friedl's debugging requirements,
1124          replaces the matched string, or a specific captured string if it exists,
1125          with X. When this happens, colouring is ignored. */
1126    
1127    #ifdef JFRIEDL_DEBUG
1128          if (S_arg >= 0 && S_arg < mrc)
1129            {
1130            int first = S_arg * 2;
1131            int last  = first + 1;
1132            fwrite(ptr, 1, offsets[first], stdout);
1133            fprintf(stdout, "X");
1134            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1135            }
1136          else
1137    #endif
1138    
1139          /* We have to split the line(s) up if colouring. */
1140    
1141          if (do_colour)
1142            {
1143            fwrite(ptr, 1, offsets[0], stdout);
1144            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1145            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1146            fprintf(stdout, "%c[00m", 0x1b);
1147            fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1148              stdout);
1149            }
1150          else fwrite(ptr, 1, linelength + endlinelength, stdout);
1151          }
1152    
1153        /* End of doing what has to be done for a match */
1154    
1155        rc = 0;    /* Had some success */
1156    
1157        /* Remember where the last match happened for after_context. We remember
1158        where we are about to restart, and that line's number. */
1159    
1160        lastmatchrestart = ptr + linelength + endlinelength;
1161        lastmatchnumber = linenumber + 1;
1162        }
1163    
1164      /* For a match in multiline inverted mode (which of course did not cause
1165      anything to be printed), we have to move on to the end of the match before
1166      proceeding. */
1167    
1168      if (multiline && invert && match)
1169        {
1170        int ellength;
1171        char *endmatch = ptr + offsets[1];
1172        t = ptr;
1173        while (t < endmatch)
1174          {
1175          t = end_of_line(t, endptr, &ellength);
1176          if (t <= endmatch) linenumber++; else break;
1177          }
1178        endmatch = end_of_line(endmatch, endptr, &ellength);
1179        linelength = endmatch - ptr - ellength;
1180        }
1181    
1182      /* Advance to after the newline and increment the line number. The file
1183      offset to the current line is maintained in filepos. */
1184    
1185      ptr += linelength + endlinelength;
1186      filepos += linelength + endlinelength;
1187      linenumber++;
1188    
1189      /* If we haven't yet reached the end of the file (the buffer is full), and
1190      the current point is in the top 1/3 of the buffer, slide the buffer down by
1191      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1192      about to be lost, print them. */
1193    
1194      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1195        {
1196        if (after_context > 0 &&
1197            lastmatchnumber > 0 &&
1198            lastmatchrestart < buffer + MBUFTHIRD)
1199          {
1200          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1201          lastmatchnumber = 0;
1202          }
1203    
1204        /* Now do the shuffle */
1205    
1206        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1207        ptr -= MBUFTHIRD;
1208        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1209        endptr = buffer + bufflength;
1210    
1211        /* Adjust any last match point */
1212    
1213        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1214        }
1215      }     /* Loop through the whole file */
1216    
1217    /* End of file; print final "after" lines if wanted; do_after_lines sets
1218    hyphenpending if it prints something. */
1219    
1220    if (!only_matching && !count_only)
1221      {
1222      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1223      hyphenpending |= endhyphenpending;
1224      }
1225    
1226    /* Print the file name if we are looking for those without matches and there
1227    were none. If we found a match, we won't have got this far. */
1228    
1229    if (filenames == FN_NOMATCH_ONLY)
1230      {
1231      fprintf(stdout, "%s\n", printname);
1232      return 0;
1233      }
1234    
1235    /* Print the match count if wanted */
1236    
1237    if (count_only)
1238      {
1239      if (printname != NULL) fprintf(stdout, "%s:", printname);
1240      fprintf(stdout, "%d\n", count);
1241      }
1242    
1243    return rc;
1244    }
1245    
1246    
1247    
1248    /*************************************************
1249    *     Grep a file or recurse into a directory    *
1250    *************************************************/
1251    
1252    /* Given a path name, if it's a directory, scan all the files if we are
1253    recursing; if it's a file, grep it.
1254    
1255    Arguments:
1256      pathname          the path to investigate
1257      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1258      only_one_at_top   TRUE if the path is the only one at toplevel
1259    
1260    Returns:   0 if there was at least one match
1261               1 if there were no matches
1262               2 there was some kind of error
1263    
1264    However, file opening failures are suppressed if "silent" is set.
1265    */
1266    
1267    static int
1268    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1269    {
1270    int rc = 1;
1271    int sep;
1272    FILE *in;
1273    
1274    /* If the file name is "-" we scan stdin */
1275    
1276    if (strcmp(pathname, "-") == 0)
1277      {
1278      return pcregrep(stdin,
1279        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1280          stdin_name : NULL);
1281      }
1282    
1283    
1284    /* If the file is a directory, skip if skipping or if we are recursing, scan
1285    each file within it, subject to any include or exclude patterns that were set.
1286    The scanning code is localized so it can be made system-specific. */
1287    
1288    if ((sep = isdirectory(pathname)) != 0)
1289      {
1290      if (dee_action == dee_SKIP) return 1;
1291      if (dee_action == dee_RECURSE)
1292        {
1293        char buffer[1024];
1294        char *nextfile;
1295        directory_type *dir = opendirectory(pathname);
1296    
1297        if (dir == NULL)
1298          {
1299          if (!silent)
1300            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1301              strerror(errno));
1302          return 2;
1303          }
1304    
1305        while ((nextfile = readdirectory(dir)) != NULL)
1306          {
1307          int frc, blen;
1308          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1309          blen = strlen(buffer);
1310    
1311          if (exclude_compiled != NULL &&
1312              pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1313            continue;
1314    
1315          if (include_compiled != NULL &&
1316              pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1317            continue;
1318    
1319          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1320          if (frc > 1) rc = frc;
1321           else if (frc == 0 && rc == 1) rc = 0;
1322          }
1323    
1324        closedirectory(dir);
1325        return rc;
1326        }
1327      }
1328    
1329    /* If the file is not a directory and not a regular file, skip it if that's
1330    been requested. */
1331    
1332    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1333    
1334    /* Control reaches here if we have a regular file, or if we have a directory
1335    and recursion or skipping was not requested, or if we have anything else and
1336    skipping was not requested. The scan proceeds. If this is the first and only
1337    argument at top level, we don't show the file name, unless we are only showing
1338    the file name, or the filename was forced (-H). */
1339    
1340    in = fopen(pathname, "r");
1341    if (in == NULL)
1342      {
1343      if (!silent)
1344        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1345          strerror(errno));
1346      return 2;
1347      }
1348    
1349    rc = pcregrep(in, (filenames > FN_DEFAULT ||
1350      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1351    
1352    fclose(in);
1353    return rc;
1354    }
1355    
1356    
1357    
1358    
1359    /*************************************************
1360    *                Usage function                  *
1361    *************************************************/
1362    
1363    static int
1364    usage(int rc)
1365    {
1366    option_item *op;
1367    fprintf(stderr, "Usage: pcregrep [-");
1368    for (op = optionlist; op->one_char != 0; op++)
1369      {
1370      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1371      }
1372    fprintf(stderr, "] [long options] [pattern] [files]\n");
1373    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1374      "options.\n");
1375    return rc;
1376    }
1377    
1378    
1379    
1380    
1381    /*************************************************
1382    *                Help function                   *
1383    *************************************************/
1384    
1385    static void
1386    help(void)
1387    {
1388    option_item *op;
1389    
1390    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1391    printf("Search for PATTERN in each FILE or standard input.\n");
1392    printf("PATTERN must be present if neither -e nor -f is used.\n");
1393    printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1394    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1395    
1396    printf("Options:\n");
1397    
1398    for (op = optionlist; op->one_char != 0; op++)
1399      {
1400      int n;
1401      char s[4];
1402      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1403      printf("  %s --%s%n", s, op->long_name, &n);
1404      n = 30 - n;
1405      if (n < 1) n = 1;
1406      printf("%.*s%s\n", n, "                    ", op->help_text);
1407      }
1408    
1409    printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1410    printf("trailing white space is removed and blank lines are ignored.\n");
1411    printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1412    
1413    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1414    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1415    }
1416    
1417    
1418    
1419    
1420    /*************************************************
1421    *    Handle a single-letter, no data option      *
1422    *************************************************/
1423    
1424    static int
1425    handle_option(int letter, int options)
1426    {
1427    switch(letter)
1428      {
1429      case N_FOFFSETS: file_offsets = TRUE; break;
1430      case N_HELP: help(); exit(0);
1431      case N_LOFFSETS: line_offsets = number = TRUE; break;
1432      case 'c': count_only = TRUE; break;
1433      case 'F': process_options |= PO_FIXED_STRINGS; break;
1434      case 'H': filenames = FN_FORCE; break;
1435      case 'h': filenames = FN_NONE; break;
1436      case 'i': options |= PCRE_CASELESS; break;
1437      case 'l': filenames = FN_ONLY; break;
1438      case 'L': filenames = FN_NOMATCH_ONLY; break;
1439      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1440      case 'n': number = TRUE; break;
1441      case 'o': only_matching = TRUE; break;
1442      case 'q': quiet = TRUE; break;
1443      case 'r': dee_action = dee_RECURSE; break;
1444      case 's': silent = TRUE; break;
1445      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1446      case 'v': invert = TRUE; break;
1447      case 'w': process_options |= PO_WORD_MATCH; break;
1448      case 'x': process_options |= PO_LINE_MATCH; break;
1449    
1450      case 'V':
1451      fprintf(stderr, "pcregrep version %s\n", pcre_version());
1452      exit(0);
1453      break;
1454    
1455      default:
1456      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1457      exit(usage(2));
1458      }
1459    
1460    return options;
1461    }
1462    
1463    
1464    
1465    
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "2nd", "3rd", "4th", etc. Numbers whose last
two digits are 11, 12, or 13 take "th" ("11th", "112th"), not the suffix that
the final digit alone would suggest. A negative number always gets "th".

Argument:   n    the number
Returns:    pointer to a static buffer holding the text; the contents are
            overwritten by the next call
*/

static char *
ordin(int n)
{
/* Three characters per byte is more than enough for the decimal digits of any
int; the extra four are for a sign, the two-letter suffix, and the
terminating zero. */

static char buffer[sizeof(int) * 3 + 4];
char *p = buffer;
int last_two = n % 100;

p += sprintf(p, "%d", n);

if (last_two >= 11 && last_two <= 13) strcpy(p, "th");
else switch (n%10)
  {
  case 1: strcpy(p, "st"); break;
  case 2: strcpy(p, "nd"); break;
  case 3: strcpy(p, "rd"); break;
  default: strcpy(p, "th"); break;
  }

return buffer;
}
1488    
1489    
1490    
1491    /*************************************************
1492    *          Compile a single pattern              *
1493    *************************************************/
1494    
1495    /* When the -F option has been used, this is called for each substring.
1496    Otherwise it's called for each supplied pattern.
1497    
1498    Arguments:
1499      pattern        the pattern string
1500      options        the PCRE options
1501      filename       the file name, or NULL for a command-line pattern
1502      count          0 if this is the only command line pattern, or
1503                     number of the command line pattern, or
1504                     linenumber for a pattern from a file
1505    
1506    Returns:         TRUE on success, FALSE after an error
1507    */
1508    
1509    static BOOL
1510    compile_single_pattern(char *pattern, int options, char *filename, int count)
1511    {
1512    char buffer[MBUFTHIRD + 16];
1513    const char *error;
1514    int errptr;
1515    
1516    if (pattern_count >= MAX_PATTERN_COUNT)
1517      {
1518      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1519        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1520      return FALSE;
1521      }
1522    
1523    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1524      suffix[process_options]);
1525    pattern_list[pattern_count] =
1526      pcre_compile(buffer, options, &error, &errptr, pcretables);
1527    if (pattern_list[pattern_count] != NULL)
1528      {
1529      pattern_count++;
1530      return TRUE;
1531      }
1532    
1533    /* Handle compile errors */
1534    
1535    errptr -= (int)strlen(prefix[process_options]);
1536    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1537    
1538    if (filename == NULL)
1539      {
1540      if (count == 0)
1541        fprintf(stderr, "pcregrep: Error in command-line regex "
1542          "at offset %d: %s\n", errptr, error);
1543      else
1544        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1545          "at offset %d: %s\n", ordin(count), errptr, error);
1546      }
1547    else
1548      {
1549      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1550        "at offset %d: %s\n", count, filename, errptr, error);
1551      }
1552    
1553    return FALSE;
1554    }
1555    
1556    
1557    
1558    /*************************************************
1559    *           Compile one supplied pattern         *
1560    *************************************************/
1561    
1562    /* When the -F option has been used, each string may be a list of strings,
1563    separated by line breaks. They will be matched literally.
1564    
1565    Arguments:
1566      pattern        the pattern string
1567      options        the PCRE options
1568      filename       the file name, or NULL for a command-line pattern
1569      count          0 if this is the only command line pattern, or
1570                     number of the command line pattern, or
1571                     linenumber for a pattern from a file
1572    
1573    Returns:         TRUE on success, FALSE after an error
1574    */
1575    
1576    static BOOL
1577    compile_pattern(char *pattern, int options, char *filename, int count)
1578    {
1579    if ((process_options & PO_FIXED_STRINGS) != 0)
1580      {
1581      char *eop = pattern + strlen(pattern);
1582      char buffer[MBUFTHIRD];
1583      for(;;)
1584        {
1585        int ellength;
1586        char *p = end_of_line(pattern, eop, &ellength);
1587        if (ellength == 0)
1588          return compile_single_pattern(pattern, options, filename, count);
1589        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1590        pattern = p;
1591        if (!compile_single_pattern(buffer, options, filename, count))
1592          return FALSE;
1593        }
1594      }
1595    else return compile_single_pattern(pattern, options, filename, count);
1596    }
1597    
1598    
1599    
1600    /*************************************************
1601    *                Main program                    *
1602    *************************************************/
1603    
1604    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1605    
1606    int
1607    main(int argc, char **argv)
1608    {
1609    int i, j;
1610    int rc = 1;
1611    int pcre_options = 0;
1612    int cmd_pattern_count = 0;
1613    int hint_count = 0;
1614    int errptr;
1615    BOOL only_one_at_top;
1616    char *patterns[MAX_PATTERN_COUNT];
1617    const char *locale_from = "--locale";
1618    const char *error;
1619    
1620    /* Set the default line ending value from the default in the PCRE library;
1621    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1622    */
1623    
1624    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1625    switch(i)
1626      {
1627      default:                 newline = (char *)"lf"; break;
1628      case '\r':               newline = (char *)"cr"; break;
1629      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1630      case -1:                 newline = (char *)"any"; break;
1631      case -2:                 newline = (char *)"anycrlf"; break;
1632      }
1633    
1634    /* Process the options */
1635    
1636    for (i = 1; i < argc; i++)
1637      {
1638      option_item *op = NULL;
1639      char *option_data = (char *)"";    /* default to keep compiler happy */
1640      BOOL longop;
1641      BOOL longopwasequals = FALSE;
1642    
1643      if (argv[i][0] != '-') break;
1644    
1645      /* If we hit an argument that is just "-", it may be a reference to STDIN,
1646      but only if we have previously had -e or -f to define the patterns. */
1647    
1648      if (argv[i][1] == 0)
1649        {
1650        if (pattern_filename != NULL || pattern_count > 0) break;
1651          else exit(usage(2));
1652        }
1653    
1654      /* Handle a long name option, or -- to terminate the options */
1655    
1656      if (argv[i][1] == '-')
1657        {
1658        char *arg = argv[i] + 2;
1659        char *argequals = strchr(arg, '=');
1660    
1661        if (*arg == 0)    /* -- terminates options */
1662          {
1663          i++;
1664          break;                /* out of the options-handling loop */
1665          }
1666    
1667        longop = TRUE;
1668    
1669        /* Some long options have data that follows after =, for example file=name.
1670        Some options have variations in the long name spelling: specifically, we
1671        allow "regexp" because GNU grep allows it, though I personally go along
1672        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1673        These options are entered in the table as "regex(p)". No option is in both
1674        these categories, fortunately. */
1675    
1676        for (op = optionlist; op->one_char != 0; op++)
1677          {
1678          char *opbra = strchr(op->long_name, '(');
1679          char *equals = strchr(op->long_name, '=');
1680          if (opbra == NULL)     /* Not a (p) case */
1681            {
1682            if (equals == NULL)  /* Not thing=data case */
1683              {
1684              if (strcmp(arg, op->long_name) == 0) break;
1685              }
1686            else                 /* Special case xxx=data */
1687              {
1688              int oplen = equals - op->long_name;
1689              int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1690              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1691                {
1692                option_data = arg + arglen;
1693                if (*option_data == '=')
1694                  {
1695                  option_data++;
1696                  longopwasequals = TRUE;
1697                  }
1698                break;
1699                }
1700              }
1701            }
1702          else                   /* Special case xxxx(p) */
1703            {
1704            char buff1[24];
1705            char buff2[24];
1706            int baselen = opbra - op->long_name;
1707            sprintf(buff1, "%.*s", baselen, op->long_name);
1708            sprintf(buff2, "%s%.*s", buff1,
1709              (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1710            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1711              break;
1712            }
1713          }
1714    
1715        if (op->one_char == 0)
1716          {
1717          fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1718          exit(usage(2));
1719          }
1720        }
1721    
1722    
1723      /* Jeffrey Friedl's debugging harness uses these additional options which
1724      are not in the right form for putting in the option table because they use
1725      only one hyphen, yet are more than one character long. By putting them
1726      separately here, they will not get displayed as part of the help() output,
1727      but I don't think Jeffrey will care about that. */
1728    
1729    #ifdef JFRIEDL_DEBUG
1730      else if (strcmp(argv[i], "-pre") == 0) {
1731              jfriedl_prefix = argv[++i];
1732              continue;
1733      } else if (strcmp(argv[i], "-post") == 0) {
1734              jfriedl_postfix = argv[++i];
1735              continue;
1736      } else if (strcmp(argv[i], "-XT") == 0) {
1737              sscanf(argv[++i], "%d", &jfriedl_XT);
1738              continue;
1739      } else if (strcmp(argv[i], "-XR") == 0) {
1740              sscanf(argv[++i], "%d", &jfriedl_XR);
1741              continue;
1742      }
1743    #endif
1744    
1745    
1746      /* One-char options; many that have no data may be in a single argument; we
1747      continue till we hit the last one or one that needs data. */
1748    
1749      else
1750        {
1751        char *s = argv[i] + 1;
1752        longop = FALSE;
1753        while (*s != 0)
1754          {
1755          for (op = optionlist; op->one_char != 0; op++)
1756            { if (*s == op->one_char) break; }
1757          if (op->one_char == 0)
1758            {
1759            fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1760              *s, argv[i]);
1761            exit(usage(2));
1762            }
1763          if (op->type != OP_NODATA || s[1] == 0)
1764            {
1765            option_data = s+1;
1766            break;
1767            }
1768          pcre_options = handle_option(*s++, pcre_options);
1769          }
1770        }
1771    
1772      /* At this point we should have op pointing to a matched option. If the type
1773      is NO_DATA, it means that there is no data, and the option might set
1774      something in the PCRE options. */
1775    
1776      if (op->type == OP_NODATA)
1777        {
1778        pcre_options = handle_option(op->one_char, pcre_options);
1779        continue;
1780        }
1781    
1782      /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1783      either has a value or defaults to something. It cannot have data in a
1784      separate item. At the moment, the only such options are "colo(u)r" and
1785      Jeffrey Friedl's special -S debugging option. */
1786    
1787      if (*option_data == 0 &&
1788          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1789        {
1790        switch (op->one_char)
1791          {
1792          case N_COLOUR:
1793          colour_option = (char *)"auto";
1794          break;
1795    #ifdef JFRIEDL_DEBUG
1796          case 'S':
1797          S_arg = 0;
1798          break;
1799    #endif
1800          }
1801        continue;
1802        }
1803    
1804      /* Otherwise, find the data string for the option. */
1805    
1806      if (*option_data == 0)
1807        {
1808        if (i >= argc - 1 || longopwasequals)
1809          {
1810          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1811          exit(usage(2));
1812          }
1813        option_data = argv[++i];
1814        }
1815    
1816      /* If the option type is OP_PATLIST, it's the -e option, which can be called
1817      multiple times to create a list of patterns. */
1818    
1819      if (op->type == OP_PATLIST)
1820        {
1821        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1822          {
1823          fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1824            MAX_PATTERN_COUNT);
1825          return 2;
1826          }
1827        patterns[cmd_pattern_count++] = option_data;
1828        }
1829    
1830      /* Otherwise, deal with single string or numeric data values. */
1831    
1832      else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1833        {
1834        *((char **)op->dataptr) = option_data;
1835        }
1836      else
1837        {
1838        char *endptr;
1839        int n = strtoul(option_data, &endptr, 10);
1840        if (*endptr != 0)
1841          {
1842          if (longop)
1843            {
1844            char *equals = strchr(op->long_name, '=');
1845            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1846              equals - op->long_name;
1847            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1848              option_data, nlen, op->long_name);
1849            }
1850          else
1851            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1852              option_data, op->one_char);
1853          exit(usage(2));
1854          }
1855        *((int *)op->dataptr) = n;
1856        }
1857      }
1858    
1859    /* Options have been decoded. If -C was used, its value is used as a default
1860    for -A and -B. */
1861    
1862    if (both_context > 0)
1863      {
1864      if (after_context == 0) after_context = both_context;
1865      if (before_context == 0) before_context = both_context;
1866      }
1867    
1868    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
1869    However, the latter two set the only_matching flag. */
1870    
1871    if ((only_matching && (file_offsets || line_offsets)) ||
1872        (file_offsets && line_offsets))
1873      {
1874      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
1875        "and/or --line-offsets\n");
1876      exit(usage(2));
1877      }
1878    
1879    if (file_offsets || line_offsets) only_matching = TRUE;
1880    
1881    /* If a locale has not been provided as an option, see if the LC_CTYPE or
1882    LC_ALL environment variable is set, and if so, use it. */
1883    
1884    if (locale == NULL)
1885      {
1886      locale = getenv("LC_ALL");
1887      locale_from = "LCC_ALL";
1888      }
1889    
1890    if (locale == NULL)
1891      {
1892      locale = getenv("LC_CTYPE");
1893      locale_from = "LC_CTYPE";
1894      }
1895    
1896    /* If a locale has been provided, set it, and generate the tables the PCRE
1897    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1898    
1899    if (locale != NULL)
1900      {
1901      if (setlocale(LC_CTYPE, locale) == NULL)
1902        {
1903        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1904          locale, locale_from);
1905        return 2;
1906        }
1907      pcretables = pcre_maketables();
1908      }
1909    
1910    /* Sort out colouring */
1911    
1912    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1913      {
1914      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1915      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1916      else
1917        {
1918        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1919          colour_option);
1920        return 2;
1921        }
1922      if (do_colour)
1923        {
1924        char *cs = getenv("PCREGREP_COLOUR");
1925        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1926        if (cs != NULL) colour_string = cs;
1927        }
1928      }
1929    
1930    /* Interpret the newline type; the default settings are Unix-like. */
1931    
1932    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1933      {
1934      pcre_options |= PCRE_NEWLINE_CR;
1935      endlinetype = EL_CR;
1936      }
1937    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1938      {
1939      pcre_options |= PCRE_NEWLINE_LF;
1940      endlinetype = EL_LF;
1941      }
1942    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1943      {
1944      pcre_options |= PCRE_NEWLINE_CRLF;
1945      endlinetype = EL_CRLF;
1946      }
1947    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1948      {
1949      pcre_options |= PCRE_NEWLINE_ANY;
1950      endlinetype = EL_ANY;
1951      }
1952    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1953      {
1954      pcre_options |= PCRE_NEWLINE_ANYCRLF;
1955      endlinetype = EL_ANYCRLF;
1956      }
1957    else
1958      {
1959      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1960      return 2;
1961      }
1962    
1963    /* Interpret the text values for -d and -D */
1964    
1965    if (dee_option != NULL)
1966      {
1967      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1968      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1969      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1970      else
1971        {
1972        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1973        return 2;
1974        }
1975      }
1976    
1977    if (DEE_option != NULL)
1978      {
1979      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1980      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1981      else
1982        {
1983        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1984        return 2;
1985        }
1986      }
1987    
1988    /* Check the values for Jeffrey Friedl's debugging options. */
1989    
1990    #ifdef JFRIEDL_DEBUG
1991    if (S_arg > 9)
1992      {
1993      fprintf(stderr, "pcregrep: bad value for -S option\n");
1994      return 2;
1995      }
1996    if (jfriedl_XT != 0 || jfriedl_XR != 0)
1997      {
1998      if (jfriedl_XT == 0) jfriedl_XT = 1;
1999      if (jfriedl_XR == 0) jfriedl_XR = 1;
2000      }
2001    #endif
2002    
2003    /* Get memory to store the pattern and hints lists. */
2004    
2005    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2006    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2007    
2008    if (pattern_list == NULL || hints_list == NULL)
2009      {
2010      fprintf(stderr, "pcregrep: malloc failed\n");
2011      goto EXIT2;
2012      }
2013    
2014    /* If no patterns were provided by -e, and there is no file provided by -f,
2015    the first argument is the one and only pattern, and it must exist. */
2016    
2017    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2018      {
2019      if (i >= argc) return usage(2);
2020      patterns[cmd_pattern_count++] = argv[i++];
2021      }
2022    
2023    /* Compile the patterns that were provided on the command line, either by
2024    multiple uses of -e or as a single unkeyed pattern. */
2025    
2026    for (j = 0; j < cmd_pattern_count; j++)
2027      {
2028      if (!compile_pattern(patterns[j], pcre_options, NULL,
2029           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2030        goto EXIT2;
2031      }
2032    
2033    /* Compile the regular expressions that are provided in a file. */
2034    
2035    if (pattern_filename != NULL)
2036      {
2037      int linenumber = 0;
2038      FILE *f;
2039      char *filename;
2040      char buffer[MBUFTHIRD];
2041    
2042      if (strcmp(pattern_filename, "-") == 0)
2043        {
2044        f = stdin;
2045        filename = stdin_name;
2046        }
2047      else
2048        {
2049        f = fopen(pattern_filename, "r");
2050        if (f == NULL)
2051          {
2052          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2053            strerror(errno));
2054          goto EXIT2;
2055          }
2056        filename = pattern_filename;
2057        }
2058    
2059      while (fgets(buffer, MBUFTHIRD, f) != NULL)
2060        {
2061        char *s = buffer + (int)strlen(buffer);
2062        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2063        *s = 0;
2064        linenumber++;
2065        if (buffer[0] == 0) continue;   /* Skip blank lines */
2066        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2067          goto EXIT2;
2068        }
2069    
2070      if (f != stdin) fclose(f);
2071      }
2072    
2073    /* Study the regular expressions, as we will be running them many times */
2074    
2075    for (j = 0; j < pattern_count; j++)
2076      {
2077      hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2078      if (error != NULL)
2079        {
2080        char s[16];
2081        if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2082        fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2083        goto EXIT2;
2084        }
2085      hint_count++;
2086      }
2087    
2088    /* If there are include or exclude patterns, compile them. */
2089    
2090    if (exclude_pattern != NULL)
2091      {
2092      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2093        pcretables);
2094      if (exclude_compiled == NULL)
2095        {
2096        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2097          errptr, error);
2098        goto EXIT2;
2099        }
2100      }
2101    
2102    if (include_pattern != NULL)
2103      {
2104      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2105        pcretables);
2106      if (include_compiled == NULL)
2107        {
2108        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2109          errptr, error);
2110        goto EXIT2;
2111        }
2112      }
2113    
2114    /* If there are no further arguments, do the business on stdin and exit. */
2115    
2116    if (i >= argc)
2117      {
2118      rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2119      goto EXIT;
2120      }
2121    
2122    /* Otherwise, work through the remaining arguments as files or directories.
2123    Pass in the fact that there is only one argument at top level - this suppresses
2124    the file name if the argument is not a directory and filenames are not
2125    otherwise forced. */
2126    
2127    only_one_at_top = i == argc - 1;   /* Catch initial value of i */
2128    
2129    for (; i < argc; i++)
2130      {
2131      int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2132        only_one_at_top);
2133      if (frc > 1) rc = frc;
2134        else if (frc == 0 && rc == 1) rc = 0;
2135      }
2136    
2137    EXIT:
2138    if (pattern_list != NULL)
2139      {
2140      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2141      free(pattern_list);
2142      }
2143    if (hints_list != NULL)
2144      {
2145      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2146      free(hints_list);
2147      }
2148    return rc;
2149    
2150    EXIT2:
2151    rc = 2;
2152    goto EXIT;
2153  }  }
2154    
2155  /* End */  /* End of pcregrep */

Legend:
Removed from v.49  
changed lines
  Added in v.280

  ViewVC Help
Powered by ViewVC 1.1.5