/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 49 by nigel, Sat Feb 24 21:39:33 2007 UTC revision 283 by ph10, Fri Dec 7 19:59:19 2007 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2007 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44    #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55    #include <unistd.h>
56    #endif
57    
58  #include "pcre.h"  #include "pcre.h"
59    
60  #define FALSE 0  #define FALSE 0
# Line 17  its pattern matching. */ Line 62  its pattern matching. */
62    
63  typedef int BOOL;  typedef int BOOL;
64    
65    #define MAX_PATTERN_COUNT 100
66    
67    #if BUFSIZ > 8192
68    #define MBUFTHIRD BUFSIZ
69    #else
70    #define MBUFTHIRD 8192
71    #endif
72    
73    /* Values for the "filenames" variable, which specifies options for file name
74    output. The order is important; it is assumed that a file name is wanted for
75    all values greater than FN_DEFAULT. */
76    
77    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
78    
79    /* Actions for the -d and -D options */
80    
81    enum { dee_READ, dee_SKIP, dee_RECURSE };
82    enum { DEE_READ, DEE_SKIP };
83    
84    /* Actions for special processing options (flag bits) */
85    
86    #define PO_WORD_MATCH     0x0001
87    #define PO_LINE_MATCH     0x0002
88    #define PO_FIXED_STRINGS  0x0004
89    
90    /* Line ending types */
91    
92    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
93    
94    
95    
96  /*************************************************  /*************************************************
97  *               Global variables                 *  *               Global variables                 *
98  *************************************************/  *************************************************/
99    
100  static pcre *pattern;  /* Jeffrey Friedl has some debugging requirements that are not part of the
101  static pcre_extra *hints;  regular code. */
102    
103    #ifdef JFRIEDL_DEBUG
104    static int S_arg = -1;
105    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
106    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
107    static const char *jfriedl_prefix = "";
108    static const char *jfriedl_postfix = "";
109    #endif
110    
111    static int  endlinetype;
112    
113    static char *colour_string = (char *)"1;31";
114    static char *colour_option = NULL;
115    static char *dee_option = NULL;
116    static char *DEE_option = NULL;
117    static char *newline = NULL;
118    static char *pattern_filename = NULL;
119    static char *stdin_name = (char *)"(standard input)";
120    static char *locale = NULL;
121    
122    static const unsigned char *pcretables = NULL;
123    
124    static int  pattern_count = 0;
125    static pcre **pattern_list = NULL;
126    static pcre_extra **hints_list = NULL;
127    
128    static char *include_pattern = NULL;
129    static char *exclude_pattern = NULL;
130    
131    static pcre *include_compiled = NULL;
132    static pcre *exclude_compiled = NULL;
133    
134    static int after_context = 0;
135    static int before_context = 0;
136    static int both_context = 0;
137    static int dee_action = dee_READ;
138    static int DEE_action = DEE_READ;
139    static int error_count = 0;
140    static int filenames = FN_DEFAULT;
141    static int process_options = 0;
142    
143  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
144  static BOOL filenames_only = FALSE;  static BOOL do_colour = FALSE;
145    static BOOL file_offsets = FALSE;
146    static BOOL hyphenpending = FALSE;
147  static BOOL invert = FALSE;  static BOOL invert = FALSE;
148    static BOOL line_offsets = FALSE;
149    static BOOL multiline = FALSE;
150  static BOOL number = FALSE;  static BOOL number = FALSE;
151    static BOOL only_matching = FALSE;
152    static BOOL quiet = FALSE;
153  static BOOL silent = FALSE;  static BOOL silent = FALSE;
154  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
155    
156    /* Structure for options and list of them */
157    
158    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
159           OP_PATLIST };
160    
161    typedef struct option_item {
162      int type;
163      int one_char;
164      void *dataptr;
165      const char *long_name;
166      const char *help_text;
167    } option_item;
168    
169    /* Options without a single-letter equivalent get a negative value. This can be
170    used to identify them. */
171    
172    #define N_COLOUR    (-1)
173    #define N_EXCLUDE   (-2)
174    #define N_HELP      (-3)
175    #define N_INCLUDE   (-4)
176    #define N_LABEL     (-5)
177    #define N_LOCALE    (-6)
178    #define N_NULL      (-7)
179    #define N_LOFFSETS  (-8)
180    #define N_FOFFSETS  (-9)
181    
182    static option_item optionlist[] = {
183      { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
184      { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
185      { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
186      { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
187      { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
188      { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
189      { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
190      { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
191      { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
192      { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
193      { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
194      { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
195      { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
196      { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
197      { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
198      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
199      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
200      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
201      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
202      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
203      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
204      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
205      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
206      { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
207      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
208      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
209      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
210      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
211      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
212      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
213    #ifdef JFRIEDL_DEBUG
214      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
215    #endif
216      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
217      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
218      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
219      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
220      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
221      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
222      { OP_NODATA,    0,        NULL,               NULL,            NULL }
223    };
224    
225    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
226    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
227    that the combination of -w and -x has the same effect as -x on its own, so we
228    can treat them as the same. */
229    
230    static const char *prefix[] = {
231      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
232    
233    static const char *suffix[] = {
234      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
235    
236    /* UTF-8 tables - used only when the newline setting is "any". */
237    
238    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
239    
240    const char utf8_table4[] = {
241      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
242      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
243      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
244      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
245    
246    
247    
248    /*************************************************
249    *            OS-specific functions               *
250    *************************************************/
251    
252    /* These functions are defined so that they can be made system specific,
253    although at present the only ones are for Unix, Win32, and for "no support". */
254    
255    
256    /************* Directory scanning in Unix ***********/
257    
258    #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
259    #include <sys/types.h>
260    #include <sys/stat.h>
261    #include <dirent.h>
262    
263    typedef DIR directory_type;
264    
/* Unix test for a directory. Returns '/' when stat() reports that filename is
a directory, and 0 otherwise. A stat() failure also yields 0, in the
expectation that the subsequent attempt to open the path as a file will fail
and report the problem. */

static int
isdirectory(char *filename)
{
struct stat info;

if (stat(filename, &info) < 0) return 0;
if (S_ISDIR(info.st_mode)) return '/';
return 0;
}
273    
274    static directory_type *
275    opendirectory(char *filename)
276    {
277    return opendir(filename);
278    }
279    
280    static char *
281    readdirectory(directory_type *dir)
282    {
283    for (;;)
284      {
285      struct dirent *dent = readdir(dir);
286      if (dent == NULL) return NULL;
287      if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
288        return dent->d_name;
289      }
290    /* Control never reaches here */
291    }
292    
293    static void
294    closedirectory(directory_type *dir)
295    {
296    closedir(dir);
297    }
298    
299    
300    /************* Test for regular file in Unix **********/
301    
/* Unix test for a regular file. Returns non-zero when stat() reports a
regular file. If stat() itself fails, 1 is returned as well, in the expectation
that the subsequent attempt to open the path as a file will fail and report the
problem. */

static int
isregfile(char *filename)
{
struct stat info;

if (stat(filename, &info) < 0) return 1;
return S_ISREG(info.st_mode)? 1 : 0;
}
310    
311    
312    /************* Test stdout for being a terminal in Unix **********/
313    
314    static BOOL
315    is_stdout_tty(void)
316    {
317    return isatty(fileno(stdout));
318    }
319    
320    
321    /************* Directory scanning in Win32 ***********/
322    
323    /* I (Philip Hazel) have no means of testing this code. It was contributed by
324    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
325    when it did not exist. David Byron added a patch that moved the #include of
326    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
327    */
328    
329    #elif HAVE_WINDOWS_H
330    
331    #ifndef STRICT
332    # define STRICT
333    #endif
334    #ifndef WIN32_LEAN_AND_MEAN
335    # define WIN32_LEAN_AND_MEAN
336    #endif
337    
338    #include <windows.h>
339    
340    #ifndef INVALID_FILE_ATTRIBUTES
341    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
342    #endif
343    
344    typedef struct directory_type
345    {
346    HANDLE handle;
347    BOOL first;
348    WIN32_FIND_DATA data;
349    } directory_type;
350    
351    int
352    isdirectory(char *filename)
353    {
354    DWORD attr = GetFileAttributes(filename);
355    if (attr == INVALID_FILE_ATTRIBUTES)
356      return 0;
357    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
358    }
359    
360    directory_type *
361    opendirectory(char *filename)
362    {
363    size_t len;
364    char *pattern;
365    directory_type *dir;
366    DWORD err;
367    len = strlen(filename);
368    pattern = (char *) malloc(len + 3);
369    dir = (directory_type *) malloc(sizeof(*dir));
370    if ((pattern == NULL) || (dir == NULL))
371      {
372      fprintf(stderr, "pcregrep: malloc failed\n");
373      exit(2);
374      }
375    memcpy(pattern, filename, len);
376    memcpy(&(pattern[len]), "\\*", 3);
377    dir->handle = FindFirstFile(pattern, &(dir->data));
378    if (dir->handle != INVALID_HANDLE_VALUE)
379      {
380      free(pattern);
381      dir->first = TRUE;
382      return dir;
383      }
384    err = GetLastError();
385    free(pattern);
386    free(dir);
387    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
388    return NULL;
389    }
390    
391    char *
392    readdirectory(directory_type *dir)
393    {
394    for (;;)
395      {
396      if (!dir->first)
397        {
398        if (!FindNextFile(dir->handle, &(dir->data)))
399          return NULL;
400        }
401      else
402        {
403        dir->first = FALSE;
404        }
405      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
406        return dir->data.cFileName;
407      }
408    #ifndef _MSC_VER
409    return NULL;   /* Keep compiler happy; never executed */
410    #endif
411    }
412    
413    void
414    closedirectory(directory_type *dir)
415    {
416    FindClose(dir->handle);
417    free(dir);
418    }
419    
420    
421    /************* Test for regular file in Win32 **********/
422    
423    /* I don't know how to do this, or if it can be done; assume all paths are
424    regular if they are not directories. */
425    
/* Win32 test for a regular file: anything that isdirectory() does not report
as a directory is taken to be regular. */

int isregfile(char *filename)
{
if (isdirectory(filename)) return 0;
return 1;
}
430    
431    
432    /************* Test stdout for being a terminal in Win32 **********/
433    
434    /* I don't know how to do this; assume never */
435    
436    static BOOL
437    is_stdout_tty(void)
438    {
439    return FALSE;
440    }
441    
442    
443    /************* Directory scanning when we can't do it ***********/
444    
445    /* The type is void, and apart from isdirectory(), the functions do nothing. */
446    
447    #else
448    
/* Versions for systems with no directory support. The directory type is void;
nothing is ever reported as a directory, and the remaining functions do
nothing. */

typedef void directory_type;

int isdirectory(char *filename)
{
(void)filename;
return 0;
}

directory_type * opendirectory(char *filename)
{
(void)filename;
return (directory_type*)0;
}

char *readdirectory(directory_type *dir)
{
(void)dir;
return (char*)0;
}

void closedirectory(directory_type *dir)
{
(void)dir;
}
455    
456    
457    /************* Test for regular when we can't do it **********/
458    
459    /* Assume all files are regular. */
460    
/* With no way of testing, every path is assumed to be a regular file. */

int isregfile(char *filename)
{
(void)filename;
return 1;
}
462    
463    
464    /************* Test stdout for being a terminal when we can't do it **********/
465    
466    static BOOL
467    is_stdout_tty(void)
468    {
469    return FALSE;
470    }
471    
472    
473    #endif
474    
475    
476  #if ! HAVE_STRERROR  
477    #ifndef HAVE_STRERROR
478  /*************************************************  /*************************************************
479  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
480  *************************************************/  *************************************************/
# Line 58  return sys_errlist[n]; Line 497  return sys_errlist[n];
497    
498    
499  /*************************************************  /*************************************************
500  *              Grep an individual file           *  *             Find end of line                   *
501  *************************************************/  *************************************************/
502    
503  static int  /* The length of the endline sequence that is found is set via lenptr. This may
504  pcregrep(FILE *in, char *name)  be zero at the very end of the file if there is no line-ending sequence there.
 {  
 int rc = 1;  
 int linenumber = 0;  
 int count = 0;  
 int offsets[99];  
 char buffer[BUFSIZ];  
505    
506  while (fgets(buffer, sizeof(buffer), in) != NULL)  Arguments:
507    {    p         current position in line
508    BOOL match;    endptr    end of available data
509    int length = (int)strlen(buffer);    lenptr    where to put the length of the eol sequence
   if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;  
   linenumber++;  
510    
511    match = pcre_exec(pattern, hints, buffer, length, 0, 0, offsets, 99) >= 0;  Returns:    pointer to the last byte of the line
512    if (match && whole_lines && offsets[1] != length) match = FALSE;  */
513    
514    if (match != invert)  static char *
515    end_of_line(char *p, char *endptr, int *lenptr)
516    {
517    switch(endlinetype)
518      {
519      default:      /* Just in case */
520      case EL_LF:
521      while (p < endptr && *p != '\n') p++;
522      if (p < endptr)
523      {      {
524      if (count_only) count++;      *lenptr = 1;
525        return p + 1;
526        }
527      *lenptr = 0;
528      return endptr;
529    
530      case EL_CR:
531      while (p < endptr && *p != '\r') p++;
532      if (p < endptr)
533        {
534        *lenptr = 1;
535        return p + 1;
536        }
537      *lenptr = 0;
538      return endptr;
539    
540      else if (filenames_only)    case EL_CRLF:
541      for (;;)
542        {
543        while (p < endptr && *p != '\r') p++;
544        if (++p >= endptr)
545        {        {
546        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        *lenptr = 0;
547        return 0;        return endptr;
548          }
549        if (*p == '\n')
550          {
551          *lenptr = 2;
552          return p + 1;
553        }        }
554        }
555      break;
556    
557      else if (silent) return 0;    case EL_ANYCRLF:
558      while (p < endptr)
559        {
560        int extra = 0;
561        register int c = *((unsigned char *)p);
562    
563      else      if (utf8 && c >= 0xc0)
564        {        {
565        if (name != NULL) fprintf(stdout, "%s:", name);        int gcii, gcss;
566        if (number) fprintf(stdout, "%d:", linenumber);        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
567        fprintf(stdout, "%s\n", buffer);        gcss = 6*extra;
568          c = (c & utf8_table3[extra]) << gcss;
569          for (gcii = 1; gcii <= extra; gcii++)
570            {
571            gcss -= 6;
572            c |= (p[gcii] & 0x3f) << gcss;
573            }
574        }        }
575    
576      rc = 0;      p += 1 + extra;
     }  
   }  
577    
578  if (count_only)      switch (c)
579    {        {
580    if (name != NULL) fprintf(stdout, "%s:", name);        case 0x0a:    /* LF */
581    fprintf(stdout, "%d\n", count);        *lenptr = 1;
582    }        return p;
583    
584          case 0x0d:    /* CR */
585          if (p < endptr && *p == 0x0a)
586            {
587            *lenptr = 2;
588            p++;
589            }
590          else *lenptr = 1;
591          return p;
592    
593  return rc;        default:
594  }        break;
595          }
596        }   /* End of loop for ANYCRLF case */
597    
598      *lenptr = 0;  /* Must have hit the end */
599      return endptr;
600    
601      case EL_ANY:
602      while (p < endptr)
603        {
604        int extra = 0;
605        register int c = *((unsigned char *)p);
606    
607        if (utf8 && c >= 0xc0)
608          {
609          int gcii, gcss;
610          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
611          gcss = 6*extra;
612          c = (c & utf8_table3[extra]) << gcss;
613          for (gcii = 1; gcii <= extra; gcii++)
614            {
615            gcss -= 6;
616            c |= (p[gcii] & 0x3f) << gcss;
617            }
618          }
619    
620  /*************************************************      p += 1 + extra;
 *                Usage function                  *  
 *************************************************/  
621    
622  static int      switch (c)
623  usage(int rc)        {
624  {        case 0x0a:    /* LF */
625  fprintf(stderr, "Usage: pcregrep [-Vchilnsvx] pattern [file] ...\n");        case 0x0b:    /* VT */
626  return rc;        case 0x0c:    /* FF */
627  }        *lenptr = 1;
628          return p;
629    
630          case 0x0d:    /* CR */
631          if (p < endptr && *p == 0x0a)
632            {
633            *lenptr = 2;
634            p++;
635            }
636          else *lenptr = 1;
637          return p;
638    
639          case 0x85:    /* NEL */
640          *lenptr = utf8? 2 : 1;
641          return p;
642    
643          case 0x2028:  /* LS */
644          case 0x2029:  /* PS */
645          *lenptr = 3;
646          return p;
647    
648          default:
649          break;
650          }
651        }   /* End of loop for ANY case */
652    
653      *lenptr = 0;  /* Must have hit the end */
654      return endptr;
655      }     /* End of overall switch */
656    }
657    
658    
659    
660  /*************************************************  /*************************************************
661  *                Main program                    *  *         Find start of previous line            *
662  *************************************************/  *************************************************/
663    
664  int  /* This is called when looking back for before lines to print.
 main(int argc, char **argv)  
 {  
 int i;  
 int rc = 1;  
 int options = 0;  
 int errptr;  
 const char *error;  
 BOOL filenames = TRUE;  
665    
666  /* Process the options */  Arguments:
667      p         start of the subsequent line
668      startptr  start of available data
669    
670  for (i = 1; i < argc; i++)  Returns:    pointer to the start of the previous line
671    */
672    
673    static char *
674    previous_line(char *p, char *startptr)
675    {
676    switch(endlinetype)
677    {    {
678    char *s;    default:      /* Just in case */
679    if (argv[i][0] != '-') break;    case EL_LF:
680    s = argv[i] + 1;    p--;
681    while (*s != 0)    while (p > startptr && p[-1] != '\n') p--;
682      return p;
683    
684      case EL_CR:
685      p--;
686      while (p > startptr && p[-1] != '\n') p--;
687      return p;
688    
689      case EL_CRLF:
690      for (;;)
691        {
692        p -= 2;
693        while (p > startptr && p[-1] != '\n') p--;
694        if (p <= startptr + 1 || p[-2] == '\r') return p;
695        }
696      return p;   /* But control should never get here */
697    
698      case EL_ANY:
699      case EL_ANYCRLF:
700      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
701      if (utf8) while ((*p & 0xc0) == 0x80) p--;
702    
703      while (p > startptr)
704      {      {
705      switch (*s++)      register int c;
706        char *pp = p - 1;
707    
708        if (utf8)
709          {
710          int extra = 0;
711          while ((*pp & 0xc0) == 0x80) pp--;
712          c = *((unsigned char *)pp);
713          if (c >= 0xc0)
714            {
715            int gcii, gcss;
716            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
717            gcss = 6*extra;
718            c = (c & utf8_table3[extra]) << gcss;
719            for (gcii = 1; gcii <= extra; gcii++)
720              {
721              gcss -= 6;
722              c |= (pp[gcii] & 0x3f) << gcss;
723              }
724            }
725          }
726        else c = *((unsigned char *)pp);
727    
728        if (endlinetype == EL_ANYCRLF) switch (c)
729        {        {
730        case 'c': count_only = TRUE; break;        case 0x0a:    /* LF */
731        case 'h': filenames = FALSE; break;        case 0x0d:    /* CR */
732        case 'i': options |= PCRE_CASELESS; break;        return p;
       case 'l': filenames_only = TRUE;  
       case 'n': number = TRUE; break;  
       case 's': silent = TRUE; break;  
       case 'v': invert = TRUE; break;  
       case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;  
733    
734        case 'V':        default:
       fprintf(stderr, "PCRE version %s\n", pcre_version());  
735        break;        break;
736          }
737    
738        else switch (c)
739          {
740          case 0x0a:    /* LF */
741          case 0x0b:    /* VT */
742          case 0x0c:    /* FF */
743          case 0x0d:    /* CR */
744          case 0x85:    /* NEL */
745          case 0x2028:  /* LS */
746          case 0x2029:  /* PS */
747          return p;
748    
749        default:        default:
750        fprintf(stderr, "pcregrep: unknown option %c\n", s[-1]);        break;
       return usage(2);  
751        }        }
752    
753        p = pp;  /* Back one character */
754        }        /* End of loop for ANY case */
755    
756      return startptr;  /* Hit start of data */
757      }     /* End of overall switch */
758    }
759    
760    
761    
762    
763    
764    /*************************************************
765    *       Print the previous "after" lines         *
766    *************************************************/
767    
768    /* This is called if we are about to lose said lines because of buffer filling,
769    and at the end of the file. The data in the line is written using fwrite() so
770    that a binary zero does not terminate it.
771    
772    Arguments:
773      lastmatchnumber   the number of the last matching line, plus one
774      lastmatchrestart  where we restarted after the last match
775      endptr            end of available data
776      printname         filename for printing
777    
778    Returns:            nothing
779    */
780    
781    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
782      char *endptr, char *printname)
783    {
784    if (after_context > 0 && lastmatchnumber > 0)
785      {
786      int count = 0;
787      while (lastmatchrestart < endptr && count++ < after_context)
788        {
789        int ellength;
790        char *pp = lastmatchrestart;
791        if (printname != NULL) fprintf(stdout, "%s-", printname);
792        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
793        pp = end_of_line(pp, endptr, &ellength);
794        fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
795        lastmatchrestart = pp;
796      }      }
797      hyphenpending = TRUE;
798    }    }
799    }
800    
 /* There must be at least a regexp argument */  
801    
 if (i >= argc) return usage(0);  
802    
803  /* Compile the regular expression. */  /*************************************************
804    *            Grep an individual file             *
805    *************************************************/
806    
807    /* This is called from grep_or_recurse() below. It uses a buffer that is three
808    times the value of MBUFTHIRD. The matching point is never allowed to stray into
809    the top third of the buffer, thus keeping more of the file available for
810    context printing or for multiline scanning. For large files, the pointer will
811    be in the middle third most of the time, so the bottom third is available for
812    "before" context printing.
813    
814    Arguments:
815      in           the fopened FILE stream
816      printname    the file name if it is to be printed for each match
817                   or NULL if the file name is not to be printed
818                   it cannot be NULL if filenames[_nomatch]_only is set
819    
820    Returns:       0 if there was at least one match
821                   1 otherwise (no matches)
822    */
823    
824    static int
825    pcregrep(FILE *in, char *printname)
826    {
827    int rc = 1;
828    int linenumber = 1;
829    int lastmatchnumber = 0;
830    int count = 0;
831    int filepos = 0;
832    int offsets[99];
833    char *lastmatchrestart = NULL;
834    char buffer[3*MBUFTHIRD];
835    char *ptr = buffer;
836    char *endptr;
837    size_t bufflength;
838    BOOL endhyphenpending = FALSE;
839    
840    /* Do the first read into the start of the buffer and set up the pointer to
841    end of what we have. */
842    
843    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
844    endptr = buffer + bufflength;
845    
846    /* Loop while the current pointer is not at the end of the file. For large
847    files, endptr will be at the end of the buffer when we are in the middle of the
848    file, but ptr will never get there, because as soon as it gets over 2/3 of the
849    way, the buffer is shifted left and re-filled. */
850    
851  pattern = pcre_compile(argv[i++], options, &error, &errptr, NULL);  while (ptr < endptr)
 if (pattern == NULL)  
852    {    {
853    fprintf(stderr, "pcregrep: error in regex at offset %d: %s\n", errptr, error);    int i, endlinelength;
854    return 2;    int mrc = 0;
855    }    BOOL match = FALSE;
856      char *matchptr = ptr;
857      char *t = ptr;
858      size_t length, linelength;
859    
860      /* At this point, ptr is at the start of a line. We need to find the length
861      of the subject string to pass to pcre_exec(). In multiline mode, it is the
862      length remainder of the data in the buffer. Otherwise, it is the length of
863      the next line. After matching, we always advance by the length of the next
864      line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
865      that any match is constrained to be in the first line. */
866    
867      t = end_of_line(t, endptr, &endlinelength);
868      linelength = t - ptr - endlinelength;
869      length = multiline? (size_t)(endptr - ptr) : linelength;
870    
871  /* Study the regular expression, as we will be running it may times */    /* Extra processing for Jeffrey Friedl's debugging. */
872    
873  hints = pcre_study(pattern, 0, &error);  #ifdef JFRIEDL_DEBUG
874  if (error != NULL)    if (jfriedl_XT || jfriedl_XR)
875    {    {
876    fprintf(stderr, "pcregrep: error while studing regex: %s\n", error);        #include <sys/time.h>
877    return 2;        #include <time.h>
878    }        struct timeval start_time, end_time;
879          struct timezone dummy;
880    
881  /* If there are no further arguments, do the business on stdin and exit */        if (jfriedl_XT)
882          {
883              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
884              const char *orig = ptr;
885              ptr = malloc(newlen + 1);
886              if (!ptr) {
887                      printf("out of memory");
888                      exit(2);
889              }
890              endptr = ptr;
891              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
892              for (i = 0; i < jfriedl_XT; i++) {
893                      strncpy(endptr, orig,  length);
894                      endptr += length;
895              }
896              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
897              length = newlen;
898          }
899    
900  if (i >= argc) return pcregrep(stdin, NULL);        if (gettimeofday(&start_time, &dummy) != 0)
901                  perror("bad gettimeofday");
902    
 /* Otherwise, work through the remaining arguments as files. If there is only  
 one, don't give its name on the output. */  
903    
904  if (i == argc - 1) filenames = FALSE;        for (i = 0; i < jfriedl_XR; i++)
905  if (filenames_only) filenames = TRUE;            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
906    
907  for (; i < argc; i++)        if (gettimeofday(&end_time, &dummy) != 0)
908    {                perror("bad gettimeofday");
909    FILE *in = fopen(argv[i], "r");  
910    if (in == NULL)        double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
911                          -
912                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
913    
914          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
915          return 0;
916      }
917    #endif
918    
919      /* We come back here after a match when the -o option (only_matching) is set,
920      in order to find any further matches in the same line. */
921    
922      ONLY_MATCHING_RESTART:
923    
924      /* Run through all the patterns until one matches. Note that we don't include
925      the final newline in the subject string. */
926    
927      for (i = 0; i < pattern_count; i++)
928      {      {
929      fprintf(stderr, "%s: failed to open: %s\n", argv[i], strerror(errno));      mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
930      rc = 2;        offsets, 99);
931        if (mrc >= 0) { match = TRUE; break; }
932        if (mrc != PCRE_ERROR_NOMATCH)
933          {
934          fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
935          if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
936          fprintf(stderr, "this line:\n");
937          fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */
938          fprintf(stderr, "\n");
939          if (error_count == 0 &&
940              (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
941            {
942            fprintf(stderr, "pcregrep: error %d means that a resource limit "
943              "was exceeded\n", mrc);
944            fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
945            }
946          if (error_count++ > 20)
947            {
948            fprintf(stderr, "pcregrep: too many errors - abandoned\n");
949            exit(2);
950            }
951          match = invert;    /* No more matching; don't show the line again */
952          break;
953          }
954      }      }
955    else  
956      /* If it's a match or a not-match (as required), do what's wanted. */
957    
958      if (match != invert)
959      {      {
960      int frc = pcregrep(in, filenames? argv[i] : NULL);      BOOL hyphenprinted = FALSE;
     if (frc == 0 && rc == 1) rc = 0;  
     fclose(in);  
     }  
   }  
961    
962  return rc;      /* We've failed if we want a file that doesn't have any matches. */
963    
964        if (filenames == FN_NOMATCH_ONLY) return 1;
965    
966        /* Just count if just counting is wanted. */
967    
968        if (count_only) count++;
969    
970        /* If all we want is a file name, there is no need to scan any more lines
971        in the file. */
972    
973        else if (filenames == FN_ONLY)
974          {
975          fprintf(stdout, "%s\n", printname);
976          return 0;
977          }
978    
979        /* Likewise, if all we want is a yes/no answer. */
980    
981        else if (quiet) return 0;
982    
983        /* The --only-matching option prints just the substring that matched, and
984        the --file-offsets and --line-offsets options output offsets for the
985        matching substring (they both force --only-matching). None of these options
986        prints any context. Afterwards, adjust the start and length, and then jump
987        back to look for further matches in the same line. If we are in invert
988        mode, however, nothing is printed - this could be still useful because the
989        return code is set. */
990    
991        else if (only_matching)
992          {
993          if (!invert)
994            {
995            if (printname != NULL) fprintf(stdout, "%s:", printname);
996            if (number) fprintf(stdout, "%d:", linenumber);
997            if (line_offsets)
998              fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,
999                offsets[1] - offsets[0]);
1000            else if (file_offsets)
1001              fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,
1002                offsets[1] - offsets[0]);
1003            else
1004              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1005            fprintf(stdout, "\n");
1006            matchptr += offsets[1];
1007            length -= offsets[1];
1008            match = FALSE;
1009            goto ONLY_MATCHING_RESTART;
1010            }
1011          }
1012    
1013        /* This is the default case when none of the above options is set. We print
1014        the matching lines(s), possibly preceded and/or followed by other lines of
1015        context. */
1016    
1017        else
1018          {
1019          /* See if there is a requirement to print some "after" lines from a
1020          previous match. We never print any overlaps. */
1021    
1022          if (after_context > 0 && lastmatchnumber > 0)
1023            {
1024            int ellength;
1025            int linecount = 0;
1026            char *p = lastmatchrestart;
1027    
1028            while (p < ptr && linecount < after_context)
1029              {
1030              p = end_of_line(p, ptr, &ellength);
1031              linecount++;
1032              }
1033    
1034            /* It is important to advance lastmatchrestart during this printing so
1035            that it interacts correctly with any "before" printing below. Print
1036            each line's data using fwrite() in case there are binary zeroes. */
1037    
1038            while (lastmatchrestart < p)
1039              {
1040              char *pp = lastmatchrestart;
1041              if (printname != NULL) fprintf(stdout, "%s-", printname);
1042              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1043              pp = end_of_line(pp, endptr, &ellength);
1044              fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1045              lastmatchrestart = pp;
1046              }
1047            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1048            }
1049    
1050          /* If there were non-contiguous lines printed above, insert hyphens. */
1051    
1052          if (hyphenpending)
1053            {
1054            fprintf(stdout, "--\n");
1055            hyphenpending = FALSE;
1056            hyphenprinted = TRUE;
1057            }
1058    
1059          /* See if there is a requirement to print some "before" lines for this
1060          match. Again, don't print overlaps. */
1061    
1062          if (before_context > 0)
1063            {
1064            int linecount = 0;
1065            char *p = ptr;
1066    
1067            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1068                   linecount < before_context)
1069              {
1070              linecount++;
1071              p = previous_line(p, buffer);
1072              }
1073    
1074            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1075              fprintf(stdout, "--\n");
1076    
1077            while (p < ptr)
1078              {
1079              int ellength;
1080              char *pp = p;
1081              if (printname != NULL) fprintf(stdout, "%s-", printname);
1082              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1083              pp = end_of_line(pp, endptr, &ellength);
1084              fwrite(p, 1, pp - p, stdout);
1085              p = pp;
1086              }
1087            }
1088    
1089          /* Now print the matching line(s); ensure we set hyphenpending at the end
1090          of the file if any context lines are being output. */
1091    
1092          if (after_context > 0 || before_context > 0)
1093            endhyphenpending = TRUE;
1094    
1095          if (printname != NULL) fprintf(stdout, "%s:", printname);
1096          if (number) fprintf(stdout, "%d:", linenumber);
1097    
1098          /* In multiline mode, we want to print to the end of the line in which
1099          the end of the matched string is found, so we adjust linelength and the
1100          line number appropriately, but only when there actually was a match
1101          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1102          the match will always be before the first newline sequence. */
1103    
1104          if (multiline)
1105            {
1106            int ellength;
1107            char *endmatch = ptr;
1108            if (!invert)
1109              {
1110              endmatch += offsets[1];
1111              t = ptr;
1112              while (t < endmatch)
1113                {
1114                t = end_of_line(t, endptr, &ellength);
1115                if (t <= endmatch) linenumber++; else break;
1116                }
1117              }
1118            endmatch = end_of_line(endmatch, endptr, &ellength);
1119            linelength = endmatch - ptr - ellength;
1120            }
1121    
1122          /*** NOTE: Use only fwrite() to output the data line, so that binary
1123          zeroes are treated as just another data character. */
1124    
1125          /* This extra option, for Jeffrey Friedl's debugging requirements,
1126          replaces the matched string, or a specific captured string if it exists,
1127          with X. When this happens, colouring is ignored. */
1128    
1129    #ifdef JFRIEDL_DEBUG
1130          if (S_arg >= 0 && S_arg < mrc)
1131            {
1132            int first = S_arg * 2;
1133            int last  = first + 1;
1134            fwrite(ptr, 1, offsets[first], stdout);
1135            fprintf(stdout, "X");
1136            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1137            }
1138          else
1139    #endif
1140    
1141          /* We have to split the line(s) up if colouring. */
1142    
1143          if (do_colour)
1144            {
1145            fwrite(ptr, 1, offsets[0], stdout);
1146            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1147            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1148            fprintf(stdout, "%c[00m", 0x1b);
1149            fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1150              stdout);
1151            }
1152          else fwrite(ptr, 1, linelength + endlinelength, stdout);
1153          }
1154    
1155        /* End of doing what has to be done for a match */
1156    
1157        rc = 0;    /* Had some success */
1158    
1159        /* Remember where the last match happened for after_context. We remember
1160        where we are about to restart, and that line's number. */
1161    
1162        lastmatchrestart = ptr + linelength + endlinelength;
1163        lastmatchnumber = linenumber + 1;
1164        }
1165    
1166      /* For a match in multiline inverted mode (which of course did not cause
1167      anything to be printed), we have to move on to the end of the match before
1168      proceeding. */
1169    
1170      if (multiline && invert && match)
1171        {
1172        int ellength;
1173        char *endmatch = ptr + offsets[1];
1174        t = ptr;
1175        while (t < endmatch)
1176          {
1177          t = end_of_line(t, endptr, &ellength);
1178          if (t <= endmatch) linenumber++; else break;
1179          }
1180        endmatch = end_of_line(endmatch, endptr, &ellength);
1181        linelength = endmatch - ptr - ellength;
1182        }
1183    
1184      /* Advance to after the newline and increment the line number. The file
1185      offset to the current line is maintained in filepos. */
1186    
1187      ptr += linelength + endlinelength;
1188      filepos += linelength + endlinelength;
1189      linenumber++;
1190    
1191      /* If we haven't yet reached the end of the file (the buffer is full), and
1192      the current point is in the top 1/3 of the buffer, slide the buffer down by
1193      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1194      about to be lost, print them. */
1195    
1196      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1197        {
1198        if (after_context > 0 &&
1199            lastmatchnumber > 0 &&
1200            lastmatchrestart < buffer + MBUFTHIRD)
1201          {
1202          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1203          lastmatchnumber = 0;
1204          }
1205    
1206        /* Now do the shuffle */
1207    
1208        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1209        ptr -= MBUFTHIRD;
1210        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1211        endptr = buffer + bufflength;
1212    
1213        /* Adjust any last match point */
1214    
1215        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1216        }
1217      }     /* Loop through the whole file */
1218    
1219    /* End of file; print final "after" lines if wanted; do_after_lines sets
1220    hyphenpending if it prints something. */
1221    
1222    if (!only_matching && !count_only)
1223      {
1224      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1225      hyphenpending |= endhyphenpending;
1226      }
1227    
1228    /* Print the file name if we are looking for those without matches and there
1229    were none. If we found a match, we won't have got this far. */
1230    
1231    if (filenames == FN_NOMATCH_ONLY)
1232      {
1233      fprintf(stdout, "%s\n", printname);
1234      return 0;
1235      }
1236    
1237    /* Print the match count if wanted */
1238    
1239    if (count_only)
1240      {
1241      if (printname != NULL) fprintf(stdout, "%s:", printname);
1242      fprintf(stdout, "%d\n", count);
1243      }
1244    
1245    return rc;
1246    }
1247    
1248    
1249    
1250    /*************************************************
1251    *     Grep a file or recurse into a directory    *
1252    *************************************************/
1253    
1254    /* Given a path name, if it's a directory, scan all the files if we are
1255    recursing; if it's a file, grep it.
1256    
1257    Arguments:
1258      pathname          the path to investigate
1259      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1260      only_one_at_top   TRUE if the path is the only one at toplevel
1261    
1262    Returns:   0 if there was at least one match
1263               1 if there were no matches
1264               2 there was some kind of error
1265    
1266    However, file opening failures are suppressed if "silent" is set.
1267    */
1268    
1269    static int
1270    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1271    {
1272    int rc = 1;
1273    int sep;
1274    FILE *in;
1275    
1276    /* If the file name is "-" we scan stdin */
1277    
1278    if (strcmp(pathname, "-") == 0)
1279      {
1280      return pcregrep(stdin,
1281        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1282          stdin_name : NULL);
1283      }
1284    
1285    
1286    /* If the file is a directory, skip if skipping or if we are recursing, scan
1287    each file within it, subject to any include or exclude patterns that were set.
1288    The scanning code is localized so it can be made system-specific. */
1289    
1290    if ((sep = isdirectory(pathname)) != 0)
1291      {
1292      if (dee_action == dee_SKIP) return 1;
1293      if (dee_action == dee_RECURSE)
1294        {
1295        char buffer[1024];
1296        char *nextfile;
1297        directory_type *dir = opendirectory(pathname);
1298    
1299        if (dir == NULL)
1300          {
1301          if (!silent)
1302            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1303              strerror(errno));
1304          return 2;
1305          }
1306    
1307        while ((nextfile = readdirectory(dir)) != NULL)
1308          {
1309          int frc, blen;
1310          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1311          blen = strlen(buffer);
1312    
1313          if (exclude_compiled != NULL &&
1314              pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1315            continue;
1316    
1317          if (include_compiled != NULL &&
1318              pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1319            continue;
1320    
1321          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1322          if (frc > 1) rc = frc;
1323           else if (frc == 0 && rc == 1) rc = 0;
1324          }
1325    
1326        closedirectory(dir);
1327        return rc;
1328        }
1329      }
1330    
1331    /* If the file is not a directory and not a regular file, skip it if that's
1332    been requested. */
1333    
1334    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1335    
1336    /* Control reaches here if we have a regular file, or if we have a directory
1337    and recursion or skipping was not requested, or if we have anything else and
1338    skipping was not requested. The scan proceeds. If this is the first and only
1339    argument at top level, we don't show the file name, unless we are only showing
1340    the file name, or the filename was forced (-H). */
1341    
1342    in = fopen(pathname, "r");
1343    if (in == NULL)
1344      {
1345      if (!silent)
1346        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1347          strerror(errno));
1348      return 2;
1349      }
1350    
1351    rc = pcregrep(in, (filenames > FN_DEFAULT ||
1352      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1353    
1354    fclose(in);
1355    return rc;
1356    }
1357    
1358    
1359    
1360    
1361    /*************************************************
1362    *                Usage function                  *
1363    *************************************************/
1364    
1365    static int
1366    usage(int rc)
1367    {
1368    option_item *op;
1369    fprintf(stderr, "Usage: pcregrep [-");
1370    for (op = optionlist; op->one_char != 0; op++)
1371      {
1372      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1373      }
1374    fprintf(stderr, "] [long options] [pattern] [files]\n");
1375    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1376      "options.\n");
1377    return rc;
1378    }
1379    
1380    
1381    
1382    
1383    /*************************************************
1384    *                Help function                   *
1385    *************************************************/
1386    
1387    static void
1388    help(void)
1389    {
1390    option_item *op;
1391    
1392    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1393    printf("Search for PATTERN in each FILE or standard input.\n");
1394    printf("PATTERN must be present if neither -e nor -f is used.\n");
1395    printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1396    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1397    
1398    printf("Options:\n");
1399    
1400    for (op = optionlist; op->one_char != 0; op++)
1401      {
1402      int n;
1403      char s[4];
1404      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1405      printf("  %s --%s%n", s, op->long_name, &n);
1406      n = 30 - n;
1407      if (n < 1) n = 1;
1408      printf("%.*s%s\n", n, "                    ", op->help_text);
1409      }
1410    
1411    printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1412    printf("trailing white space is removed and blank lines are ignored.\n");
1413    printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1414    
1415    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1416    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1417    }
1418    
1419    
1420    
1421    
1422    /*************************************************
1423    *    Handle a single-letter, no data option      *
1424    *************************************************/
1425    
1426    static int
1427    handle_option(int letter, int options)
1428    {
1429    switch(letter)
1430      {
1431      case N_FOFFSETS: file_offsets = TRUE; break;
1432      case N_HELP: help(); exit(0);
1433      case N_LOFFSETS: line_offsets = number = TRUE; break;
1434      case 'c': count_only = TRUE; break;
1435      case 'F': process_options |= PO_FIXED_STRINGS; break;
1436      case 'H': filenames = FN_FORCE; break;
1437      case 'h': filenames = FN_NONE; break;
1438      case 'i': options |= PCRE_CASELESS; break;
1439      case 'l': filenames = FN_ONLY; break;
1440      case 'L': filenames = FN_NOMATCH_ONLY; break;
1441      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1442      case 'n': number = TRUE; break;
1443      case 'o': only_matching = TRUE; break;
1444      case 'q': quiet = TRUE; break;
1445      case 'r': dee_action = dee_RECURSE; break;
1446      case 's': silent = TRUE; break;
1447      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1448      case 'v': invert = TRUE; break;
1449      case 'w': process_options |= PO_WORD_MATCH; break;
1450      case 'x': process_options |= PO_LINE_MATCH; break;
1451    
1452      case 'V':
1453      fprintf(stderr, "pcregrep version %s\n", pcre_version());
1454      exit(0);
1455      break;
1456    
1457      default:
1458      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1459      exit(usage(2));
1460      }
1461    
1462    return options;
1463    }
1464    
1465    
1466    
1467    
1468    /*************************************************
1469    *          Construct printed ordinal             *
1470    *************************************************/
1471    
/* This turns a number into "1st", "2nd", "3rd", "4th", etc. Numbers whose last
two digits are 11, 12 or 13 take "th" ("11th", "112th"), as do negative
numbers.

Argument:   n    the number
Returns:    pointer to a static buffer holding the text; it is overwritten by
            the next call
*/

static char *
ordin(int n)
{
static char buffer[16];      /* Room for any int, the suffix, and the zero */
const char *tail = "th";
int lasttwo = n % 100;

if (lasttwo < 11 || lasttwo > 13) switch (n%10)
  {
  case 1: tail = "st"; break;
  case 2: tail = "nd"; break;
  case 3: tail = "rd"; break;
  default: break;
  }

sprintf(buffer, "%d%s", n, tail);
return buffer;
}
1490    
1491    
1492    
1493    /*************************************************
1494    *          Compile a single pattern              *
1495    *************************************************/
1496    
1497    /* When the -F option has been used, this is called for each substring.
1498    Otherwise it's called for each supplied pattern.
1499    
1500    Arguments:
1501      pattern        the pattern string
1502      options        the PCRE options
1503      filename       the file name, or NULL for a command-line pattern
1504      count          0 if this is the only command line pattern, or
1505                     number of the command line pattern, or
1506                     linenumber for a pattern from a file
1507    
1508    Returns:         TRUE on success, FALSE after an error
1509    */
1510    
1511    static BOOL
1512    compile_single_pattern(char *pattern, int options, char *filename, int count)
1513    {
1514    char buffer[MBUFTHIRD + 16];
1515    const char *error;
1516    int errptr;
1517    
1518    if (pattern_count >= MAX_PATTERN_COUNT)
1519      {
1520      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1521        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1522      return FALSE;
1523      }
1524    
1525    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1526      suffix[process_options]);
1527    pattern_list[pattern_count] =
1528      pcre_compile(buffer, options, &error, &errptr, pcretables);
1529    if (pattern_list[pattern_count] != NULL)
1530      {
1531      pattern_count++;
1532      return TRUE;
1533      }
1534    
1535    /* Handle compile errors */
1536    
1537    errptr -= (int)strlen(prefix[process_options]);
1538    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1539    
1540    if (filename == NULL)
1541      {
1542      if (count == 0)
1543        fprintf(stderr, "pcregrep: Error in command-line regex "
1544          "at offset %d: %s\n", errptr, error);
1545      else
1546        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1547          "at offset %d: %s\n", ordin(count), errptr, error);
1548      }
1549    else
1550      {
1551      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1552        "at offset %d: %s\n", count, filename, errptr, error);
1553      }
1554    
1555    return FALSE;
1556    }
1557    
1558    
1559    
1560    /*************************************************
1561    *           Compile one supplied pattern         *
1562    *************************************************/
1563    
1564    /* When the -F option has been used, each string may be a list of strings,
1565    separated by line breaks. They will be matched literally.
1566    
1567    Arguments:
1568      pattern        the pattern string
1569      options        the PCRE options
1570      filename       the file name, or NULL for a command-line pattern
1571      count          0 if this is the only command line pattern, or
1572                     number of the command line pattern, or
1573                     linenumber for a pattern from a file
1574    
1575    Returns:         TRUE on success, FALSE after an error
1576    */
1577    
1578    static BOOL
1579    compile_pattern(char *pattern, int options, char *filename, int count)
1580    {
1581    if ((process_options & PO_FIXED_STRINGS) != 0)
1582      {
1583      char *eop = pattern + strlen(pattern);
1584      char buffer[MBUFTHIRD];
1585      for(;;)
1586        {
1587        int ellength;
1588        char *p = end_of_line(pattern, eop, &ellength);
1589        if (ellength == 0)
1590          return compile_single_pattern(pattern, options, filename, count);
1591        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1592        pattern = p;
1593        if (!compile_single_pattern(buffer, options, filename, count))
1594          return FALSE;
1595        }
1596      }
1597    else return compile_single_pattern(pattern, options, filename, count);
1598    }
1599    
1600    
1601    
1602    /*************************************************
1603    *                Main program                    *
1604    *************************************************/
1605    
1606    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1607    
1608    int
1609    main(int argc, char **argv)
1610    {
1611    int i, j;
1612    int rc = 1;
1613    int pcre_options = 0;
1614    int cmd_pattern_count = 0;
1615    int hint_count = 0;
1616    int errptr;
1617    BOOL only_one_at_top;
1618    char *patterns[MAX_PATTERN_COUNT];
1619    const char *locale_from = "--locale";
1620    const char *error;
1621    
1622    /* Set the default line ending value from the default in the PCRE library;
1623    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1624    */
1625    
1626    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1627    switch(i)
1628      {
1629      default:                 newline = (char *)"lf"; break;
1630      case '\r':               newline = (char *)"cr"; break;
1631      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1632      case -1:                 newline = (char *)"any"; break;
1633      case -2:                 newline = (char *)"anycrlf"; break;
1634      }
1635    
1636    /* Process the options */
1637    
1638    for (i = 1; i < argc; i++)
1639      {
1640      option_item *op = NULL;
1641      char *option_data = (char *)"";    /* default to keep compiler happy */
1642      BOOL longop;
1643      BOOL longopwasequals = FALSE;
1644    
1645      if (argv[i][0] != '-') break;
1646    
1647      /* If we hit an argument that is just "-", it may be a reference to STDIN,
1648      but only if we have previously had -e or -f to define the patterns. */
1649    
1650      if (argv[i][1] == 0)
1651        {
1652        if (pattern_filename != NULL || pattern_count > 0) break;
1653          else exit(usage(2));
1654        }
1655    
1656      /* Handle a long name option, or -- to terminate the options */
1657    
1658      if (argv[i][1] == '-')
1659        {
1660        char *arg = argv[i] + 2;
1661        char *argequals = strchr(arg, '=');
1662    
1663        if (*arg == 0)    /* -- terminates options */
1664          {
1665          i++;
1666          break;                /* out of the options-handling loop */
1667          }
1668    
1669        longop = TRUE;
1670    
1671        /* Some long options have data that follows after =, for example file=name.
1672        Some options have variations in the long name spelling: specifically, we
1673        allow "regexp" because GNU grep allows it, though I personally go along
1674        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1675        These options are entered in the table as "regex(p)". No option is in both
1676        these categories, fortunately. */
1677    
1678        for (op = optionlist; op->one_char != 0; op++)
1679          {
1680          char *opbra = strchr(op->long_name, '(');
1681          char *equals = strchr(op->long_name, '=');
1682          if (opbra == NULL)     /* Not a (p) case */
1683            {
1684            if (equals == NULL)  /* Not thing=data case */
1685              {
1686              if (strcmp(arg, op->long_name) == 0) break;
1687              }
1688            else                 /* Special case xxx=data */
1689              {
1690              int oplen = equals - op->long_name;
1691              int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1692              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1693                {
1694                option_data = arg + arglen;
1695                if (*option_data == '=')
1696                  {
1697                  option_data++;
1698                  longopwasequals = TRUE;
1699                  }
1700                break;
1701                }
1702              }
1703            }
1704          else                   /* Special case xxxx(p) */
1705            {
1706            char buff1[24];
1707            char buff2[24];
1708            int baselen = opbra - op->long_name;
1709            sprintf(buff1, "%.*s", baselen, op->long_name);
1710            sprintf(buff2, "%s%.*s", buff1,
1711              (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1712            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1713              break;
1714            }
1715          }
1716    
1717        if (op->one_char == 0)
1718          {
1719          fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1720          exit(usage(2));
1721          }
1722        }
1723    
1724    
1725      /* Jeffrey Friedl's debugging harness uses these additional options which
1726      are not in the right form for putting in the option table because they use
1727      only one hyphen, yet are more than one character long. By putting them
1728      separately here, they will not get displayed as part of the help() output,
1729      but I don't think Jeffrey will care about that. */
1730    
1731    #ifdef JFRIEDL_DEBUG
1732      else if (strcmp(argv[i], "-pre") == 0) {
1733              jfriedl_prefix = argv[++i];
1734              continue;
1735      } else if (strcmp(argv[i], "-post") == 0) {
1736              jfriedl_postfix = argv[++i];
1737              continue;
1738      } else if (strcmp(argv[i], "-XT") == 0) {
1739              sscanf(argv[++i], "%d", &jfriedl_XT);
1740              continue;
1741      } else if (strcmp(argv[i], "-XR") == 0) {
1742              sscanf(argv[++i], "%d", &jfriedl_XR);
1743              continue;
1744      }
1745    #endif
1746    
1747    
1748      /* One-char options; many that have no data may be in a single argument; we
1749      continue till we hit the last one or one that needs data. */
1750    
1751      else
1752        {
1753        char *s = argv[i] + 1;
1754        longop = FALSE;
1755        while (*s != 0)
1756          {
1757          for (op = optionlist; op->one_char != 0; op++)
1758            { if (*s == op->one_char) break; }
1759          if (op->one_char == 0)
1760            {
1761            fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1762              *s, argv[i]);
1763            exit(usage(2));
1764            }
1765          if (op->type != OP_NODATA || s[1] == 0)
1766            {
1767            option_data = s+1;
1768            break;
1769            }
1770          pcre_options = handle_option(*s++, pcre_options);
1771          }
1772        }
1773    
1774      /* At this point we should have op pointing to a matched option. If the type
1775      is NO_DATA, it means that there is no data, and the option might set
1776      something in the PCRE options. */
1777    
1778      if (op->type == OP_NODATA)
1779        {
1780        pcre_options = handle_option(op->one_char, pcre_options);
1781        continue;
1782        }
1783    
1784      /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1785      either has a value or defaults to something. It cannot have data in a
1786      separate item. At the moment, the only such options are "colo(u)r" and
1787      Jeffrey Friedl's special -S debugging option. */
1788    
1789      if (*option_data == 0 &&
1790          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1791        {
1792        switch (op->one_char)
1793          {
1794          case N_COLOUR:
1795          colour_option = (char *)"auto";
1796          break;
1797    #ifdef JFRIEDL_DEBUG
1798          case 'S':
1799          S_arg = 0;
1800          break;
1801    #endif
1802          }
1803        continue;
1804        }
1805    
1806      /* Otherwise, find the data string for the option. */
1807    
1808      if (*option_data == 0)
1809        {
1810        if (i >= argc - 1 || longopwasequals)
1811          {
1812          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1813          exit(usage(2));
1814          }
1815        option_data = argv[++i];
1816        }
1817    
1818      /* If the option type is OP_PATLIST, it's the -e option, which can be called
1819      multiple times to create a list of patterns. */
1820    
1821      if (op->type == OP_PATLIST)
1822        {
1823        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1824          {
1825          fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1826            MAX_PATTERN_COUNT);
1827          return 2;
1828          }
1829        patterns[cmd_pattern_count++] = option_data;
1830        }
1831    
1832      /* Otherwise, deal with single string or numeric data values. */
1833    
1834      else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1835        {
1836        *((char **)op->dataptr) = option_data;
1837        }
1838      else
1839        {
1840        char *endptr;
1841        int n = strtoul(option_data, &endptr, 10);
1842        if (*endptr != 0)
1843          {
1844          if (longop)
1845            {
1846            char *equals = strchr(op->long_name, '=');
1847            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1848              equals - op->long_name;
1849            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1850              option_data, nlen, op->long_name);
1851            }
1852          else
1853            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1854              option_data, op->one_char);
1855          exit(usage(2));
1856          }
1857        *((int *)op->dataptr) = n;
1858        }
1859      }
1860    
1861    /* Options have been decoded. If -C was used, its value is used as a default
1862    for -A and -B. */
1863    
1864    if (both_context > 0)
1865      {
1866      if (after_context == 0) after_context = both_context;
1867      if (before_context == 0) before_context = both_context;
1868      }
1869    
1870    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
1871    However, the latter two set the only_matching flag. */
1872    
1873    if ((only_matching && (file_offsets || line_offsets)) ||
1874        (file_offsets && line_offsets))
1875      {
1876      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
1877        "and/or --line-offsets\n");
1878      exit(usage(2));
1879      }
1880    
1881    if (file_offsets || line_offsets) only_matching = TRUE;
1882    
1883    /* If a locale has not been provided as an option, see if the LC_CTYPE or
1884    LC_ALL environment variable is set, and if so, use it. */
1885    
1886    if (locale == NULL)
1887      {
1888      locale = getenv("LC_ALL");
1889      locale_from = "LCC_ALL";
1890      }
1891    
1892    if (locale == NULL)
1893      {
1894      locale = getenv("LC_CTYPE");
1895      locale_from = "LC_CTYPE";
1896      }
1897    
1898    /* If a locale has been provided, set it, and generate the tables the PCRE
1899    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1900    
1901    if (locale != NULL)
1902      {
1903      if (setlocale(LC_CTYPE, locale) == NULL)
1904        {
1905        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1906          locale, locale_from);
1907        return 2;
1908        }
1909      pcretables = pcre_maketables();
1910      }
1911    
1912    /* Sort out colouring */
1913    
1914    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1915      {
1916      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1917      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1918      else
1919        {
1920        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1921          colour_option);
1922        return 2;
1923        }
1924      if (do_colour)
1925        {
1926        char *cs = getenv("PCREGREP_COLOUR");
1927        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1928        if (cs != NULL) colour_string = cs;
1929        }
1930      }
1931    
1932    /* Interpret the newline type; the default settings are Unix-like. */
1933    
1934    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1935      {
1936      pcre_options |= PCRE_NEWLINE_CR;
1937      endlinetype = EL_CR;
1938      }
1939    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1940      {
1941      pcre_options |= PCRE_NEWLINE_LF;
1942      endlinetype = EL_LF;
1943      }
1944    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1945      {
1946      pcre_options |= PCRE_NEWLINE_CRLF;
1947      endlinetype = EL_CRLF;
1948      }
1949    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1950      {
1951      pcre_options |= PCRE_NEWLINE_ANY;
1952      endlinetype = EL_ANY;
1953      }
1954    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1955      {
1956      pcre_options |= PCRE_NEWLINE_ANYCRLF;
1957      endlinetype = EL_ANYCRLF;
1958      }
1959    else
1960      {
1961      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1962      return 2;
1963      }
1964    
1965    /* Interpret the text values for -d and -D */
1966    
1967    if (dee_option != NULL)
1968      {
1969      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1970      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1971      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1972      else
1973        {
1974        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1975        return 2;
1976        }
1977      }
1978    
1979    if (DEE_option != NULL)
1980      {
1981      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1982      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1983      else
1984        {
1985        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1986        return 2;
1987        }
1988      }
1989    
1990    /* Check the values for Jeffrey Friedl's debugging options. */
1991    
1992    #ifdef JFRIEDL_DEBUG
1993    if (S_arg > 9)
1994      {
1995      fprintf(stderr, "pcregrep: bad value for -S option\n");
1996      return 2;
1997      }
1998    if (jfriedl_XT != 0 || jfriedl_XR != 0)
1999      {
2000      if (jfriedl_XT == 0) jfriedl_XT = 1;
2001      if (jfriedl_XR == 0) jfriedl_XR = 1;
2002      }
2003    #endif
2004    
2005    /* Get memory to store the pattern and hints lists. */
2006    
2007    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2008    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2009    
2010    if (pattern_list == NULL || hints_list == NULL)
2011      {
2012      fprintf(stderr, "pcregrep: malloc failed\n");
2013      goto EXIT2;
2014      }
2015    
2016    /* If no patterns were provided by -e, and there is no file provided by -f,
2017    the first argument is the one and only pattern, and it must exist. */
2018    
2019    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2020      {
2021      if (i >= argc) return usage(2);
2022      patterns[cmd_pattern_count++] = argv[i++];
2023      }
2024    
2025    /* Compile the patterns that were provided on the command line, either by
2026    multiple uses of -e or as a single unkeyed pattern. */
2027    
2028    for (j = 0; j < cmd_pattern_count; j++)
2029      {
2030      if (!compile_pattern(patterns[j], pcre_options, NULL,
2031           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2032        goto EXIT2;
2033      }
2034    
2035    /* Compile the regular expressions that are provided in a file. */
2036    
2037    if (pattern_filename != NULL)
2038      {
2039      int linenumber = 0;
2040      FILE *f;
2041      char *filename;
2042      char buffer[MBUFTHIRD];
2043    
2044      if (strcmp(pattern_filename, "-") == 0)
2045        {
2046        f = stdin;
2047        filename = stdin_name;
2048        }
2049      else
2050        {
2051        f = fopen(pattern_filename, "r");
2052        if (f == NULL)
2053          {
2054          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2055            strerror(errno));
2056          goto EXIT2;
2057          }
2058        filename = pattern_filename;
2059        }
2060    
2061      while (fgets(buffer, MBUFTHIRD, f) != NULL)
2062        {
2063        char *s = buffer + (int)strlen(buffer);
2064        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2065        *s = 0;
2066        linenumber++;
2067        if (buffer[0] == 0) continue;   /* Skip blank lines */
2068        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2069          goto EXIT2;
2070        }
2071    
2072      if (f != stdin) fclose(f);
2073      }
2074    
2075    /* Study the regular expressions, as we will be running them many times */
2076    
2077    for (j = 0; j < pattern_count; j++)
2078      {
2079      hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2080      if (error != NULL)
2081        {
2082        char s[16];
2083        if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2084        fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2085        goto EXIT2;
2086        }
2087      hint_count++;
2088      }
2089    
2090    /* If there are include or exclude patterns, compile them. */
2091    
2092    if (exclude_pattern != NULL)
2093      {
2094      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2095        pcretables);
2096      if (exclude_compiled == NULL)
2097        {
2098        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2099          errptr, error);
2100        goto EXIT2;
2101        }
2102      }
2103    
2104    if (include_pattern != NULL)
2105      {
2106      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2107        pcretables);
2108      if (include_compiled == NULL)
2109        {
2110        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2111          errptr, error);
2112        goto EXIT2;
2113        }
2114      }
2115    
2116    /* If there are no further arguments, do the business on stdin and exit. */
2117    
2118    if (i >= argc)
2119      {
2120      rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2121      goto EXIT;
2122      }
2123    
2124    /* Otherwise, work through the remaining arguments as files or directories.
2125    Pass in the fact that there is only one argument at top level - this suppresses
2126    the file name if the argument is not a directory and filenames are not
2127    otherwise forced. */
2128    
2129    only_one_at_top = i == argc - 1;   /* Catch initial value of i */
2130    
2131    for (; i < argc; i++)
2132      {
2133      int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2134        only_one_at_top);
2135      if (frc > 1) rc = frc;
2136        else if (frc == 0 && rc == 1) rc = 0;
2137      }
2138    
2139    EXIT:
2140    if (pattern_list != NULL)
2141      {
2142      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2143      free(pattern_list);
2144      }
2145    if (hints_list != NULL)
2146      {
2147      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2148      free(hints_list);
2149      }
2150    return rc;
2151    
2152    EXIT2:
2153    rc = 2;
2154    goto EXIT;
2155  }  }
2156    
2157  /* End */  /* End of pcregrep */

Legend:
Removed from v.49  
changed lines
  Added in v.283

  ViewVC Help
Powered by ViewVC 1.1.5