/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 49 by nigel, Sat Feb 24 21:39:33 2007 UTC revision 515 by ph10, Tue May 4 09:12:25 2010 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2010 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44    #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55    #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 17  its pattern matching. */ Line 70  its pattern matching. */
70    
71  typedef int BOOL;  typedef int BOOL;
72    
73    #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76    #if BUFSIZ > 8192
77    #define MBUFTHIRD BUFSIZ
78    #else
79    #define MBUFTHIRD 8192
80    #endif
81    
82    /* Values for the "filenames" variable, which specifies options for file name
83    output. The order is important; it is assumed that a file name is wanted for
84    all values greater than FN_DEFAULT. */
85    
86    enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92    /* Actions for the -d and -D options */
93    
94    enum { dee_READ, dee_SKIP, dee_RECURSE };
95    enum { DEE_READ, DEE_SKIP };
96    
97    /* Actions for special processing options (flag bits) */
98    
99    #define PO_WORD_MATCH     0x0001
100    #define PO_LINE_MATCH     0x0002
101    #define PO_FIXED_STRINGS  0x0004
102    
103    /* Line ending types */
104    
105    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
113    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
114    
115    
116    
117  /*************************************************  /*************************************************
118  *               Global variables                 *  *               Global variables                 *
119  *************************************************/  *************************************************/
120    
121  static pcre *pattern;  /* Jeffrey Friedl has some debugging requirements that are not part of the
122  static pcre_extra *hints;  regular code. */
123    
124    #ifdef JFRIEDL_DEBUG
125    static int S_arg = -1;
126    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128    static const char *jfriedl_prefix = "";
129    static const char *jfriedl_postfix = "";
130    #endif
131    
132    static int  endlinetype;
133    
134    static char *colour_string = (char *)"1;31";
135    static char *colour_option = NULL;
136    static char *dee_option = NULL;
137    static char *DEE_option = NULL;
138    static char *newline = NULL;
139    static char *pattern_filename = NULL;
140    static char *stdin_name = (char *)"(standard input)";
141    static char *locale = NULL;
142    
143    static const unsigned char *pcretables = NULL;
144    
145    static int  pattern_count = 0;
146    static pcre **pattern_list = NULL;
147    static pcre_extra **hints_list = NULL;
148    
149    static char *include_pattern = NULL;
150    static char *exclude_pattern = NULL;
151    static char *include_dir_pattern = NULL;
152    static char *exclude_dir_pattern = NULL;
153    
154    static pcre *include_compiled = NULL;
155    static pcre *exclude_compiled = NULL;
156    static pcre *include_dir_compiled = NULL;
157    static pcre *exclude_dir_compiled = NULL;
158    
159    static int after_context = 0;
160    static int before_context = 0;
161    static int both_context = 0;
162    static int dee_action = dee_READ;
163    static int DEE_action = DEE_READ;
164    static int error_count = 0;
165    static int filenames = FN_DEFAULT;
166    static int process_options = 0;
167    
168  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
169  static BOOL filenames_only = FALSE;  static BOOL do_colour = FALSE;
170    static BOOL file_offsets = FALSE;
171    static BOOL hyphenpending = FALSE;
172  static BOOL invert = FALSE;  static BOOL invert = FALSE;
173    static BOOL line_offsets = FALSE;
174    static BOOL multiline = FALSE;
175  static BOOL number = FALSE;  static BOOL number = FALSE;
176    static BOOL omit_zero_count = FALSE;
177    static BOOL only_matching = FALSE;
178    static BOOL quiet = FALSE;
179  static BOOL silent = FALSE;  static BOOL silent = FALSE;
180  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
181    
182    /* Structure for options and list of them */
183    
184    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
185           OP_PATLIST };
186    
187    typedef struct option_item {
188      int type;
189      int one_char;
190      void *dataptr;
191      const char *long_name;
192      const char *help_text;
193    } option_item;
194    
195    /* Options without a single-letter equivalent get a negative value. This can be
196    used to identify them. */
197    
198    #define N_COLOUR       (-1)
199    #define N_EXCLUDE      (-2)
200    #define N_EXCLUDE_DIR  (-3)
201    #define N_HELP         (-4)
202    #define N_INCLUDE      (-5)
203    #define N_INCLUDE_DIR  (-6)
204    #define N_LABEL        (-7)
205    #define N_LOCALE       (-8)
206    #define N_NULL         (-9)
207    #define N_LOFFSETS     (-10)
208    #define N_FOFFSETS     (-11)
209    
210    static option_item optionlist[] = {
211      { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
212      { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
213      { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
214      { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
215      { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
216      { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
217      { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
218      { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
219      { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
220      { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
221      { OP_PATLIST,   'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
222      { OP_NODATA,    'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
223      { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
224      { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
225      { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
226      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
227      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
228      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
229      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
230      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
231      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
232      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
233      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
234      { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
235      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
236      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
237      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
238      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
239      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
240      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
241      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
242      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
243    #ifdef JFRIEDL_DEBUG
244      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
245    #endif
246      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
247      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
248      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
249      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
250      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
251      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
252      { OP_NODATA,    0,        NULL,               NULL,            NULL }
253    };
254    
255    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
256    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
257    that the combination of -w and -x has the same effect as -x on its own, so we
258    can treat them as the same. */
259    
260    static const char *prefix[] = {
261      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
262    
263    static const char *suffix[] = {
264      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
265    
266    /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
267    
268    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
269    
270    const char utf8_table4[] = {
271      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
272      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
273      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
274      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
275    
276    
277    
278    /*************************************************
279    *            OS-specific functions               *
280    *************************************************/
281    
282    /* These functions are defined so that they can be made system specific,
283    although at present the only ones are for Unix, Win32, and for "no support". */
284    
285    
286    /************* Directory scanning in Unix ***********/
287    
288    #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
289    #include <sys/types.h>
290    #include <sys/stat.h>
291    #include <dirent.h>
292    
293    typedef DIR directory_type;
294    
295    static int
296    isdirectory(char *filename)
297    {
298    struct stat statbuf;
299    if (stat(filename, &statbuf) < 0)
300      return 0;        /* In the expectation that opening as a file will fail */
301    return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
302    }
303    
304    static directory_type *
305    opendirectory(char *filename)
306    {
307    return opendir(filename);
308    }
309    
310    static char *
311    readdirectory(directory_type *dir)
312    {
313    for (;;)
314      {
315      struct dirent *dent = readdir(dir);
316      if (dent == NULL) return NULL;
317      if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
318        return dent->d_name;
319      }
320    /* Control never reaches here */
321    }
322    
323    static void
324    closedirectory(directory_type *dir)
325    {
326    closedir(dir);
327    }
328    
329    
330    /************* Test for regular file in Unix **********/
331    
332    static int
333    isregfile(char *filename)
334    {
335    struct stat statbuf;
336    if (stat(filename, &statbuf) < 0)
337      return 1;        /* In the expectation that opening as a file will fail */
338    return (statbuf.st_mode & S_IFMT) == S_IFREG;
339    }
340    
341    
342    /************* Test stdout for being a terminal in Unix **********/
343    
344    static BOOL
345    is_stdout_tty(void)
346    {
347    return isatty(fileno(stdout));
348    }
349    
350    
351    /************* Directory scanning in Win32 ***********/
352    
353    /* I (Philip Hazel) have no means of testing this code. It was contributed by
354    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
355    when it did not exist. David Byron added a patch that moved the #include of
356    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
357    */
358    
359    #elif HAVE_WINDOWS_H
360    
361    #ifndef STRICT
362    # define STRICT
363    #endif
364    #ifndef WIN32_LEAN_AND_MEAN
365    # define WIN32_LEAN_AND_MEAN
366    #endif
367    
368    #include <windows.h>
369    
370    #ifndef INVALID_FILE_ATTRIBUTES
371    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
372    #endif
373    
374    typedef struct directory_type
375    {
376    HANDLE handle;
377    BOOL first;
378    WIN32_FIND_DATA data;
379    } directory_type;
380    
381    int
382    isdirectory(char *filename)
383    {
384    DWORD attr = GetFileAttributes(filename);
385    if (attr == INVALID_FILE_ATTRIBUTES)
386      return 0;
387    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
388    }
389    
390    directory_type *
391    opendirectory(char *filename)
392    {
393    size_t len;
394    char *pattern;
395    directory_type *dir;
396    DWORD err;
397    len = strlen(filename);
398    pattern = (char *) malloc(len + 3);
399    dir = (directory_type *) malloc(sizeof(*dir));
400    if ((pattern == NULL) || (dir == NULL))
401      {
402      fprintf(stderr, "pcregrep: malloc failed\n");
403      exit(2);
404      }
405    memcpy(pattern, filename, len);
406    memcpy(&(pattern[len]), "\\*", 3);
407    dir->handle = FindFirstFile(pattern, &(dir->data));
408    if (dir->handle != INVALID_HANDLE_VALUE)
409      {
410      free(pattern);
411      dir->first = TRUE;
412      return dir;
413      }
414    err = GetLastError();
415    free(pattern);
416    free(dir);
417    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
418    return NULL;
419    }
420    
421    char *
422    readdirectory(directory_type *dir)
423    {
424    for (;;)
425      {
426      if (!dir->first)
427        {
428        if (!FindNextFile(dir->handle, &(dir->data)))
429          return NULL;
430        }
431      else
432        {
433        dir->first = FALSE;
434        }
435      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
436        return dir->data.cFileName;
437      }
438    #ifndef _MSC_VER
439    return NULL;   /* Keep compiler happy; never executed */
440    #endif
441    }
442    
443    void
444    closedirectory(directory_type *dir)
445    {
446    FindClose(dir->handle);
447    free(dir);
448    }
449    
450    
451    /************* Test for regular file in Win32 **********/
452    
453    /* I don't know how to do this, or if it can be done; assume all paths are
454    regular if they are not directories. */
455    
456    int isregfile(char *filename)
457    {
458    return !isdirectory(filename);
459    }
460    
461  #if ! HAVE_STRERROR  
462    /************* Test stdout for being a terminal in Win32 **********/
463    
464    /* I don't know how to do this; assume never */
465    
466    static BOOL
467    is_stdout_tty(void)
468    {
469    return FALSE;
470    }
471    
472    
473    /************* Directory scanning when we can't do it ***********/
474    
475    /* The type is void, and apart from isdirectory(), the functions do nothing. */
476    
477    #else
478    
479    typedef void directory_type;
480    
481    int isdirectory(char *filename) { return 0; }
482    directory_type * opendirectory(char *filename) { return (directory_type*)0;}
483    char *readdirectory(directory_type *dir) { return (char*)0;}
484    void closedirectory(directory_type *dir) {}
485    
486    
487    /************* Test for regular file when we can't do it **********/
488    
489    /* Assume all files are regular. */
490    
491    int isregfile(char *filename) { return 1; }
492    
493    
494    /************* Test stdout for being a terminal when we can't do it **********/
495    
496    static BOOL
497    is_stdout_tty(void)
498    {
499    return FALSE;
500    }
501    
502    
503    #endif
504    
505    
506    
507    #ifndef HAVE_STRERROR
508  /*************************************************  /*************************************************
509  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
510  *************************************************/  *************************************************/
# Line 58  return sys_errlist[n]; Line 527  return sys_errlist[n];
527    
528    
529  /*************************************************  /*************************************************
530  *              Grep an individual file           *  *             Find end of line                   *
531  *************************************************/  *************************************************/
532    
533  static int  /* The length of the endline sequence that is found is set via lenptr. This may
534  pcregrep(FILE *in, char *name)  be zero at the very end of the file if there is no line-ending sequence there.
 {  
 int rc = 1;  
 int linenumber = 0;  
 int count = 0;  
 int offsets[99];  
 char buffer[BUFSIZ];  
535    
536  while (fgets(buffer, sizeof(buffer), in) != NULL)  Arguments:
537    {    p         current position in line
538    BOOL match;    endptr    end of available data
539    int length = (int)strlen(buffer);    lenptr    where to put the length of the eol sequence
   if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;  
   linenumber++;  
540    
541    match = pcre_exec(pattern, hints, buffer, length, 0, 0, offsets, 99) >= 0;  Returns:    pointer after the last byte of the line, including the newline byte(s)
542    if (match && whole_lines && offsets[1] != length) match = FALSE;  */
543    
544    if (match != invert)  static char *
545    end_of_line(char *p, char *endptr, int *lenptr)
546    {
547    switch(endlinetype)
548      {
549      default:      /* Just in case */
550      case EL_LF:
551      while (p < endptr && *p != '\n') p++;
552      if (p < endptr)
553      {      {
554      if (count_only) count++;      *lenptr = 1;
555        return p + 1;
556        }
557      *lenptr = 0;
558      return endptr;
559    
560      case EL_CR:
561      while (p < endptr && *p != '\r') p++;
562      if (p < endptr)
563        {
564        *lenptr = 1;
565        return p + 1;
566        }
567      *lenptr = 0;
568      return endptr;
569    
570      else if (filenames_only)    case EL_CRLF:
571      for (;;)
572        {
573        while (p < endptr && *p != '\r') p++;
574        if (++p >= endptr)
575        {        {
576        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        *lenptr = 0;
577        return 0;        return endptr;
578          }
579        if (*p == '\n')
580          {
581          *lenptr = 2;
582          return p + 1;
583        }        }
584        }
585      break;
586    
587      else if (silent) return 0;    case EL_ANYCRLF:
588      while (p < endptr)
589        {
590        int extra = 0;
591        register int c = *((unsigned char *)p);
592    
593      else      if (utf8 && c >= 0xc0)
594        {        {
595        if (name != NULL) fprintf(stdout, "%s:", name);        int gcii, gcss;
596        if (number) fprintf(stdout, "%d:", linenumber);        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
597        fprintf(stdout, "%s\n", buffer);        gcss = 6*extra;
598          c = (c & utf8_table3[extra]) << gcss;
599          for (gcii = 1; gcii <= extra; gcii++)
600            {
601            gcss -= 6;
602            c |= (p[gcii] & 0x3f) << gcss;
603            }
604        }        }
605    
606      rc = 0;      p += 1 + extra;
     }  
   }  
607    
608  if (count_only)      switch (c)
609    {        {
610    if (name != NULL) fprintf(stdout, "%s:", name);        case 0x0a:    /* LF */
611    fprintf(stdout, "%d\n", count);        *lenptr = 1;
612    }        return p;
613    
614          case 0x0d:    /* CR */
615          if (p < endptr && *p == 0x0a)
616            {
617            *lenptr = 2;
618            p++;
619            }
620          else *lenptr = 1;
621          return p;
622    
623  return rc;        default:
624  }        break;
625          }
626        }   /* End of loop for ANYCRLF case */
627    
628      *lenptr = 0;  /* Must have hit the end */
629      return endptr;
630    
631      case EL_ANY:
632      while (p < endptr)
633        {
634        int extra = 0;
635        register int c = *((unsigned char *)p);
636    
637        if (utf8 && c >= 0xc0)
638          {
639          int gcii, gcss;
640          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
641          gcss = 6*extra;
642          c = (c & utf8_table3[extra]) << gcss;
643          for (gcii = 1; gcii <= extra; gcii++)
644            {
645            gcss -= 6;
646            c |= (p[gcii] & 0x3f) << gcss;
647            }
648          }
649    
650  /*************************************************      p += 1 + extra;
 *                Usage function                  *  
 *************************************************/  
651    
652  static int      switch (c)
653  usage(int rc)        {
654  {        case 0x0a:    /* LF */
655  fprintf(stderr, "Usage: pcregrep [-Vchilnsvx] pattern [file] ...\n");        case 0x0b:    /* VT */
656  return rc;        case 0x0c:    /* FF */
657  }        *lenptr = 1;
658          return p;
659    
660          case 0x0d:    /* CR */
661          if (p < endptr && *p == 0x0a)
662            {
663            *lenptr = 2;
664            p++;
665            }
666          else *lenptr = 1;
667          return p;
668    
669          case 0x85:    /* NEL */
670          *lenptr = utf8? 2 : 1;
671          return p;
672    
673          case 0x2028:  /* LS */
674          case 0x2029:  /* PS */
675          *lenptr = 3;
676          return p;
677    
678          default:
679          break;
680          }
681        }   /* End of loop for ANY case */
682    
683      *lenptr = 0;  /* Must have hit the end */
684      return endptr;
685      }     /* End of overall switch */
686    }
687    
688    
689    
690  /*************************************************  /*************************************************
691  *                Main program                    *  *         Find start of previous line            *
692  *************************************************/  *************************************************/
693    
694  int  /* This is called when looking back for before lines to print.
 main(int argc, char **argv)  
 {  
 int i;  
 int rc = 1;  
 int options = 0;  
 int errptr;  
 const char *error;  
 BOOL filenames = TRUE;  
695    
696  /* Process the options */  Arguments:
697      p         start of the subsequent line
698      startptr  start of available data
699    
700  for (i = 1; i < argc; i++)  Returns:    pointer to the start of the previous line
701    */
702    
703    static char *
704    previous_line(char *p, char *startptr)
705    {
706    switch(endlinetype)
707    {    {
708    char *s;    default:      /* Just in case */
709    if (argv[i][0] != '-') break;    case EL_LF:
710    s = argv[i] + 1;    p--;
711    while (*s != 0)    while (p > startptr && p[-1] != '\n') p--;
712      return p;
713    
714      case EL_CR:
715      p--;
716      while (p > startptr && p[-1] != '\n') p--;
717      return p;
718    
719      case EL_CRLF:
720      for (;;)
721        {
722        p -= 2;
723        while (p > startptr && p[-1] != '\n') p--;
724        if (p <= startptr + 1 || p[-2] == '\r') return p;
725        }
726      return p;   /* But control should never get here */
727    
728      case EL_ANY:
729      case EL_ANYCRLF:
730      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
731      if (utf8) while ((*p & 0xc0) == 0x80) p--;
732    
733      while (p > startptr)
734      {      {
735      switch (*s++)      register int c;
736        char *pp = p - 1;
737    
738        if (utf8)
739          {
740          int extra = 0;
741          while ((*pp & 0xc0) == 0x80) pp--;
742          c = *((unsigned char *)pp);
743          if (c >= 0xc0)
744            {
745            int gcii, gcss;
746            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
747            gcss = 6*extra;
748            c = (c & utf8_table3[extra]) << gcss;
749            for (gcii = 1; gcii <= extra; gcii++)
750              {
751              gcss -= 6;
752              c |= (pp[gcii] & 0x3f) << gcss;
753              }
754            }
755          }
756        else c = *((unsigned char *)pp);
757    
758        if (endlinetype == EL_ANYCRLF) switch (c)
759        {        {
760        case 'c': count_only = TRUE; break;        case 0x0a:    /* LF */
761        case 'h': filenames = FALSE; break;        case 0x0d:    /* CR */
762        case 'i': options |= PCRE_CASELESS; break;        return p;
       case 'l': filenames_only = TRUE;  
       case 'n': number = TRUE; break;  
       case 's': silent = TRUE; break;  
       case 'v': invert = TRUE; break;  
       case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;  
763    
764        case 'V':        default:
       fprintf(stderr, "PCRE version %s\n", pcre_version());  
765        break;        break;
766          }
767    
768        else switch (c)
769          {
770          case 0x0a:    /* LF */
771          case 0x0b:    /* VT */
772          case 0x0c:    /* FF */
773          case 0x0d:    /* CR */
774          case 0x85:    /* NEL */
775          case 0x2028:  /* LS */
776          case 0x2029:  /* PS */
777          return p;
778    
779        default:        default:
780        fprintf(stderr, "pcregrep: unknown option %c\n", s[-1]);        break;
       return usage(2);  
781        }        }
     }  
   }  
782    
783  /* There must be at least a regexp argument */      p = pp;  /* Back one character */
784        }        /* End of loop for ANY case */
785    
786  if (i >= argc) return usage(0);    return startptr;  /* Hit start of data */
787      }     /* End of overall switch */
788    }
789    
 /* Compile the regular expression. */  
790    
 pattern = pcre_compile(argv[i++], options, &error, &errptr, NULL);  
 if (pattern == NULL)  
   {  
   fprintf(stderr, "pcregrep: error in regex at offset %d: %s\n", errptr, error);  
   return 2;  
   }  
791    
 /* Study the regular expression, as we will be running it many times */  
792    
793  hints = pcre_study(pattern, 0, &error);  
794  if (error != NULL)  /*************************************************
795    *       Print the previous "after" lines         *
796    *************************************************/
797    
798    /* This is called if we are about to lose said lines because of buffer filling,
799    and at the end of the file. The data in the line is written using fwrite() so
800    that a binary zero does not terminate it.
801    
802    Arguments:
803      lastmatchnumber   the number of the last matching line, plus one
804      lastmatchrestart  where we restarted after the last match
805      endptr            end of available data
806      printname         filename for printing
807    
808    Returns:            nothing
809    */
810    
811    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
812      char *endptr, char *printname)
813    {
814    if (after_context > 0 && lastmatchnumber > 0)
815    {    {
816    fprintf(stderr, "pcregrep: error while studing regex: %s\n", error);    int count = 0;
817    return 2;    while (lastmatchrestart < endptr && count++ < after_context)
818        {
819        int ellength;
820        char *pp = lastmatchrestart;
821        if (printname != NULL) fprintf(stdout, "%s-", printname);
822        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
823        pp = end_of_line(pp, endptr, &ellength);
824        FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
825        lastmatchrestart = pp;
826        }
827      hyphenpending = TRUE;
828    }    }
829    }
830    
 /* If there are no further arguments, do the business on stdin and exit */  
831    
 if (i >= argc) return pcregrep(stdin, NULL);  
832    
833  /* Otherwise, work through the remaining arguments as files. If there is only  /*************************************************
834  one, don't give its name on the output. */  *   Apply patterns to subject till one matches   *
835    *************************************************/
836    
837  if (i == argc - 1) filenames = FALSE;  /* This function is called to run through all patterns, looking for a match. It
838  if (filenames_only) filenames = TRUE;  is used multiple times for the same subject when colouring is enabled, in order
839    to find all possible matches.
840    
841    Arguments:
842      matchptr    the start of the subject
843      length      the length of the subject to match
844      offsets     the offsets vector to fill in
845      mrc         address of where to put the result of pcre_exec()
846    
847    Returns:      TRUE if there was a match
848                  FALSE if there was no match
849                  invert if there was a non-fatal error
850    */
851    
852  for (; i < argc; i++)  static BOOL
853    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
854    {
855    int i;
856    for (i = 0; i < pattern_count; i++)
857    {    {
858    FILE *in = fopen(argv[i], "r");    *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0,
859    if (in == NULL)      PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
860      if (*mrc >= 0) return TRUE;
861      if (*mrc == PCRE_ERROR_NOMATCH) continue;
862      fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
863      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
864      fprintf(stderr, "this text:\n");
865      FWRITE(matchptr, 1, length, stderr);   /* In case binary zero included */
866      fprintf(stderr, "\n");
867      if (error_count == 0 &&
868          (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
869      {      {
870      fprintf(stderr, "%s: failed to open: %s\n", argv[i], strerror(errno));      fprintf(stderr, "pcregrep: error %d means that a resource limit "
871      rc = 2;        "was exceeded\n", *mrc);
872        fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
873      }      }
874    else    if (error_count++ > 20)
875      {      {
876      int frc = pcregrep(in, filenames? argv[i] : NULL);      fprintf(stderr, "pcregrep: too many errors - abandoned\n");
877      if (frc == 0 && rc == 1) rc = 0;      exit(2);
     fclose(in);  
878      }      }
879      return invert;    /* No more matching; don't show the line again */
880    }    }
881    
882  return rc;  return FALSE;  /* No match, no errors */
883    }
884    
885    
886    
887    /*************************************************
888    *            Grep an individual file             *
889    *************************************************/
890    
891    /* This is called from grep_or_recurse() below. It uses a buffer that is three
892    times the value of MBUFTHIRD. The matching point is never allowed to stray into
893    the top third of the buffer, thus keeping more of the file available for
894    context printing or for multiline scanning. For large files, the pointer will
895    be in the middle third most of the time, so the bottom third is available for
896    "before" context printing.
897    
898    Arguments:
899      handle       the fopened FILE stream for a normal file
900                   the gzFile pointer when reading is via libz
901                   the BZFILE pointer when reading is via libbz2
902      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
903      printname    the file name if it is to be printed for each match
904                   or NULL if the file name is not to be printed
905                   it cannot be NULL if filenames[_nomatch]_only is set
906    
907    Returns:       0 if there was at least one match
908                   1 otherwise (no matches)
909                   2 if there is a read error on a .bz2 file
910    */
911    
912    static int
913    pcregrep(void *handle, int frtype, char *printname)
914    {
915    int rc = 1;
916    int linenumber = 1;
917    int lastmatchnumber = 0;
918    int count = 0;
919    int filepos = 0;
920    int offsets[OFFSET_SIZE];
921    char *lastmatchrestart = NULL;
922    char buffer[3*MBUFTHIRD];
923    char *ptr = buffer;
924    char *endptr;
925    size_t bufflength;
926    BOOL endhyphenpending = FALSE;
927    FILE *in = NULL;                    /* Ensure initialized */
928    
929    #ifdef SUPPORT_LIBZ
930    gzFile ingz = NULL;
931    #endif
932    
933    #ifdef SUPPORT_LIBBZ2
934    BZFILE *inbz2 = NULL;
935    #endif
936    
937    
938    /* Do the first read into the start of the buffer and set up the pointer to end
939    of what we have. In the case of libz, a non-zipped .gz file will be read as a
940    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
941    fail. */
942    
943    #ifdef SUPPORT_LIBZ
944    if (frtype == FR_LIBZ)
945      {
946      ingz = (gzFile)handle;
947      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
948      }
949    else
950    #endif
951    
952    #ifdef SUPPORT_LIBBZ2
953    if (frtype == FR_LIBBZ2)
954      {
955      inbz2 = (BZFILE *)handle;
956      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
957      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
958      }                                    /* without the cast it is unsigned. */
959    else
960    #endif
961    
962      {
963      in = (FILE *)handle;
964      bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
965      }
966    
967    endptr = buffer + bufflength;
968    
969    /* Loop while the current pointer is not at the end of the file. For large
970    files, endptr will be at the end of the buffer when we are in the middle of the
971    file, but ptr will never get there, because as soon as it gets over 2/3 of the
972    way, the buffer is shifted left and re-filled. */
973    
974    while (ptr < endptr)
975      {
976      int endlinelength;
977      int mrc = 0;
978      BOOL match;
979      char *matchptr = ptr;
980      char *t = ptr;
981      size_t length, linelength;
982    
983      /* At this point, ptr is at the start of a line. We need to find the length
984      of the subject string to pass to pcre_exec(). In multiline mode, it is the
985      length remainder of the data in the buffer. Otherwise, it is the length of
986      the next line, excluding the terminating newline. After matching, we always
987      advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
988      option is used for compiling, so that any match is constrained to be in the
989      first line. */
990    
991      t = end_of_line(t, endptr, &endlinelength);
992      linelength = t - ptr - endlinelength;
993      length = multiline? (size_t)(endptr - ptr) : linelength;
994    
995      /* Extra processing for Jeffrey Friedl's debugging. */
996    
997    #ifdef JFRIEDL_DEBUG
998      if (jfriedl_XT || jfriedl_XR)
999      {
1000          #include <sys/time.h>
1001          #include <time.h>
1002          struct timeval start_time, end_time;
1003          struct timezone dummy;
1004          int i;
1005    
1006          if (jfriedl_XT)
1007          {
1008              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1009              const char *orig = ptr;
1010              ptr = malloc(newlen + 1);
1011              if (!ptr) {
1012                      printf("out of memory");
1013                      exit(2);
1014              }
1015              endptr = ptr;
1016              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1017              for (i = 0; i < jfriedl_XT; i++) {
1018                      strncpy(endptr, orig,  length);
1019                      endptr += length;
1020              }
1021              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1022              length = newlen;
1023          }
1024    
1025          if (gettimeofday(&start_time, &dummy) != 0)
1026                  perror("bad gettimeofday");
1027    
1028    
1029          for (i = 0; i < jfriedl_XR; i++)
1030              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1031                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1032    
1033          if (gettimeofday(&end_time, &dummy) != 0)
1034                  perror("bad gettimeofday");
1035    
1036          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1037                          -
1038                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1039    
1040          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1041          return 0;
1042      }
1043    #endif
1044    
1045      /* We come back here after a match when the -o option (only_matching) is set,
1046      in order to find any further matches in the same line. */
1047    
1048      ONLY_MATCHING_RESTART:
1049    
1050      /* Run through all the patterns until one matches or there is an error other
1051      than NOMATCH. This code is in a subroutine so that it can be re-used for
1052      finding subsequent matches when colouring matched lines. */
1053    
1054      match = match_patterns(matchptr, length, offsets, &mrc);
1055    
1056      /* If it's a match or a not-match (as required), do what's wanted. */
1057    
1058      if (match != invert)
1059        {
1060        BOOL hyphenprinted = FALSE;
1061    
1062        /* We've failed if we want a file that doesn't have any matches. */
1063    
1064        if (filenames == FN_NOMATCH_ONLY) return 1;
1065    
1066        /* Just count if just counting is wanted. */
1067    
1068        if (count_only) count++;
1069    
1070        /* If all we want is a file name, there is no need to scan any more lines
1071        in the file. */
1072    
1073        else if (filenames == FN_MATCH_ONLY)
1074          {
1075          fprintf(stdout, "%s\n", printname);
1076          return 0;
1077          }
1078    
1079        /* Likewise, if all we want is a yes/no answer. */
1080    
1081        else if (quiet) return 0;
1082    
1083        /* The --only-matching option prints just the substring that matched, and
1084        the --file-offsets and --line-offsets options output offsets for the
1085        matching substring (they both force --only-matching). None of these options
1086        prints any context. Afterwards, adjust the start and length, and then jump
1087        back to look for further matches in the same line. If we are in invert
1088        mode, however, nothing is printed - this could be still useful because the
1089        return code is set. */
1090    
1091        else if (only_matching)
1092          {
1093          if (!invert)
1094            {
1095            if (printname != NULL) fprintf(stdout, "%s:", printname);
1096            if (number) fprintf(stdout, "%d:", linenumber);
1097            if (line_offsets)
1098              fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1099                offsets[1] - offsets[0]);
1100            else if (file_offsets)
1101              fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1102                offsets[1] - offsets[0]);
1103            else
1104              {
1105              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1106              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1107              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1108              }
1109            fprintf(stdout, "\n");
1110            matchptr += offsets[1];
1111            length -= offsets[1];
1112            match = FALSE;
1113            goto ONLY_MATCHING_RESTART;
1114            }
1115          }
1116    
1117        /* This is the default case when none of the above options is set. We print
1118        the matching lines(s), possibly preceded and/or followed by other lines of
1119        context. */
1120    
1121        else
1122          {
1123          /* See if there is a requirement to print some "after" lines from a
1124          previous match. We never print any overlaps. */
1125    
1126          if (after_context > 0 && lastmatchnumber > 0)
1127            {
1128            int ellength;
1129            int linecount = 0;
1130            char *p = lastmatchrestart;
1131    
1132            while (p < ptr && linecount < after_context)
1133              {
1134              p = end_of_line(p, ptr, &ellength);
1135              linecount++;
1136              }
1137    
1138            /* It is important to advance lastmatchrestart during this printing so
1139            that it interacts correctly with any "before" printing below. Print
1140            each line's data using fwrite() in case there are binary zeroes. */
1141    
1142            while (lastmatchrestart < p)
1143              {
1144              char *pp = lastmatchrestart;
1145              if (printname != NULL) fprintf(stdout, "%s-", printname);
1146              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1147              pp = end_of_line(pp, endptr, &ellength);
1148              FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1149              lastmatchrestart = pp;
1150              }
1151            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1152            }
1153    
1154          /* If there were non-contiguous lines printed above, insert hyphens. */
1155    
1156          if (hyphenpending)
1157            {
1158            fprintf(stdout, "--\n");
1159            hyphenpending = FALSE;
1160            hyphenprinted = TRUE;
1161            }
1162    
1163          /* See if there is a requirement to print some "before" lines for this
1164          match. Again, don't print overlaps. */
1165    
1166          if (before_context > 0)
1167            {
1168            int linecount = 0;
1169            char *p = ptr;
1170    
1171            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1172                   linecount < before_context)
1173              {
1174              linecount++;
1175              p = previous_line(p, buffer);
1176              }
1177    
1178            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1179              fprintf(stdout, "--\n");
1180    
1181            while (p < ptr)
1182              {
1183              int ellength;
1184              char *pp = p;
1185              if (printname != NULL) fprintf(stdout, "%s-", printname);
1186              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1187              pp = end_of_line(pp, endptr, &ellength);
1188              FWRITE(p, 1, pp - p, stdout);
1189              p = pp;
1190              }
1191            }
1192    
1193          /* Now print the matching line(s); ensure we set hyphenpending at the end
1194          of the file if any context lines are being output. */
1195    
1196          if (after_context > 0 || before_context > 0)
1197            endhyphenpending = TRUE;
1198    
1199          if (printname != NULL) fprintf(stdout, "%s:", printname);
1200          if (number) fprintf(stdout, "%d:", linenumber);
1201    
1202          /* In multiline mode, we want to print to the end of the line in which
1203          the end of the matched string is found, so we adjust linelength and the
1204          line number appropriately, but only when there actually was a match
1205          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1206          the match will always be before the first newline sequence. */
1207    
1208          if (multiline)
1209            {
1210            int ellength;
1211            char *endmatch = ptr;
1212            if (!invert)
1213              {
1214              endmatch += offsets[1];
1215              t = ptr;
1216              while (t < endmatch)
1217                {
1218                t = end_of_line(t, endptr, &ellength);
1219                if (t <= endmatch) linenumber++; else break;
1220                }
1221              }
1222            endmatch = end_of_line(endmatch, endptr, &ellength);
1223            linelength = endmatch - ptr - ellength;
1224            }
1225    
1226          /*** NOTE: Use only fwrite() to output the data line, so that binary
1227          zeroes are treated as just another data character. */
1228    
1229          /* This extra option, for Jeffrey Friedl's debugging requirements,
1230          replaces the matched string, or a specific captured string if it exists,
1231          with X. When this happens, colouring is ignored. */
1232    
1233    #ifdef JFRIEDL_DEBUG
1234          if (S_arg >= 0 && S_arg < mrc)
1235            {
1236            int first = S_arg * 2;
1237            int last  = first + 1;
1238            FWRITE(ptr, 1, offsets[first], stdout);
1239            fprintf(stdout, "X");
1240            FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1241            }
1242          else
1243    #endif
1244    
1245          /* We have to split the line(s) up if colouring, and search for further
1246          matches. */
1247    
1248          if (do_colour)
1249            {
1250            int last_offset = 0;
1251            FWRITE(ptr, 1, offsets[0], stdout);
1252            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1253            FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1254            fprintf(stdout, "%c[00m", 0x1b);
1255            for (;;)
1256              {
1257              last_offset += offsets[1];
1258              matchptr += offsets[1];
1259              length -= offsets[1];
1260              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1261              FWRITE(matchptr, 1, offsets[0], stdout);
1262              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1263              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1264              fprintf(stdout, "%c[00m", 0x1b);
1265              }
1266            FWRITE(ptr + last_offset, 1,
1267              (linelength + endlinelength) - last_offset, stdout);
1268            }
1269    
1270          /* Not colouring; no need to search for further matches */
1271    
1272          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1273          }
1274    
1275        /* End of doing what has to be done for a match */
1276    
1277        rc = 0;    /* Had some success */
1278    
1279        /* Remember where the last match happened for after_context. We remember
1280        where we are about to restart, and that line's number. */
1281    
1282        lastmatchrestart = ptr + linelength + endlinelength;
1283        lastmatchnumber = linenumber + 1;
1284        }
1285    
1286      /* For a match in multiline inverted mode (which of course did not cause
1287      anything to be printed), we have to move on to the end of the match before
1288      proceeding. */
1289    
1290      if (multiline && invert && match)
1291        {
1292        int ellength;
1293        char *endmatch = ptr + offsets[1];
1294        t = ptr;
1295        while (t < endmatch)
1296          {
1297          t = end_of_line(t, endptr, &ellength);
1298          if (t <= endmatch) linenumber++; else break;
1299          }
1300        endmatch = end_of_line(endmatch, endptr, &ellength);
1301        linelength = endmatch - ptr - ellength;
1302        }
1303    
1304      /* Advance to after the newline and increment the line number. The file
1305      offset to the current line is maintained in filepos. */
1306    
1307      ptr += linelength + endlinelength;
1308      filepos += linelength + endlinelength;
1309      linenumber++;
1310    
1311      /* If we haven't yet reached the end of the file (the buffer is full), and
1312      the current point is in the top 1/3 of the buffer, slide the buffer down by
1313      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1314      about to be lost, print them. */
1315    
1316      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1317        {
1318        if (after_context > 0 &&
1319            lastmatchnumber > 0 &&
1320            lastmatchrestart < buffer + MBUFTHIRD)
1321          {
1322          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1323          lastmatchnumber = 0;
1324          }
1325    
1326        /* Now do the shuffle */
1327    
1328        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1329        ptr -= MBUFTHIRD;
1330    
1331    #ifdef SUPPORT_LIBZ
1332        if (frtype == FR_LIBZ)
1333          bufflength = 2*MBUFTHIRD +
1334            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1335        else
1336    #endif
1337    
1338    #ifdef SUPPORT_LIBBZ2
1339        if (frtype == FR_LIBBZ2)
1340          bufflength = 2*MBUFTHIRD +
1341            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1342        else
1343    #endif
1344    
1345        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1346    
1347        endptr = buffer + bufflength;
1348    
1349        /* Adjust any last match point */
1350    
1351        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1352        }
1353      }     /* Loop through the whole file */
1354    
1355    /* End of file; print final "after" lines if wanted; do_after_lines sets
1356    hyphenpending if it prints something. */
1357    
1358    if (!only_matching && !count_only)
1359      {
1360      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1361      hyphenpending |= endhyphenpending;
1362      }
1363    
1364    /* Print the file name if we are looking for those without matches and there
1365    were none. If we found a match, we won't have got this far. */
1366    
1367    if (filenames == FN_NOMATCH_ONLY)
1368      {
1369      fprintf(stdout, "%s\n", printname);
1370      return 0;
1371      }
1372    
1373    /* Print the match count if wanted */
1374    
1375    if (count_only)
1376      {
1377      if (count > 0 || !omit_zero_count)
1378        {
1379        if (printname != NULL && filenames != FN_NONE)
1380          fprintf(stdout, "%s:", printname);
1381        fprintf(stdout, "%d\n", count);
1382        }
1383      }
1384    
1385    return rc;
1386    }
1387    
1388    
1389    
1390    /*************************************************
1391    *     Grep a file or recurse into a directory    *
1392    *************************************************/
1393    
1394    /* Given a path name, if it's a directory, scan all the files if we are
1395    recursing; if it's a file, grep it.
1396    
1397    Arguments:
1398      pathname          the path to investigate
1399      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1400      only_one_at_top   TRUE if the path is the only one at toplevel
1401    
1402    Returns:   0 if there was at least one match
1403               1 if there were no matches
1404               2 there was some kind of error
1405    
1406    However, file opening failures are suppressed if "silent" is set.
1407    */
1408    
1409    static int
1410    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1411    {
1412    int rc = 1;
1413    int sep;
1414    int frtype;
1415    int pathlen;
1416    void *handle;
1417    FILE *in = NULL;           /* Ensure initialized */
1418    
1419    #ifdef SUPPORT_LIBZ
1420    gzFile ingz = NULL;
1421    #endif
1422    
1423    #ifdef SUPPORT_LIBBZ2
1424    BZFILE *inbz2 = NULL;
1425    #endif
1426    
1427    /* If the file name is "-" we scan stdin */
1428    
1429    if (strcmp(pathname, "-") == 0)
1430      {
1431      return pcregrep(stdin, FR_PLAIN,
1432        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1433          stdin_name : NULL);
1434      }
1435    
1436    /* If the file is a directory, skip if skipping or if we are recursing, scan
1437    each file and directory within it, subject to any include or exclude patterns
1438    that were set. The scanning code is localized so it can be made
1439    system-specific. */
1440    
1441    if ((sep = isdirectory(pathname)) != 0)
1442      {
1443      if (dee_action == dee_SKIP) return 1;
1444      if (dee_action == dee_RECURSE)
1445        {
1446        char buffer[1024];
1447        char *nextfile;
1448        directory_type *dir = opendirectory(pathname);
1449    
1450        if (dir == NULL)
1451          {
1452          if (!silent)
1453            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1454              strerror(errno));
1455          return 2;
1456          }
1457    
1458        while ((nextfile = readdirectory(dir)) != NULL)
1459          {
1460          int frc, nflen;
1461          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1462          nflen = strlen(nextfile);
1463    
1464          if (isdirectory(buffer))
1465            {
1466            if (exclude_dir_compiled != NULL &&
1467                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1468              continue;
1469    
1470            if (include_dir_compiled != NULL &&
1471                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1472              continue;
1473            }
1474          else
1475            {
1476            if (exclude_compiled != NULL &&
1477                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1478              continue;
1479    
1480            if (include_compiled != NULL &&
1481                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1482              continue;
1483            }
1484    
1485          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1486          if (frc > 1) rc = frc;
1487           else if (frc == 0 && rc == 1) rc = 0;
1488          }
1489    
1490        closedirectory(dir);
1491        return rc;
1492        }
1493      }
1494    
1495    /* If the file is not a directory and not a regular file, skip it if that's
1496    been requested. */
1497    
1498    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1499    
1500    /* Control reaches here if we have a regular file, or if we have a directory
1501    and recursion or skipping was not requested, or if we have anything else and
1502    skipping was not requested. The scan proceeds. If this is the first and only
1503    argument at top level, we don't show the file name, unless we are only showing
1504    the file name, or the filename was forced (-H). */
1505    
1506    pathlen = strlen(pathname);
1507    
1508    /* Open using zlib if it is supported and the file name ends with .gz. */
1509    
1510    #ifdef SUPPORT_LIBZ
1511    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1512      {
1513      ingz = gzopen(pathname, "rb");
1514      if (ingz == NULL)
1515        {
1516        if (!silent)
1517          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1518            strerror(errno));
1519        return 2;
1520        }
1521      handle = (void *)ingz;
1522      frtype = FR_LIBZ;
1523      }
1524    else
1525    #endif
1526    
1527    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1528    
1529    #ifdef SUPPORT_LIBBZ2
1530    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1531      {
1532      inbz2 = BZ2_bzopen(pathname, "rb");
1533      handle = (void *)inbz2;
1534      frtype = FR_LIBBZ2;
1535      }
1536    else
1537    #endif
1538    
1539    /* Otherwise use plain fopen(). The label is so that we can come back here if
1540    an attempt to read a .bz2 file indicates that it really is a plain file. */
1541    
1542    #ifdef SUPPORT_LIBBZ2
1543    PLAIN_FILE:
1544    #endif
1545      {
1546      in = fopen(pathname, "rb");
1547      handle = (void *)in;
1548      frtype = FR_PLAIN;
1549      }
1550    
1551    /* All the opening methods return errno when they fail. */
1552    
1553    if (handle == NULL)
1554      {
1555      if (!silent)
1556        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1557          strerror(errno));
1558      return 2;
1559      }
1560    
1561    /* Now grep the file */
1562    
1563    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1564      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1565    
1566    /* Close in an appropriate manner. */
1567    
1568    #ifdef SUPPORT_LIBZ
1569    if (frtype == FR_LIBZ)
1570      gzclose(ingz);
1571    else
1572    #endif
1573    
1574    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1575    read failed. If the error indicates that the file isn't in fact bzipped, try
1576    again as a normal file. */
1577    
1578    #ifdef SUPPORT_LIBBZ2
1579    if (frtype == FR_LIBBZ2)
1580      {
1581      if (rc == 2)
1582        {
1583        int errnum;
1584        const char *err = BZ2_bzerror(inbz2, &errnum);
1585        if (errnum == BZ_DATA_ERROR_MAGIC)
1586          {
1587          BZ2_bzclose(inbz2);
1588          goto PLAIN_FILE;
1589          }
1590        else if (!silent)
1591          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1592            pathname, err);
1593        }
1594      BZ2_bzclose(inbz2);
1595      }
1596    else
1597    #endif
1598    
1599    /* Normal file close */
1600    
1601    fclose(in);
1602    
1603    /* Pass back the yield from pcregrep(). */
1604    
1605    return rc;
1606    }
1607    
1608    
1609    
1610    
1611    /*************************************************
1612    *                Usage function                  *
1613    *************************************************/
1614    
1615    static int
1616    usage(int rc)
1617    {
1618    option_item *op;
1619    fprintf(stderr, "Usage: pcregrep [-");
1620    for (op = optionlist; op->one_char != 0; op++)
1621      {
1622      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1623      }
1624    fprintf(stderr, "] [long options] [pattern] [files]\n");
1625    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1626      "options.\n");
1627    return rc;
1628    }
1629    
1630    
1631    
1632    
1633    /*************************************************
1634    *                Help function                   *
1635    *************************************************/
1636    
1637    static void
1638    help(void)
1639    {
1640    option_item *op;
1641    
1642    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1643    printf("Search for PATTERN in each FILE or standard input.\n");
1644    printf("PATTERN must be present if neither -e nor -f is used.\n");
1645    printf("\"-\" can be used as a file name to mean STDIN.\n");
1646    
1647    #ifdef SUPPORT_LIBZ
1648    printf("Files whose names end in .gz are read using zlib.\n");
1649    #endif
1650    
1651    #ifdef SUPPORT_LIBBZ2
1652    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1653    #endif
1654    
1655    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1656    printf("Other files and the standard input are read as plain files.\n\n");
1657    #else
1658    printf("All files are read as plain files, without any interpretation.\n\n");
1659    #endif
1660    
1661    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1662    printf("Options:\n");
1663    
1664    for (op = optionlist; op->one_char != 0; op++)
1665      {
1666      int n;
1667      char s[4];
1668      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1669      n = 30 - printf("  %s --%s", s, op->long_name);
1670      if (n < 1) n = 1;
1671      printf("%.*s%s\n", n, "                    ", op->help_text);
1672      }
1673    
1674    printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1675    printf("trailing white space is removed and blank lines are ignored.\n");
1676    printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1677    
1678    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1679    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1680    }
1681    
1682    
1683    
1684    
1685    /*************************************************
1686    *    Handle a single-letter, no data option      *
1687    *************************************************/
1688    
1689    static int
1690    handle_option(int letter, int options)
1691    {
1692    switch(letter)
1693      {
1694      case N_FOFFSETS: file_offsets = TRUE; break;
1695      case N_HELP: help(); exit(0);
1696      case N_LOFFSETS: line_offsets = number = TRUE; break;
1697      case 'c': count_only = TRUE; break;
1698      case 'F': process_options |= PO_FIXED_STRINGS; break;
1699      case 'H': filenames = FN_FORCE; break;
1700      case 'h': filenames = FN_NONE; break;
1701      case 'i': options |= PCRE_CASELESS; break;
1702      case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1703      case 'L': filenames = FN_NOMATCH_ONLY; break;
1704      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1705      case 'n': number = TRUE; break;
1706      case 'o': only_matching = TRUE; break;
1707      case 'q': quiet = TRUE; break;
1708      case 'r': dee_action = dee_RECURSE; break;
1709      case 's': silent = TRUE; break;
1710      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1711      case 'v': invert = TRUE; break;
1712      case 'w': process_options |= PO_WORD_MATCH; break;
1713      case 'x': process_options |= PO_LINE_MATCH; break;
1714    
1715      case 'V':
1716      fprintf(stderr, "pcregrep version %s\n", pcre_version());
1717      exit(0);
1718      break;
1719    
1720      default:
1721      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1722      exit(usage(2));
1723      }
1724    
1725    return options;
1726    }
1727    
1728    
1729    
1730    
1731    /*************************************************
1732    *          Construct printed ordinal             *
1733    *************************************************/
1734    
/* This turns a number into "1st", "2nd", "3rd", "4th", etc. The suffix is
chosen from the last digit, except that numbers whose last two digits are 11,
12, or 13 always take "th" ("11th", "112th"). The text is built in a static
buffer, so each call overwrites the result of the previous one.

Argument:   n    the number
Returns:    pointer to the static buffer containing the ordinal text
*/

static char *
ordin(int n)
{
/* Three decimal digits per byte is more than any int needs; the extra four
bytes are for a minus sign, the two-letter suffix, and the terminating zero. */

static char buffer[3 * sizeof(int) + 4];
const char *suffix = "th";
int length = sprintf(buffer, "%d", n);
int lasttwo = n % 100;

if (lasttwo < 11 || lasttwo > 13) switch (n % 10)
  {
  case 1: suffix = "st"; break;
  case 2: suffix = "nd"; break;
  case 3: suffix = "rd"; break;
  default: break;
  }

strcpy(buffer + length, suffix);
return buffer;
}
1753    
1754    
1755    
1756    /*************************************************
1757    *          Compile a single pattern              *
1758    *************************************************/
1759    
1760    /* When the -F option has been used, this is called for each substring.
1761    Otherwise it's called for each supplied pattern.
1762    
1763    Arguments:
1764      pattern        the pattern string
1765      options        the PCRE options
1766      filename       the file name, or NULL for a command-line pattern
1767      count          0 if this is the only command line pattern, or
1768                     number of the command line pattern, or
1769                     linenumber for a pattern from a file
1770    
1771    Returns:         TRUE on success, FALSE after an error
1772    */
1773    
1774    static BOOL
1775    compile_single_pattern(char *pattern, int options, char *filename, int count)
1776    {
1777    char buffer[MBUFTHIRD + 16];
1778    const char *error;
1779    int errptr;
1780    
1781    if (pattern_count >= MAX_PATTERN_COUNT)
1782      {
1783      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1784        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1785      return FALSE;
1786      }
1787    
1788    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1789      suffix[process_options]);
1790    pattern_list[pattern_count] =
1791      pcre_compile(buffer, options, &error, &errptr, pcretables);
1792    if (pattern_list[pattern_count] != NULL)
1793      {
1794      pattern_count++;
1795      return TRUE;
1796      }
1797    
1798    /* Handle compile errors */
1799    
1800    errptr -= (int)strlen(prefix[process_options]);
1801    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1802    
1803    if (filename == NULL)
1804      {
1805      if (count == 0)
1806        fprintf(stderr, "pcregrep: Error in command-line regex "
1807          "at offset %d: %s\n", errptr, error);
1808      else
1809        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1810          "at offset %d: %s\n", ordin(count), errptr, error);
1811      }
1812    else
1813      {
1814      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1815        "at offset %d: %s\n", count, filename, errptr, error);
1816      }
1817    
1818    return FALSE;
1819    }
1820    
1821    
1822    
1823    /*************************************************
1824    *           Compile one supplied pattern         *
1825    *************************************************/
1826    
1827    /* When the -F option has been used, each string may be a list of strings,
1828    separated by line breaks. They will be matched literally.
1829    
1830    Arguments:
1831      pattern        the pattern string
1832      options        the PCRE options
1833      filename       the file name, or NULL for a command-line pattern
1834      count          0 if this is the only command line pattern, or
1835                     number of the command line pattern, or
1836                     linenumber for a pattern from a file
1837    
1838    Returns:         TRUE on success, FALSE after an error
1839    */
1840    
1841    static BOOL
1842    compile_pattern(char *pattern, int options, char *filename, int count)
1843    {
1844    if ((process_options & PO_FIXED_STRINGS) != 0)
1845      {
1846      char *eop = pattern + strlen(pattern);
1847      char buffer[MBUFTHIRD];
1848      for(;;)
1849        {
1850        int ellength;
1851        char *p = end_of_line(pattern, eop, &ellength);
1852        if (ellength == 0)
1853          return compile_single_pattern(pattern, options, filename, count);
1854        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1855        pattern = p;
1856        if (!compile_single_pattern(buffer, options, filename, count))
1857          return FALSE;
1858        }
1859      }
1860    else return compile_single_pattern(pattern, options, filename, count);
1861    }
1862    
1863    
1864    
1865    /*************************************************
1866    *                Main program                    *
1867    *************************************************/
1868    
1869    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1870    
1871    int
1872    main(int argc, char **argv)
1873    {
1874    int i, j;
1875    int rc = 1;
1876    int pcre_options = 0;
1877    int cmd_pattern_count = 0;
1878    int hint_count = 0;
1879    int errptr;
1880    BOOL only_one_at_top;
1881    char *patterns[MAX_PATTERN_COUNT];
1882    const char *locale_from = "--locale";
1883    const char *error;
1884    
1885    /* Set the default line ending value from the default in the PCRE library;
1886    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1887    Note that the return values from pcre_config(), though derived from the ASCII
1888    codes, are the same in EBCDIC environments, so we must use the actual values
1889    rather than escapes such as as '\r'. */
1890    
1891    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1892    switch(i)
1893      {
1894      default:               newline = (char *)"lf"; break;
1895      case 13:               newline = (char *)"cr"; break;
1896      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
1897      case -1:               newline = (char *)"any"; break;
1898      case -2:               newline = (char *)"anycrlf"; break;
1899      }
1900    
1901    /* Process the options */
1902    
1903    for (i = 1; i < argc; i++)
1904      {
1905      option_item *op = NULL;
1906      char *option_data = (char *)"";    /* default to keep compiler happy */
1907      BOOL longop;
1908      BOOL longopwasequals = FALSE;
1909    
1910      if (argv[i][0] != '-') break;
1911    
1912      /* If we hit an argument that is just "-", it may be a reference to STDIN,
1913      but only if we have previously had -e or -f to define the patterns. */
1914    
1915      if (argv[i][1] == 0)
1916        {
1917        if (pattern_filename != NULL || pattern_count > 0) break;
1918          else exit(usage(2));
1919        }
1920    
1921      /* Handle a long name option, or -- to terminate the options */
1922    
1923      if (argv[i][1] == '-')
1924        {
1925        char *arg = argv[i] + 2;
1926        char *argequals = strchr(arg, '=');
1927    
1928        if (*arg == 0)    /* -- terminates options */
1929          {
1930          i++;
1931          break;                /* out of the options-handling loop */
1932          }
1933    
1934        longop = TRUE;
1935    
1936        /* Some long options have data that follows after =, for example file=name.
1937        Some options have variations in the long name spelling: specifically, we
1938        allow "regexp" because GNU grep allows it, though I personally go along
1939        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1940        These options are entered in the table as "regex(p)". Options can be in
1941        both these categories. */
1942    
1943        for (op = optionlist; op->one_char != 0; op++)
1944          {
1945          char *opbra = strchr(op->long_name, '(');
1946          char *equals = strchr(op->long_name, '=');
1947    
1948          /* Handle options with only one spelling of the name */
1949    
1950          if (opbra == NULL)     /* Does not contain '(' */
1951            {
1952            if (equals == NULL)  /* Not thing=data case */
1953              {
1954              if (strcmp(arg, op->long_name) == 0) break;
1955              }
1956            else                 /* Special case xxx=data */
1957              {
1958              int oplen = equals - op->long_name;
1959              int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1960              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1961                {
1962                option_data = arg + arglen;
1963                if (*option_data == '=')
1964                  {
1965                  option_data++;
1966                  longopwasequals = TRUE;
1967                  }
1968                break;
1969                }
1970              }
1971            }
1972    
1973          /* Handle options with an alternate spelling of the name */
1974    
1975          else
1976            {
1977            char buff1[24];
1978            char buff2[24];
1979    
1980            int baselen = opbra - op->long_name;
1981            int fulllen = strchr(op->long_name, ')') - op->long_name + 1;
1982            int arglen = (argequals == NULL || equals == NULL)?
1983              (int)strlen(arg) : argequals - arg;
1984    
1985            sprintf(buff1, "%.*s", baselen, op->long_name);
1986            sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
1987    
1988            if (strncmp(arg, buff1, arglen) == 0 ||
1989               strncmp(arg, buff2, arglen) == 0)
1990              {
1991              if (equals != NULL && argequals != NULL)
1992                {
1993                option_data = argequals;
1994                if (*option_data == '=')
1995                  {
1996                  option_data++;
1997                  longopwasequals = TRUE;
1998                  }
1999                }
2000              break;
2001              }
2002            }
2003          }
2004    
2005        if (op->one_char == 0)
2006          {
2007          fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2008          exit(usage(2));
2009          }
2010        }
2011    
2012      /* Jeffrey Friedl's debugging harness uses these additional options which
2013      are not in the right form for putting in the option table because they use
2014      only one hyphen, yet are more than one character long. By putting them
2015      separately here, they will not get displayed as part of the help() output,
2016      but I don't think Jeffrey will care about that. */
2017    
2018    #ifdef JFRIEDL_DEBUG
2019      else if (strcmp(argv[i], "-pre") == 0) {
2020              jfriedl_prefix = argv[++i];
2021              continue;
2022      } else if (strcmp(argv[i], "-post") == 0) {
2023              jfriedl_postfix = argv[++i];
2024              continue;
2025      } else if (strcmp(argv[i], "-XT") == 0) {
2026              sscanf(argv[++i], "%d", &jfriedl_XT);
2027              continue;
2028      } else if (strcmp(argv[i], "-XR") == 0) {
2029              sscanf(argv[++i], "%d", &jfriedl_XR);
2030              continue;
2031      }
2032    #endif
2033    
2034    
2035      /* One-char options; many that have no data may be in a single argument; we
2036      continue till we hit the last one or one that needs data. */
2037    
2038      else
2039        {
2040        char *s = argv[i] + 1;
2041        longop = FALSE;
2042        while (*s != 0)
2043          {
2044          for (op = optionlist; op->one_char != 0; op++)
2045            { if (*s == op->one_char) break; }
2046          if (op->one_char == 0)
2047            {
2048            fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2049              *s, argv[i]);
2050            exit(usage(2));
2051            }
2052          if (op->type != OP_NODATA || s[1] == 0)
2053            {
2054            option_data = s+1;
2055            break;
2056            }
2057          pcre_options = handle_option(*s++, pcre_options);
2058          }
2059        }
2060    
2061      /* At this point we should have op pointing to a matched option. If the type
2062      is NO_DATA, it means that there is no data, and the option might set
2063      something in the PCRE options. */
2064    
2065      if (op->type == OP_NODATA)
2066        {
2067        pcre_options = handle_option(op->one_char, pcre_options);
2068        continue;
2069        }
2070    
2071      /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2072      either has a value or defaults to something. It cannot have data in a
2073      separate item. At the moment, the only such options are "colo(u)r" and
2074      Jeffrey Friedl's special -S debugging option. */
2075    
2076      if (*option_data == 0 &&
2077          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2078        {
2079        switch (op->one_char)
2080          {
2081          case N_COLOUR:
2082          colour_option = (char *)"auto";
2083          break;
2084    #ifdef JFRIEDL_DEBUG
2085          case 'S':
2086          S_arg = 0;
2087          break;
2088    #endif
2089          }
2090        continue;
2091        }
2092    
2093      /* Otherwise, find the data string for the option. */
2094    
2095      if (*option_data == 0)
2096        {
2097        if (i >= argc - 1 || longopwasequals)
2098          {
2099          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2100          exit(usage(2));
2101          }
2102        option_data = argv[++i];
2103        }
2104    
2105      /* If the option type is OP_PATLIST, it's the -e option, which can be called
2106      multiple times to create a list of patterns. */
2107    
2108      if (op->type == OP_PATLIST)
2109        {
2110        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2111          {
2112          fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2113            MAX_PATTERN_COUNT);
2114          return 2;
2115          }
2116        patterns[cmd_pattern_count++] = option_data;
2117        }
2118    
2119      /* Otherwise, deal with single string or numeric data values. */
2120    
2121      else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2122        {
2123        *((char **)op->dataptr) = option_data;
2124        }
2125      else
2126        {
2127        char *endptr;
2128        int n = strtoul(option_data, &endptr, 10);
2129        if (*endptr != 0)
2130          {
2131          if (longop)
2132            {
2133            char *equals = strchr(op->long_name, '=');
2134            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2135              equals - op->long_name;
2136            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2137              option_data, nlen, op->long_name);
2138            }
2139          else
2140            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2141              option_data, op->one_char);
2142          exit(usage(2));
2143          }
2144        *((int *)op->dataptr) = n;
2145        }
2146      }
2147    
2148    /* Options have been decoded. If -C was used, its value is used as a default
2149    for -A and -B. */
2150    
2151    if (both_context > 0)
2152      {
2153      if (after_context == 0) after_context = both_context;
2154      if (before_context == 0) before_context = both_context;
2155      }
2156    
2157    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2158    However, the latter two set the only_matching flag. */
2159    
2160    if ((only_matching && (file_offsets || line_offsets)) ||
2161        (file_offsets && line_offsets))
2162      {
2163      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2164        "and/or --line-offsets\n");
2165      exit(usage(2));
2166      }
2167    
2168    if (file_offsets || line_offsets) only_matching = TRUE;
2169    
2170    /* If a locale has not been provided as an option, see if the LC_CTYPE or
2171    LC_ALL environment variable is set, and if so, use it. */
2172    
2173    if (locale == NULL)
2174      {
2175      locale = getenv("LC_ALL");
2176      locale_from = "LCC_ALL";
2177      }
2178    
2179    if (locale == NULL)
2180      {
2181      locale = getenv("LC_CTYPE");
2182      locale_from = "LC_CTYPE";
2183      }
2184    
2185    /* If a locale has been provided, set it, and generate the tables the PCRE
2186    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2187    
2188    if (locale != NULL)
2189      {
2190      if (setlocale(LC_CTYPE, locale) == NULL)
2191        {
2192        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2193          locale, locale_from);
2194        return 2;
2195        }
2196      pcretables = pcre_maketables();
2197      }
2198    
2199    /* Sort out colouring */
2200    
2201    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2202      {
2203      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2204      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2205      else
2206        {
2207        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2208          colour_option);
2209        return 2;
2210        }
2211      if (do_colour)
2212        {
2213        char *cs = getenv("PCREGREP_COLOUR");
2214        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2215        if (cs != NULL) colour_string = cs;
2216        }
2217      }
2218    
2219    /* Interpret the newline type; the default settings are Unix-like. */
2220    
2221    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2222      {
2223      pcre_options |= PCRE_NEWLINE_CR;
2224      endlinetype = EL_CR;
2225      }
2226    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2227      {
2228      pcre_options |= PCRE_NEWLINE_LF;
2229      endlinetype = EL_LF;
2230      }
2231    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2232      {
2233      pcre_options |= PCRE_NEWLINE_CRLF;
2234      endlinetype = EL_CRLF;
2235      }
2236    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2237      {
2238      pcre_options |= PCRE_NEWLINE_ANY;
2239      endlinetype = EL_ANY;
2240      }
2241    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2242      {
2243      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2244      endlinetype = EL_ANYCRLF;
2245      }
2246    else
2247      {
2248      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2249      return 2;
2250      }
2251    
2252    /* Interpret the text values for -d and -D */
2253    
2254    if (dee_option != NULL)
2255      {
2256      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2257      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2258      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2259      else
2260        {
2261        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2262        return 2;
2263        }
2264      }
2265    
2266    if (DEE_option != NULL)
2267      {
2268      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2269      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2270      else
2271        {
2272        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2273        return 2;
2274        }
2275      }
2276    
2277    /* Check the values for Jeffrey Friedl's debugging options. */
2278    
2279    #ifdef JFRIEDL_DEBUG
2280    if (S_arg > 9)
2281      {
2282      fprintf(stderr, "pcregrep: bad value for -S option\n");
2283      return 2;
2284      }
2285    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2286      {
2287      if (jfriedl_XT == 0) jfriedl_XT = 1;
2288      if (jfriedl_XR == 0) jfriedl_XR = 1;
2289      }
2290    #endif
2291    
2292    /* Get memory to store the pattern and hints lists. */
2293    
2294    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2295    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2296    
2297    if (pattern_list == NULL || hints_list == NULL)
2298      {
2299      fprintf(stderr, "pcregrep: malloc failed\n");
2300      goto EXIT2;
2301      }
2302    
2303    /* If no patterns were provided by -e, and there is no file provided by -f,
2304    the first argument is the one and only pattern, and it must exist. */
2305    
2306    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2307      {
2308      if (i >= argc) return usage(2);
2309      patterns[cmd_pattern_count++] = argv[i++];
2310      }
2311    
2312    /* Compile the patterns that were provided on the command line, either by
2313    multiple uses of -e or as a single unkeyed pattern. */
2314    
2315    for (j = 0; j < cmd_pattern_count; j++)
2316      {
2317      if (!compile_pattern(patterns[j], pcre_options, NULL,
2318           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2319        goto EXIT2;
2320      }
2321    
2322    /* Compile the regular expressions that are provided in a file. */
2323    
2324    if (pattern_filename != NULL)
2325      {
2326      int linenumber = 0;
2327      FILE *f;
2328      char *filename;
2329      char buffer[MBUFTHIRD];
2330    
2331      if (strcmp(pattern_filename, "-") == 0)
2332        {
2333        f = stdin;
2334        filename = stdin_name;
2335        }
2336      else
2337        {
2338        f = fopen(pattern_filename, "r");
2339        if (f == NULL)
2340          {
2341          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2342            strerror(errno));
2343          goto EXIT2;
2344          }
2345        filename = pattern_filename;
2346        }
2347    
2348      while (fgets(buffer, MBUFTHIRD, f) != NULL)
2349        {
2350        char *s = buffer + (int)strlen(buffer);
2351        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2352        *s = 0;
2353        linenumber++;
2354        if (buffer[0] == 0) continue;   /* Skip blank lines */
2355        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2356          goto EXIT2;
2357        }
2358    
2359      if (f != stdin) fclose(f);
2360      }
2361    
2362    /* Study the regular expressions, as we will be running them many times */
2363    
2364    for (j = 0; j < pattern_count; j++)
2365      {
2366      hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2367      if (error != NULL)
2368        {
2369        char s[16];
2370        if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2371        fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2372        goto EXIT2;
2373        }
2374      hint_count++;
2375      }
2376    
2377    /* If there are include or exclude patterns, compile them. */
2378    
2379    if (exclude_pattern != NULL)
2380      {
2381      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2382        pcretables);
2383      if (exclude_compiled == NULL)
2384        {
2385        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2386          errptr, error);
2387        goto EXIT2;
2388        }
2389      }
2390    
2391    if (include_pattern != NULL)
2392      {
2393      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2394        pcretables);
2395      if (include_compiled == NULL)
2396        {
2397        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2398          errptr, error);
2399        goto EXIT2;
2400        }
2401      }
2402    
2403    if (exclude_dir_pattern != NULL)
2404      {
2405      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2406        pcretables);
2407      if (exclude_dir_compiled == NULL)
2408        {
2409        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2410          errptr, error);
2411        goto EXIT2;
2412        }
2413      }
2414    
2415    if (include_dir_pattern != NULL)
2416      {
2417      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2418        pcretables);
2419      if (include_dir_compiled == NULL)
2420        {
2421        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2422          errptr, error);
2423        goto EXIT2;
2424        }
2425      }
2426    
2427    /* If there are no further arguments, do the business on stdin and exit. */
2428    
2429    if (i >= argc)
2430      {
2431      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2432      goto EXIT;
2433      }
2434    
2435    /* Otherwise, work through the remaining arguments as files or directories.
2436    Pass in the fact that there is only one argument at top level - this suppresses
2437    the file name if the argument is not a directory and filenames are not
2438    otherwise forced. */
2439    
2440    only_one_at_top = i == argc - 1;   /* Catch initial value of i */
2441    
2442    for (; i < argc; i++)
2443      {
2444      int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2445        only_one_at_top);
2446      if (frc > 1) rc = frc;
2447        else if (frc == 0 && rc == 1) rc = 0;
2448      }
2449    
2450    EXIT:
2451    if (pattern_list != NULL)
2452      {
2453      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2454      free(pattern_list);
2455      }
2456    if (hints_list != NULL)
2457      {
2458      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2459      free(hints_list);
2460      }
2461    return rc;
2462    
2463    EXIT2:
2464    rc = 2;
2465    goto EXIT;
2466  }  }
2467    
2468  /* End */  /* End of pcregrep */

Legend:
Removed from v.49  
changed lines
  Added in v.515

  ViewVC Help
Powered by ViewVC 1.1.5