/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC | revision 377 by ph10, Sun Mar 1 12:07:19 2009 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix system it can recurse into directories. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2009 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55    #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 18  its pattern matching. On a Unix system i Line 70  its pattern matching. On a Unix system i
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "2.0 01-Aug-2001"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    
75    #if BUFSIZ > 8192
76    #define MBUFTHIRD BUFSIZ
77    #else
78    #define MBUFTHIRD 8192
79    #endif
80    
81    /* Values for the "filenames" variable, which specifies options for file name
82    output. The order is important; it is assumed that a file name is wanted for
83    all values greater than FN_DEFAULT. */
84    
85    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
86    
87    /* File reading styles */
88    
89    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
90    
91    /* Actions for the -d and -D options */
92    
93    enum { dee_READ, dee_SKIP, dee_RECURSE };
94    enum { DEE_READ, DEE_SKIP };
95    
96    /* Actions for special processing options (flag bits) */
97    
98    #define PO_WORD_MATCH     0x0001
99    #define PO_LINE_MATCH     0x0002
100    #define PO_FIXED_STRINGS  0x0004
101    
102    /* Line ending types */
103    
104    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
105    
106    
107    
108  /*************************************************  /*************************************************
109  *               Global variables                 *  *               Global variables                 *
110  *************************************************/  *************************************************/
111    
112    /* Jeffrey Friedl has some debugging requirements that are not part of the
113    regular code. */
114    
115    #ifdef JFRIEDL_DEBUG
116    static int S_arg = -1;
117    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
118    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
119    static const char *jfriedl_prefix = "";
120    static const char *jfriedl_postfix = "";
121    #endif
122    
123    static int  endlinetype;
124    
125    static char *colour_string = (char *)"1;31";
126    static char *colour_option = NULL;
127    static char *dee_option = NULL;
128    static char *DEE_option = NULL;
129    static char *newline = NULL;
130  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
131    static char *stdin_name = (char *)"(standard input)";
132    static char *locale = NULL;
133    
134    static const unsigned char *pcretables = NULL;
135    
136  static int  pattern_count = 0;  static int  pattern_count = 0;
137  static pcre **pattern_list;  static pcre **pattern_list = NULL;
138  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
139    
140    static char *include_pattern = NULL;
141    static char *exclude_pattern = NULL;
142    static char *include_dir_pattern = NULL;
143    static char *exclude_dir_pattern = NULL;
144    
145    static pcre *include_compiled = NULL;
146    static pcre *exclude_compiled = NULL;
147    static pcre *include_dir_compiled = NULL;
148    static pcre *exclude_dir_compiled = NULL;
149    
150    static int after_context = 0;
151    static int before_context = 0;
152    static int both_context = 0;
153    static int dee_action = dee_READ;
154    static int DEE_action = DEE_READ;
155    static int error_count = 0;
156    static int filenames = FN_DEFAULT;
157    static int process_options = 0;
158    
159  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
160  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
161  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
162    static BOOL hyphenpending = FALSE;
163  static BOOL invert = FALSE;  static BOOL invert = FALSE;
164    static BOOL line_offsets = FALSE;
165    static BOOL multiline = FALSE;
166  static BOOL number = FALSE;  static BOOL number = FALSE;
167  static BOOL recurse = FALSE;  static BOOL only_matching = FALSE;
168    static BOOL quiet = FALSE;
169  static BOOL silent = FALSE;  static BOOL silent = FALSE;
170  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
171    
172  /* Structure for options and list of them */  /* Structure for options and list of them */
173    
174    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
175           OP_PATLIST };
176    
177  typedef struct option_item {  typedef struct option_item {
178      int type;
179    int one_char;    int one_char;
180    char *long_name;    void *dataptr;
181    char *help_text;    const char *long_name;
182      const char *help_text;
183  } option_item;  } option_item;
184    
185    /* Options without a single-letter equivalent get a negative value. This can be
186    used to identify them. */
187    
188    #define N_COLOUR       (-1)
189    #define N_EXCLUDE      (-2)
190    #define N_EXCLUDE_DIR  (-3)
191    #define N_HELP         (-4)
192    #define N_INCLUDE      (-5)
193    #define N_INCLUDE_DIR  (-6)
194    #define N_LABEL        (-7)
195    #define N_LOCALE       (-8)
196    #define N_NULL         (-9)
197    #define N_LOFFSETS     (-10)
198    #define N_FOFFSETS     (-11)
199    
200  static option_item optionlist[] = {  static option_item optionlist[] = {
201    { -1,  "help",         "display this help and exit" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
202    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
203    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
204    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
205    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
206    { 'n', "line-number",  "print line number with output lines" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
207    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
208    { 's', "no-messages",  "suppress error messages" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
209    { 'V', "version",      "print version information and exit" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
210    { 'v', "invert-match", "select non-matching lines" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
211    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
212    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
213    { 0,    NULL,           NULL }    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
214      { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
215      { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
216      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
217      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
218      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
219      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
220      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
221      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
222      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
223      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
224      { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
225      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
226      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
227      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
228      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
229      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
230      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
231      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
232      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
233    #ifdef JFRIEDL_DEBUG
234      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
235    #endif
236      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
237      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
238      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
239      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
240      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
241      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
242      { OP_NODATA,    0,        NULL,               NULL,            NULL }
243  };  };
244    
245    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
246    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
247    that the combination of -w and -x has the same effect as -x on its own, so we
248    can treat them as the same. */
249    
250    static const char *prefix[] = {
251      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
252    
253    static const char *suffix[] = {
254      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
255    
256    /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
257    
258    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
259    
260    const char utf8_table4[] = {
261      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
262      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
263      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
264      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
265    
266    
267    
268  /*************************************************  /*************************************************
269  *       Functions for directory scanning         *  *            OS-specific functions               *
270  *************************************************/  *************************************************/
271    
272  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
273  although at present the only ones are for Unix, and for "no directory recursion  although at present the only ones are for Unix, Win32, and for "no support". */
 support". */  
274    
275    
276  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
277    
278  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
279  #include <sys/types.h>  #include <sys/types.h>
280  #include <sys/stat.h>  #include <sys/stat.h>
281  #include <dirent.h>  #include <dirent.h>
282    
283  typedef DIR directory_type;  typedef DIR directory_type;
284    
285  int  static int
286  isdirectory(char *filename)  isdirectory(char *filename)
287  {  {
288  struct stat statbuf;  struct stat statbuf;
# Line 92  if (stat(filename, &statbuf) < 0) Line 291  if (stat(filename, &statbuf) < 0)
291  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
292  }  }
293    
294  directory_type *  static directory_type *
295  opendirectory(char *filename)  opendirectory(char *filename)
296  {  {
297  return opendir(filename);  return opendir(filename);
298  }  }
299    
300  char *  static char *
301  readdirectory(directory_type *dir)  readdirectory(directory_type *dir)
302  {  {
303  for (;;)  for (;;)
# Line 108  for (;;) Line 307  for (;;)
307    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
308      return dent->d_name;      return dent->d_name;
309    }    }
310    /* Control never reaches here */
311    }
312    
313    static void
314    closedirectory(directory_type *dir)
315    {
316    closedir(dir);
317    }
318    
319    
320    /************* Test for regular file in Unix **********/
321    
322    static int
323    isregfile(char *filename)
324    {
325    struct stat statbuf;
326    if (stat(filename, &statbuf) < 0)
327      return 1;        /* In the expectation that opening as a file will fail */
328    return (statbuf.st_mode & S_IFMT) == S_IFREG;
329    }
330    
331    
332    /************* Test stdout for being a terminal in Unix **********/
333    
334    static BOOL
335    is_stdout_tty(void)
336    {
337    return isatty(fileno(stdout));
338    }
339    
340    
341    /************* Directory scanning in Win32 ***********/
342    
343    /* I (Philip Hazel) have no means of testing this code. It was contributed by
344    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
345    when it did not exist. David Byron added a patch that moved the #include of
346    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
347    */
348    
349    #elif HAVE_WINDOWS_H
350    
351    #ifndef STRICT
352    # define STRICT
353    #endif
354    #ifndef WIN32_LEAN_AND_MEAN
355    # define WIN32_LEAN_AND_MEAN
356    #endif
357    
358    #include <windows.h>
359    
360    #ifndef INVALID_FILE_ATTRIBUTES
361    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
362    #endif
363    
364    typedef struct directory_type
365    {
366    HANDLE handle;
367    BOOL first;
368    WIN32_FIND_DATA data;
369    } directory_type;
370    
371    int
372    isdirectory(char *filename)
373    {
374    DWORD attr = GetFileAttributes(filename);
375    if (attr == INVALID_FILE_ATTRIBUTES)
376      return 0;
377    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
378    }
379    
380    directory_type *
381    opendirectory(char *filename)
382    {
383    size_t len;
384    char *pattern;
385    directory_type *dir;
386    DWORD err;
387    len = strlen(filename);
388    pattern = (char *) malloc(len + 3);
389    dir = (directory_type *) malloc(sizeof(*dir));
390    if ((pattern == NULL) || (dir == NULL))
391      {
392      fprintf(stderr, "pcregrep: malloc failed\n");
393      exit(2);
394      }
395    memcpy(pattern, filename, len);
396    memcpy(&(pattern[len]), "\\*", 3);
397    dir->handle = FindFirstFile(pattern, &(dir->data));
398    if (dir->handle != INVALID_HANDLE_VALUE)
399      {
400      free(pattern);
401      dir->first = TRUE;
402      return dir;
403      }
404    err = GetLastError();
405    free(pattern);
406    free(dir);
407    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
408    return NULL;
409    }
410    
411    char *
412    readdirectory(directory_type *dir)
413    {
414    for (;;)
415      {
416      if (!dir->first)
417        {
418        if (!FindNextFile(dir->handle, &(dir->data)))
419          return NULL;
420        }
421      else
422        {
423        dir->first = FALSE;
424        }
425      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
426        return dir->data.cFileName;
427      }
428    #ifndef _MSC_VER
429  return NULL;   /* Keep compiler happy; never executed */  return NULL;   /* Keep compiler happy; never executed */
430    #endif
431  }  }
432    
433  void  void
434  closedirectory(directory_type *dir)  closedirectory(directory_type *dir)
435  {  {
436  closedir(dir);  FindClose(dir->handle);
437    free(dir);
438  }  }
439    
440    
441  #else  /************* Test for regular file in Win32 **********/
442    
443    /* I don't know how to do this, or if it can be done; assume all paths are
444    regular if they are not directories. */
445    
446    int isregfile(char *filename)
447    {
448    return !isdirectory(filename);
449    }
450    
451    
452    /************* Test stdout for being a terminal in Win32 **********/
453    
454    /* I don't know how to do this; assume never */
455    
456    static BOOL
457    is_stdout_tty(void)
458    {
459    return FALSE;
460    }
461    
462    
463  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
464    
465  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
466    
467    #else
468    
469  typedef void directory_type;  typedef void directory_type;
470    
471  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
472  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
473  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
474  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
475    
476    
477    /************* Test for regular file when we can't do it **********/
478    
479    /* Assume all files are regular. */
480    
481    int isregfile(char *filename) { return 1; }
482    
483    
484    /************* Test stdout for being a terminal when we can't do it **********/
485    
486    static BOOL
487    is_stdout_tty(void)
488    {
489    return FALSE;
490    }
491    
492    
493  #endif  #endif
494    
495    
496    
497  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
498  /*************************************************  /*************************************************
499  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
500  *************************************************/  *************************************************/
# Line 159  return sys_errlist[n]; Line 517  return sys_errlist[n];
517    
518    
519  /*************************************************  /*************************************************
520  *              Grep an individual file           *  *             Find end of line                   *
521  *************************************************/  *************************************************/
522    
523  static int  /* The length of the endline sequence that is found is set via lenptr. This may
524  pcregrep(FILE *in, char *name)  be zero at the very end of the file if there is no line-ending sequence there.
 {  
 int rc = 1;  
 int linenumber = 0;  
 int count = 0;  
 int offsets[99];  
 char buffer[BUFSIZ];  
525    
526  while (fgets(buffer, sizeof(buffer), in) != NULL)  Arguments:
527      p         current position in line
528      endptr    end of available data
529      lenptr    where to put the length of the eol sequence
530    
531    Returns:    pointer after the last byte of the line, including the newline byte(s)
532    */
533    
534    static char *
535    end_of_line(char *p, char *endptr, int *lenptr)
536    {
537    switch(endlinetype)
538    {    {
539    BOOL match = FALSE;    default:      /* Just in case */
540    int i;    case EL_LF:
541    int length = (int)strlen(buffer);    while (p < endptr && *p != '\n') p++;
542    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    if (p < endptr)
543    linenumber++;      {
544        *lenptr = 1;
545        return p + 1;
546        }
547      *lenptr = 0;
548      return endptr;
549    
550    for (i = 0; !match && i < pattern_count; i++)    case EL_CR:
551      while (p < endptr && *p != '\r') p++;
552      if (p < endptr)
553      {      {
554      match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,      *lenptr = 1;
555        offsets, 99) >= 0;      return p + 1;
     if (match && whole_lines && offsets[1] != length) match = FALSE;  
556      }      }
557      *lenptr = 0;
558      return endptr;
559    
560    if (match != invert)    case EL_CRLF:
561      for (;;)
562      {      {
563      if (count_only) count++;      while (p < endptr && *p != '\r') p++;
564        if (++p >= endptr)
565          {
566          *lenptr = 0;
567          return endptr;
568          }
569        if (*p == '\n')
570          {
571          *lenptr = 2;
572          return p + 1;
573          }
574        }
575      break;
576    
577      case EL_ANYCRLF:
578      while (p < endptr)
579        {
580        int extra = 0;
581        register int c = *((unsigned char *)p);
582    
583      else if (filenames_only)      if (utf8 && c >= 0xc0)
584        {        {
585        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        int gcii, gcss;
586        return 0;        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
587          gcss = 6*extra;
588          c = (c & utf8_table3[extra]) << gcss;
589          for (gcii = 1; gcii <= extra; gcii++)
590            {
591            gcss -= 6;
592            c |= (p[gcii] & 0x3f) << gcss;
593            }
594        }        }
595    
596      else if (silent) return 0;      p += 1 + extra;
597    
598      else      switch (c)
599        {        {
600        if (name != NULL) fprintf(stdout, "%s:", name);        case 0x0a:    /* LF */
601        if (number) fprintf(stdout, "%d:", linenumber);        *lenptr = 1;
602        fprintf(stdout, "%s\n", buffer);        return p;
603    
604          case 0x0d:    /* CR */
605          if (p < endptr && *p == 0x0a)
606            {
607            *lenptr = 2;
608            p++;
609            }
610          else *lenptr = 1;
611          return p;
612    
613          default:
614          break;
615        }        }
616        }   /* End of loop for ANYCRLF case */
617    
618      rc = 0;    *lenptr = 0;  /* Must have hit the end */
619      }    return endptr;
   }  
620    
621  if (count_only)    case EL_ANY:
622    {    while (p < endptr)
623    if (name != NULL) fprintf(stdout, "%s:", name);      {
624    fprintf(stdout, "%d\n", count);      int extra = 0;
625    }      register int c = *((unsigned char *)p);
626    
627  return rc;      if (utf8 && c >= 0xc0)
628  }        {
629          int gcii, gcss;
630          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
631          gcss = 6*extra;
632          c = (c & utf8_table3[extra]) << gcss;
633          for (gcii = 1; gcii <= extra; gcii++)
634            {
635            gcss -= 6;
636            c |= (p[gcii] & 0x3f) << gcss;
637            }
638          }
639    
640        p += 1 + extra;
641    
642        switch (c)
643          {
644          case 0x0a:    /* LF */
645          case 0x0b:    /* VT */
646          case 0x0c:    /* FF */
647          *lenptr = 1;
648          return p;
649    
650          case 0x0d:    /* CR */
651          if (p < endptr && *p == 0x0a)
652            {
653            *lenptr = 2;
654            p++;
655            }
656          else *lenptr = 1;
657          return p;
658    
659          case 0x85:    /* NEL */
660          *lenptr = utf8? 2 : 1;
661          return p;
662    
663          case 0x2028:  /* LS */
664          case 0x2029:  /* PS */
665          *lenptr = 3;
666          return p;
667    
668          default:
669          break;
670          }
671        }   /* End of loop for ANY case */
672    
673      *lenptr = 0;  /* Must have hit the end */
674      return endptr;
675      }     /* End of overall switch */
676    }
677    
678    
679    
680  /*************************************************  /*************************************************
681  *     Grep a file or recurse into a directory    *  *         Find start of previous line            *
682  *************************************************/  *************************************************/
683    
684  static int  /* This is called when looking back for before lines to print.
 grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,  
   BOOL only_one_at_top)  
 {  
 int rc = 1;  
 int sep;  
 FILE *in;  
685    
686  /* If the file is a directory and we are recursing, scan each file within it.  Arguments:
687  The scanning code is localized so it can be made system-specific. */    p         start of the subsequent line
688      startptr  start of available data
689    
690  if ((sep = isdirectory(filename)) != 0 && recurse)  Returns:    pointer to the start of the previous line
691    {  */
   char buffer[1024];  
   char *nextfile;  
   directory_type *dir = opendirectory(filename);  
692    
693    if (dir == NULL)  static char *
694      {  previous_line(char *p, char *startptr)
695      fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,  {
696        strerror(errno));  switch(endlinetype)
697      return 2;    {
698      }    default:      /* Just in case */
699      case EL_LF:
700      p--;
701      while (p > startptr && p[-1] != '\n') p--;
702      return p;
703    
704      case EL_CR:
705      p--;
706      while (p > startptr && p[-1] != '\n') p--;
707      return p;
708    
709    while ((nextfile = readdirectory(dir)) != NULL)    case EL_CRLF:
710      for (;;)
711      {      {
712      int frc;      p -= 2;
713      sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);      while (p > startptr && p[-1] != '\n') p--;
714      frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);      if (p <= startptr + 1 || p[-2] == '\r') return p;
     if (frc == 0 && rc == 1) rc = 0;  
715      }      }
716      return p;   /* But control should never get here */
717    
718    closedirectory(dir);    case EL_ANY:
719    return rc;    case EL_ANYCRLF:
720    }    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
721      if (utf8) while ((*p & 0xc0) == 0x80) p--;
722    
723  /* If the file is not a directory, or we are not recursing, scan it. If this is    while (p > startptr)
724  the first and only argument at top level, we don't show the file name.      {
725  Otherwise, control is via the show_filenames variable. */      register int c;
726        char *pp = p - 1;
727    
728  in = fopen(filename, "r");      if (utf8)
729  if (in == NULL)        {
730    {        int extra = 0;
731    fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));        while ((*pp & 0xc0) == 0x80) pp--;
732    return 2;        c = *((unsigned char *)pp);
733    }        if (c >= 0xc0)
734            {
735            int gcii, gcss;
736            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
737            gcss = 6*extra;
738            c = (c & utf8_table3[extra]) << gcss;
739            for (gcii = 1; gcii <= extra; gcii++)
740              {
741              gcss -= 6;
742              c |= (pp[gcii] & 0x3f) << gcss;
743              }
744            }
745          }
746        else c = *((unsigned char *)pp);
747    
748  rc = pcregrep(in, (show_filenames && !only_one_at_top)? filename : NULL);      if (endlinetype == EL_ANYCRLF) switch (c)
749  fclose(in);        {
750  return rc;        case 0x0a:    /* LF */
751  }        case 0x0d:    /* CR */
752          return p;
753    
754          default:
755          break;
756          }
757    
758        else switch (c)
759          {
760          case 0x0a:    /* LF */
761          case 0x0b:    /* VT */
762          case 0x0c:    /* FF */
763          case 0x0d:    /* CR */
764          case 0x85:    /* NEL */
765          case 0x2028:  /* LS */
766          case 0x2029:  /* PS */
767          return p;
768    
769          default:
770          break;
771          }
772    
773  /*************************************************      p = pp;  /* Back one character */
774  *                Usage function                  *      }        /* End of loop for ANY case */
 *************************************************/  
775    
776  static int    return startptr;  /* Hit start of data */
777  usage(int rc)    }     /* End of overall switch */
 {  
 fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n");  
 fprintf(stderr, "Type `pcregrep --help' for more information.\n");  
 return rc;  
778  }  }
779    
780    
781    
782    
783    
784  /*************************************************  /*************************************************
785  *                Help function                   *  *       Print the previous "after" lines         *
786  *************************************************/  *************************************************/
787    
788  static void  /* This is called if we are about to lose said lines because of buffer filling,
789  help(void)  and at the end of the file. The data in the line is written using fwrite() so
790  {  that a binary zero does not terminate it.
791  option_item *op;  
792    Arguments:
793      lastmatchnumber   the number of the last matching line, plus one
794      lastmatchrestart  where we restarted after the last match
795      endptr            end of available data
796      printname         filename for printing
797    
798  printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n");  Returns:            nothing
799  printf("Search for PATTERN in each FILE or standard input.\n");  */
 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
   
 printf("Options:\n");  
800    
801  for (op = optionlist; op->one_char != 0; op++)  static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
802      char *endptr, char *printname)
803    {
804    if (after_context > 0 && lastmatchnumber > 0)
805    {    {
806    int n;    int count = 0;
807    char s[4];    while (lastmatchrestart < endptr && count++ < after_context)
808    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");      {
809    printf("  %s --%s%n", s, op->long_name, &n);      int ellength;
810    n = 30 - n;      char *pp = lastmatchrestart;
811    if (n < 1) n = 1;      if (printname != NULL) fprintf(stdout, "%s-", printname);
812    printf("%.*s%s\n", n, "                    ", op->help_text);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
813        pp = end_of_line(pp, endptr, &ellength);
814        fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
815        lastmatchrestart = pp;
816        }
817      hyphenpending = TRUE;
818    }    }
   
 printf("\n  -f<filename>  or  --file=<filename>\n");  
 printf("    Read patterns from <filename> instead of using a command line option.\n");  
 printf("    Trailing white space is removed; blanks lines are ignored.\n");  
 printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
   
 printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  
 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  
819  }  }
820    
821    
822    
   
823  /*************************************************  /*************************************************
824  *                Handle an option                *  *            Grep an individual file             *
825  *************************************************/  *************************************************/
826    
827    /* This is called from grep_or_recurse() below. It uses a buffer that is three
828    times the value of MBUFTHIRD. The matching point is never allowed to stray into
829    the top third of the buffer, thus keeping more of the file available for
830    context printing or for multiline scanning. For large files, the pointer will
831    be in the middle third most of the time, so the bottom third is available for
832    "before" context printing.
833    
834    Arguments:
835      handle       the fopened FILE stream for a normal file
836                   the gzFile pointer when reading is via libz
837                   the BZFILE pointer when reading is via libbz2
838      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
839      printname    the file name if it is to be printed for each match
840                   or NULL if the file name is not to be printed
841                   it cannot be NULL if filenames[_nomatch]_only is set
842    
843    Returns:       0 if there was at least one match
844                   1 otherwise (no matches)
845                   2 if there is a read error on a .bz2 file
846    */
847    
848  static int  static int
849  handle_option(int letter, int options)  pcregrep(void *handle, int frtype, char *printname)
850  {  {
851  switch(letter)  int rc = 1;
852    {  int linenumber = 1;
853    case -1:  help(); exit(0);  int lastmatchnumber = 0;
854    case 'c': count_only = TRUE; break;  int count = 0;
855    case 'h': filenames = FALSE; break;  int filepos = 0;
856    case 'i': options |= PCRE_CASELESS; break;  int offsets[99];
857    case 'l': filenames_only = TRUE;  char *lastmatchrestart = NULL;
858    case 'n': number = TRUE; break;  char buffer[3*MBUFTHIRD];
859    case 'r': recurse = TRUE; break;  char *ptr = buffer;
860    case 's': silent = TRUE; break;  char *endptr;
861    case 'v': invert = TRUE; break;  size_t bufflength;
862    case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;  BOOL endhyphenpending = FALSE;
863    FILE *in = NULL;                    /* Ensure initialized */
864    case 'V':  
865    fprintf(stderr, "pcregrep version %s using ", VERSION);  #ifdef SUPPORT_LIBZ
866    fprintf(stderr, "PCRE version %s\n", pcre_version());  gzFile ingz = NULL;
867    #endif
868    
869    #ifdef SUPPORT_LIBBZ2
870    BZFILE *inbz2 = NULL;
871    #endif
872    
873    
874    /* Do the first read into the start of the buffer and set up the pointer to end
875    of what we have. In the case of libz, a non-zipped .gz file will be read as a
876    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
877    fail. */
878    
879    #ifdef SUPPORT_LIBZ
880    if (frtype == FR_LIBZ)
881      {
882      ingz = (gzFile)handle;
883      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
884      }
885    else
886    #endif
887    
888    #ifdef SUPPORT_LIBBZ2
889    if (frtype == FR_LIBBZ2)
890      {
891      inbz2 = (BZFILE *)handle;
892      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
893      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
894      }                                    /* without the cast it is unsigned. */
895    else
896    #endif
897    
898      {
899      in = (FILE *)handle;
900      bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
901      }
902    
903    endptr = buffer + bufflength;
904    
905    /* Loop while the current pointer is not at the end of the file. For large
906    files, endptr will be at the end of the buffer when we are in the middle of the
907    file, but ptr will never get there, because as soon as it gets over 2/3 of the
908    way, the buffer is shifted left and re-filled. */
909    
910    while (ptr < endptr)
911      {
912      int i, endlinelength;
913      int mrc = 0;
914      BOOL match = FALSE;
915      char *matchptr = ptr;
916      char *t = ptr;
917      size_t length, linelength;
918    
919      /* At this point, ptr is at the start of a line. We need to find the length
920      of the subject string to pass to pcre_exec(). In multiline mode, it is the
921      length of the remainder of the data in the buffer. Otherwise, it is the length of
922      the next line. After matching, we always advance by the length of the next
923      line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
924      that any match is constrained to be in the first line. */
925    
926      t = end_of_line(t, endptr, &endlinelength);
927      linelength = t - ptr - endlinelength;
928      length = multiline? (size_t)(endptr - ptr) : linelength;
929    
930      /* Extra processing for Jeffrey Friedl's debugging. */
931    
932    #ifdef JFRIEDL_DEBUG
933      if (jfriedl_XT || jfriedl_XR)
934      {
935          #include <sys/time.h>
936          #include <time.h>
937          struct timeval start_time, end_time;
938          struct timezone dummy;
939    
940          if (jfriedl_XT)
941          {
942              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
943              const char *orig = ptr;
944              ptr = malloc(newlen + 1);
945              if (!ptr) {
946                      printf("out of memory");
947                      exit(2);
948              }
949              endptr = ptr;
950              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
951              for (i = 0; i < jfriedl_XT; i++) {
952                      strncpy(endptr, orig,  length);
953                      endptr += length;
954              }
955              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
956              length = newlen;
957          }
958    
959          if (gettimeofday(&start_time, &dummy) != 0)
960                  perror("bad gettimeofday");
961    
962    
963          for (i = 0; i < jfriedl_XR; i++)
964              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
965    
966          if (gettimeofday(&end_time, &dummy) != 0)
967                  perror("bad gettimeofday");
968    
969          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
970                          -
971                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
972    
973          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
974          return 0;
975      }
976    #endif
977    
978      /* We come back here after a match when the -o option (only_matching) is set,
979      in order to find any further matches in the same line. */
980    
981      ONLY_MATCHING_RESTART:
982    
983      /* Run through all the patterns until one matches. Note that we don't include
984      the final newline in the subject string. */
985    
986      for (i = 0; i < pattern_count; i++)
987        {
988        mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
989          offsets, 99);
990        if (mrc >= 0) { match = TRUE; break; }
991        if (mrc != PCRE_ERROR_NOMATCH)
992          {
993          fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
994          if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
995          fprintf(stderr, "this line:\n");
996          fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */
997          fprintf(stderr, "\n");
998          if (error_count == 0 &&
999              (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
1000            {
1001            fprintf(stderr, "pcregrep: error %d means that a resource limit "
1002              "was exceeded\n", mrc);
1003            fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
1004            }
1005          if (error_count++ > 20)
1006            {
1007            fprintf(stderr, "pcregrep: too many errors - abandoned\n");
1008            exit(2);
1009            }
1010          match = invert;    /* No more matching; don't show the line again */
1011          break;
1012          }
1013        }
1014    
1015      /* If it's a match or a not-match (as required), do what's wanted. */
1016    
1017      if (match != invert)
1018        {
1019        BOOL hyphenprinted = FALSE;
1020    
1021        /* We've failed if we want a file that doesn't have any matches. */
1022    
1023        if (filenames == FN_NOMATCH_ONLY) return 1;
1024    
1025        /* Just count if just counting is wanted. */
1026    
1027        if (count_only) count++;
1028    
1029        /* If all we want is a file name, there is no need to scan any more lines
1030        in the file. */
1031    
1032        else if (filenames == FN_ONLY)
1033          {
1034          fprintf(stdout, "%s\n", printname);
1035          return 0;
1036          }
1037    
1038        /* Likewise, if all we want is a yes/no answer. */
1039    
1040        else if (quiet) return 0;
1041    
1042        /* The --only-matching option prints just the substring that matched, and
1043        the --file-offsets and --line-offsets options output offsets for the
1044        matching substring (they both force --only-matching). None of these options
1045        prints any context. Afterwards, adjust the start and length, and then jump
1046        back to look for further matches in the same line. If we are in invert
1047        mode, however, nothing is printed - this could be still useful because the
1048        return code is set. */
1049    
1050        else if (only_matching)
1051          {
1052          if (!invert)
1053            {
1054            if (printname != NULL) fprintf(stdout, "%s:", printname);
1055            if (number) fprintf(stdout, "%d:", linenumber);
1056            if (line_offsets)
1057              fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1058                offsets[1] - offsets[0]);
1059            else if (file_offsets)
1060              fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1061                offsets[1] - offsets[0]);
1062            else
1063              {
1064              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1065              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1066              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1067              }
1068            fprintf(stdout, "\n");
1069            matchptr += offsets[1];
1070            length -= offsets[1];
1071            match = FALSE;
1072            goto ONLY_MATCHING_RESTART;
1073            }
1074          }
1075    
1076        /* This is the default case when none of the above options is set. We print
1077        the matching line(s), possibly preceded and/or followed by other lines of
1078        context. */
1079    
1080        else
1081          {
1082          /* See if there is a requirement to print some "after" lines from a
1083          previous match. We never print any overlaps. */
1084    
1085          if (after_context > 0 && lastmatchnumber > 0)
1086            {
1087            int ellength;
1088            int linecount = 0;
1089            char *p = lastmatchrestart;
1090    
1091            while (p < ptr && linecount < after_context)
1092              {
1093              p = end_of_line(p, ptr, &ellength);
1094              linecount++;
1095              }
1096    
1097            /* It is important to advance lastmatchrestart during this printing so
1098            that it interacts correctly with any "before" printing below. Print
1099            each line's data using fwrite() in case there are binary zeroes. */
1100    
1101            while (lastmatchrestart < p)
1102              {
1103              char *pp = lastmatchrestart;
1104              if (printname != NULL) fprintf(stdout, "%s-", printname);
1105              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1106              pp = end_of_line(pp, endptr, &ellength);
1107              fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1108              lastmatchrestart = pp;
1109              }
1110            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1111            }
1112    
1113          /* If there were non-contiguous lines printed above, insert hyphens. */
1114    
1115          if (hyphenpending)
1116            {
1117            fprintf(stdout, "--\n");
1118            hyphenpending = FALSE;
1119            hyphenprinted = TRUE;
1120            }
1121    
1122          /* See if there is a requirement to print some "before" lines for this
1123          match. Again, don't print overlaps. */
1124    
1125          if (before_context > 0)
1126            {
1127            int linecount = 0;
1128            char *p = ptr;
1129    
1130            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1131                   linecount < before_context)
1132              {
1133              linecount++;
1134              p = previous_line(p, buffer);
1135              }
1136    
1137            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1138              fprintf(stdout, "--\n");
1139    
1140            while (p < ptr)
1141              {
1142              int ellength;
1143              char *pp = p;
1144              if (printname != NULL) fprintf(stdout, "%s-", printname);
1145              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1146              pp = end_of_line(pp, endptr, &ellength);
1147              fwrite(p, 1, pp - p, stdout);
1148              p = pp;
1149              }
1150            }
1151    
1152          /* Now print the matching line(s); ensure we set hyphenpending at the end
1153          of the file if any context lines are being output. */
1154    
1155          if (after_context > 0 || before_context > 0)
1156            endhyphenpending = TRUE;
1157    
1158          if (printname != NULL) fprintf(stdout, "%s:", printname);
1159          if (number) fprintf(stdout, "%d:", linenumber);
1160    
1161          /* In multiline mode, we want to print to the end of the line in which
1162          the end of the matched string is found, so we adjust linelength and the
1163          line number appropriately, but only when there actually was a match
1164          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1165          the match will always be before the first newline sequence. */
1166    
1167          if (multiline)
1168            {
1169            int ellength;
1170            char *endmatch = ptr;
1171            if (!invert)
1172              {
1173              endmatch += offsets[1];
1174              t = ptr;
1175              while (t < endmatch)
1176                {
1177                t = end_of_line(t, endptr, &ellength);
1178                if (t <= endmatch) linenumber++; else break;
1179                }
1180              }
1181            endmatch = end_of_line(endmatch, endptr, &ellength);
1182            linelength = endmatch - ptr - ellength;
1183            }
1184    
1185          /*** NOTE: Use only fwrite() to output the data line, so that binary
1186          zeroes are treated as just another data character. */
1187    
1188          /* This extra option, for Jeffrey Friedl's debugging requirements,
1189          replaces the matched string, or a specific captured string if it exists,
1190          with X. When this happens, colouring is ignored. */
1191    
1192    #ifdef JFRIEDL_DEBUG
1193          if (S_arg >= 0 && S_arg < mrc)
1194            {
1195            int first = S_arg * 2;
1196            int last  = first + 1;
1197            fwrite(ptr, 1, offsets[first], stdout);
1198            fprintf(stdout, "X");
1199            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1200            }
1201          else
1202    #endif
1203    
1204          /* We have to split the line(s) up if colouring. */
1205    
1206          if (do_colour)
1207            {
1208            fwrite(ptr, 1, offsets[0], stdout);
1209            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1210            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1211            fprintf(stdout, "%c[00m", 0x1b);
1212            fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1213              stdout);
1214            }
1215          else fwrite(ptr, 1, linelength + endlinelength, stdout);
1216          }
1217    
1218        /* End of doing what has to be done for a match */
1219    
1220        rc = 0;    /* Had some success */
1221    
1222        /* Remember where the last match happened for after_context. We remember
1223        where we are about to restart, and that line's number. */
1224    
1225        lastmatchrestart = ptr + linelength + endlinelength;
1226        lastmatchnumber = linenumber + 1;
1227        }
1228    
1229      /* For a match in multiline inverted mode (which of course did not cause
1230      anything to be printed), we have to move on to the end of the match before
1231      proceeding. */
1232    
1233      if (multiline && invert && match)
1234        {
1235        int ellength;
1236        char *endmatch = ptr + offsets[1];
1237        t = ptr;
1238        while (t < endmatch)
1239          {
1240          t = end_of_line(t, endptr, &ellength);
1241          if (t <= endmatch) linenumber++; else break;
1242          }
1243        endmatch = end_of_line(endmatch, endptr, &ellength);
1244        linelength = endmatch - ptr - ellength;
1245        }
1246    
1247      /* Advance to after the newline and increment the line number. The file
1248      offset to the current line is maintained in filepos. */
1249    
1250      ptr += linelength + endlinelength;
1251      filepos += linelength + endlinelength;
1252      linenumber++;
1253    
1254      /* If we haven't yet reached the end of the file (the buffer is full), and
1255      the current point is in the top 1/3 of the buffer, slide the buffer down by
1256      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1257      about to be lost, print them. */
1258    
1259      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1260        {
1261        if (after_context > 0 &&
1262            lastmatchnumber > 0 &&
1263            lastmatchrestart < buffer + MBUFTHIRD)
1264          {
1265          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1266          lastmatchnumber = 0;
1267          }
1268    
1269        /* Now do the shuffle */
1270    
1271        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1272        ptr -= MBUFTHIRD;
1273    
1274    #ifdef SUPPORT_LIBZ
1275        if (frtype == FR_LIBZ)
1276          bufflength = 2*MBUFTHIRD +
1277            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1278        else
1279    #endif
1280    
1281    #ifdef SUPPORT_LIBBZ2
1282        if (frtype == FR_LIBBZ2)
1283          bufflength = 2*MBUFTHIRD +
1284            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1285        else
1286    #endif
1287    
1288        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1289    
1290        endptr = buffer + bufflength;
1291    
1292        /* Adjust any last match point */
1293    
1294        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1295        }
1296      }     /* Loop through the whole file */
1297    
1298    /* End of file; print final "after" lines if wanted; do_after_lines sets
1299    hyphenpending if it prints something. */
1300    
1301    if (!only_matching && !count_only)
1302      {
1303      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1304      hyphenpending |= endhyphenpending;
1305      }
1306    
1307    /* Print the file name if we are looking for those without matches and there
1308    were none. If we found a match, we won't have got this far. */
1309    
1310    if (filenames == FN_NOMATCH_ONLY)
1311      {
1312      fprintf(stdout, "%s\n", printname);
1313      return 0;
1314      }
1315    
1316    /* Print the match count if wanted */
1317    
1318    if (count_only)
1319      {
1320      if (printname != NULL) fprintf(stdout, "%s:", printname);
1321      fprintf(stdout, "%d\n", count);
1322      }
1323    
1324    return rc;
1325    }
1326    
1327    
1328    
1329    /*************************************************
1330    *     Grep a file or recurse into a directory    *
1331    *************************************************/
1332    
1333    /* Given a path name, if it's a directory, scan all the files if we are
1334    recursing; if it's a file, grep it.
1335    
1336    Arguments:
1337      pathname          the path to investigate
1338      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1339      only_one_at_top   TRUE if the path is the only one at toplevel
1340    
1341    Returns:   0 if there was at least one match
1342               1 if there were no matches
1343               2 if there was some kind of error
1344    
1345    However, file opening failures are suppressed if "silent" is set.
1346    */
1347    
1348    static int
1349    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1350    {
1351    int rc = 1;
1352    int sep;
1353    int frtype;
1354    int pathlen;
1355    void *handle;
1356    FILE *in = NULL;           /* Ensure initialized */
1357    
1358    #ifdef SUPPORT_LIBZ
1359    gzFile ingz = NULL;
1360    #endif
1361    
1362    #ifdef SUPPORT_LIBBZ2
1363    BZFILE *inbz2 = NULL;
1364    #endif
1365    
1366    /* If the file name is "-" we scan stdin */
1367    
1368    if (strcmp(pathname, "-") == 0)
1369      {
1370      return pcregrep(stdin, FR_PLAIN,
1371        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1372          stdin_name : NULL);
1373      }
1374    
1375    /* If the file is a directory, skip it if skipping; or, if we are recursing, scan
1376    each file and directory within it, subject to any include or exclude patterns
1377    that were set. The scanning code is localized so it can be made
1378    system-specific. */
1379    
1380    if ((sep = isdirectory(pathname)) != 0)
1381      {
1382      if (dee_action == dee_SKIP) return 1;
1383      if (dee_action == dee_RECURSE)
1384        {
1385        char buffer[1024];
1386        char *nextfile;
1387        directory_type *dir = opendirectory(pathname);
1388    
1389        if (dir == NULL)
1390          {
1391          if (!silent)
1392            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1393              strerror(errno));
1394          return 2;
1395          }
1396    
1397        while ((nextfile = readdirectory(dir)) != NULL)
1398          {
1399          int frc, nflen;
1400          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1401          nflen = strlen(nextfile);
1402    
1403          if (isdirectory(buffer))
1404            {
1405            if (exclude_dir_compiled != NULL &&
1406                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1407              continue;
1408    
1409            if (include_dir_compiled != NULL &&
1410                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1411              continue;
1412            }
1413          else
1414            {
1415            if (exclude_compiled != NULL &&
1416                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1417              continue;
1418    
1419            if (include_compiled != NULL &&
1420                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1421              continue;
1422            }
1423    
1424          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1425          if (frc > 1) rc = frc;
1426           else if (frc == 0 && rc == 1) rc = 0;
1427          }
1428    
1429        closedirectory(dir);
1430        return rc;
1431        }
1432      }
1433    
1434    /* If the file is not a directory and not a regular file, skip it if that's
1435    been requested. */
1436    
1437    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1438    
1439    /* Control reaches here if we have a regular file, or if we have a directory
1440    and recursion or skipping was not requested, or if we have anything else and
1441    skipping was not requested. The scan proceeds. If this is the first and only
1442    argument at top level, we don't show the file name, unless we are only showing
1443    the file name, or the filename was forced (-H). */
1444    
1445    pathlen = strlen(pathname);
1446    
1447    /* Open using zlib if it is supported and the file name ends with .gz. */
1448    
1449    #ifdef SUPPORT_LIBZ
1450    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1451      {
1452      ingz = gzopen(pathname, "rb");
1453      if (ingz == NULL)
1454        {
1455        if (!silent)
1456          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1457            strerror(errno));
1458        return 2;
1459        }
1460      handle = (void *)ingz;
1461      frtype = FR_LIBZ;
1462      }
1463    else
1464    #endif
1465    
1466    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1467    
1468    #ifdef SUPPORT_LIBBZ2
1469    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1470      {
1471      inbz2 = BZ2_bzopen(pathname, "rb");
1472      handle = (void *)inbz2;
1473      frtype = FR_LIBBZ2;
1474      }
1475    else
1476    #endif
1477    
1478    /* Otherwise use plain fopen(). The label is so that we can come back here if
1479    an attempt to read a .bz2 file indicates that it really is a plain file. */
1480    
1481    #ifdef SUPPORT_LIBBZ2
1482    PLAIN_FILE:
1483    #endif
1484      {
1485      in = fopen(pathname, "r");
1486      handle = (void *)in;
1487      frtype = FR_PLAIN;
1488      }
1489    
1490    /* All the opening methods set errno when they fail. */
1491    
1492    if (handle == NULL)
1493      {
1494      if (!silent)
1495        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1496          strerror(errno));
1497      return 2;
1498      }
1499    
1500    /* Now grep the file */
1501    
1502    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1503      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1504    
1505    /* Close in an appropriate manner. */
1506    
1507    #ifdef SUPPORT_LIBZ
1508    if (frtype == FR_LIBZ)
1509      gzclose(ingz);
1510    else
1511    #endif
1512    
1513    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1514    read failed. If the error indicates that the file isn't in fact bzipped, try
1515    again as a normal file. */
1516    
1517    #ifdef SUPPORT_LIBBZ2
1518    if (frtype == FR_LIBBZ2)
1519      {
1520      if (rc == 2)
1521        {
1522        int errnum;
1523        const char *err = BZ2_bzerror(inbz2, &errnum);
1524        if (errnum == BZ_DATA_ERROR_MAGIC)
1525          {
1526          BZ2_bzclose(inbz2);
1527          goto PLAIN_FILE;
1528          }
1529        else if (!silent)
1530          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1531            pathname, err);
1532        }
1533      BZ2_bzclose(inbz2);
1534      }
1535    else
1536    #endif
1537    
1538    /* Normal file close */
1539    
1540    fclose(in);
1541    
1542    /* Pass back the yield from pcregrep(). */
1543    
1544    return rc;
1545    }
1546    
1547    
1548    
1549    
1550    /*************************************************
1551    *                Usage function                  *
1552    *************************************************/
1553    
1554    static int
1555    usage(int rc)
1556    {
1557    option_item *op;
1558    fprintf(stderr, "Usage: pcregrep [-");
1559    for (op = optionlist; op->one_char != 0; op++)
1560      {
1561      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1562      }
1563    fprintf(stderr, "] [long options] [pattern] [files]\n");
1564    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1565      "options.\n");
1566    return rc;
1567    }
1568    
1569    
1570    
1571    
1572    /*************************************************
1573    *                Help function                   *
1574    *************************************************/
1575    
1576    static void
1577    help(void)
1578    {
1579    option_item *op;
1580    
1581    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1582    printf("Search for PATTERN in each FILE or standard input.\n");
1583    printf("PATTERN must be present if neither -e nor -f is used.\n");
1584    printf("\"-\" can be used as a file name to mean STDIN.\n");
1585    
1586    #ifdef SUPPORT_LIBZ
1587    printf("Files whose names end in .gz are read using zlib.\n");
1588    #endif
1589    
1590    #ifdef SUPPORT_LIBBZ2
1591    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1592    #endif
1593    
1594    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1595    printf("Other files and the standard input are read as plain files.\n\n");
1596    #else
1597    printf("All files are read as plain files, without any interpretation.\n\n");
1598    #endif
1599    
1600    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1601    printf("Options:\n");
1602    
1603    for (op = optionlist; op->one_char != 0; op++)
1604      {
1605      int n;
1606      char s[4];
1607      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1608      n = 30 - printf("  %s --%s", s, op->long_name);
1609      if (n < 1) n = 1;
1610      printf("%.*s%s\n", n, "                    ", op->help_text);
1611      }
1612    
1613    printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1614    printf("trailing white space is removed and blank lines are ignored.\n");
1615    printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1616    
1617    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1618    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1619    }
1620    
1621    
1622    
1623    
1624    /*************************************************
1625    *    Handle a single-letter, no data option      *
1626    *************************************************/
1627    
1628    static int
1629    handle_option(int letter, int options)
1630    {
1631    switch(letter)
1632      {
1633      case N_FOFFSETS: file_offsets = TRUE; break;
1634      case N_HELP: help(); exit(0);
1635      case N_LOFFSETS: line_offsets = number = TRUE; break;
1636      case 'c': count_only = TRUE; break;
1637      case 'F': process_options |= PO_FIXED_STRINGS; break;
1638      case 'H': filenames = FN_FORCE; break;
1639      case 'h': filenames = FN_NONE; break;
1640      case 'i': options |= PCRE_CASELESS; break;
1641      case 'l': filenames = FN_ONLY; break;
1642      case 'L': filenames = FN_NOMATCH_ONLY; break;
1643      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1644      case 'n': number = TRUE; break;
1645      case 'o': only_matching = TRUE; break;
1646      case 'q': quiet = TRUE; break;
1647      case 'r': dee_action = dee_RECURSE; break;
1648      case 's': silent = TRUE; break;
1649      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1650      case 'v': invert = TRUE; break;
1651      case 'w': process_options |= PO_WORD_MATCH; break;
1652      case 'x': process_options |= PO_LINE_MATCH; break;
1653    
1654      case 'V':
1655      fprintf(stderr, "pcregrep version %s\n", pcre_version());
1656    exit(0);    exit(0);
1657    break;    break;
1658    
1659    default:    default:
1660    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1661    exit(usage(2));    exit(usage(2));
1662      }
1663    
1664    return options;
1665    }
1666    
1667    
1668    
1669    
1670    /*************************************************
1671    *          Construct printed ordinal             *
1672    *************************************************/
1673    
/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12, or 13 take "th" ("11th", "112th"), as do zero and negative values.
The text is built in a static buffer that is overwritten by the next call, so
the result must be used or copied before ordin() is called again.

Argument:   n   the number to convert
Returns:    pointer to the static buffer holding the ordinal text
*/

static char *
ordin(int n)
{
/* Room for at most three decimal digits per byte of int, a minus sign, the
two-letter ending, and the terminating zero, so no value of n can overflow. */

static char buffer[3 * sizeof(int) + 4];
const char *ending = "th";
int last_two = n % 100;

/* The teens are irregular: 11, 12, 13 do not follow their final digit. */

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1692    
1693  return options;  
1694    
1695    /*************************************************
1696    *          Compile a single pattern              *
1697    *************************************************/
1698    
1699    /* When the -F option has been used, this is called for each substring.
1700    Otherwise it's called for each supplied pattern.
1701    
1702    Arguments:
1703      pattern        the pattern string
1704      options        the PCRE options
1705      filename       the file name, or NULL for a command-line pattern
1706      count          0 if this is the only command line pattern, or
1707                     number of the command line pattern, or
1708                     linenumber for a pattern from a file
1709    
1710    Returns:         TRUE on success, FALSE after an error
1711    */
1712    
1713    static BOOL
1714    compile_single_pattern(char *pattern, int options, char *filename, int count)
1715    {
1716    char buffer[MBUFTHIRD + 16];
1717    const char *error;
1718    int errptr;
1719    
1720    if (pattern_count >= MAX_PATTERN_COUNT)
1721      {
1722      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1723        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1724      return FALSE;
1725      }
1726    
1727    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1728      suffix[process_options]);
1729    pattern_list[pattern_count] =
1730      pcre_compile(buffer, options, &error, &errptr, pcretables);
1731    if (pattern_list[pattern_count] != NULL)
1732      {
1733      pattern_count++;
1734      return TRUE;
1735      }
1736    
1737    /* Handle compile errors */
1738    
1739    errptr -= (int)strlen(prefix[process_options]);
1740    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1741    
1742    if (filename == NULL)
1743      {
1744      if (count == 0)
1745        fprintf(stderr, "pcregrep: Error in command-line regex "
1746          "at offset %d: %s\n", errptr, error);
1747      else
1748        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1749          "at offset %d: %s\n", ordin(count), errptr, error);
1750      }
1751    else
1752      {
1753      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1754        "at offset %d: %s\n", count, filename, errptr, error);
1755      }
1756    
1757    return FALSE;
1758  }  }
1759    
1760    
1761    
1762    /*************************************************
1763    *           Compile one supplied pattern         *
1764    *************************************************/
1765    
1766    /* When the -F option has been used, each string may be a list of strings,
1767    separated by line breaks. They will be matched literally.
1768    
1769    Arguments:
1770      pattern        the pattern string
1771      options        the PCRE options
1772      filename       the file name, or NULL for a command-line pattern
1773      count          0 if this is the only command line pattern, or
1774                     number of the command line pattern, or
1775                     linenumber for a pattern from a file
1776    
1777    Returns:         TRUE on success, FALSE after an error
1778    */
1779    
1780    static BOOL
1781    compile_pattern(char *pattern, int options, char *filename, int count)
1782    {
1783    if ((process_options & PO_FIXED_STRINGS) != 0)
1784      {
1785      char *eop = pattern + strlen(pattern);
1786      char buffer[MBUFTHIRD];
1787      for(;;)
1788        {
1789        int ellength;
1790        char *p = end_of_line(pattern, eop, &ellength);
1791        if (ellength == 0)
1792          return compile_single_pattern(pattern, options, filename, count);
1793        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1794        pattern = p;
1795        if (!compile_single_pattern(buffer, options, filename, count))
1796          return FALSE;
1797        }
1798      }
1799    else return compile_single_pattern(pattern, options, filename, count);
1800    }
1801    
1802    
1803    
1804  /*************************************************  /*************************************************
1805  *                Main program                    *  *                Main program                    *
1806  *************************************************/  *************************************************/
1807    
1808    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1809    
1810  int  int
1811  main(int argc, char **argv)  main(int argc, char **argv)
1812  {  {
1813  int i, j;  int i, j;
1814  int rc = 1;  int rc = 1;
1815  int options = 0;  int pcre_options = 0;
1816    int cmd_pattern_count = 0;
1817    int hint_count = 0;
1818  int errptr;  int errptr;
 const char *error;  
1819  BOOL only_one_at_top;  BOOL only_one_at_top;
1820    char *patterns[MAX_PATTERN_COUNT];
1821    const char *locale_from = "--locale";
1822    const char *error;
1823    
1824    /* Set the default line ending value from the default in the PCRE library;
1825    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1826    */
1827    
1828    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1829    switch(i)
1830      {
1831      default:                 newline = (char *)"lf"; break;
1832      case '\r':               newline = (char *)"cr"; break;
1833      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1834      case -1:                 newline = (char *)"any"; break;
1835      case -2:                 newline = (char *)"anycrlf"; break;
1836      }
1837    
1838  /* Process the options */  /* Process the options */
1839    
1840  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
1841    {    {
1842      option_item *op = NULL;
1843      char *option_data = (char *)"";    /* default to keep compiler happy */
1844      BOOL longop;
1845      BOOL longopwasequals = FALSE;
1846    
1847    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
1848    
1849    /* Long name options */    /* If we hit an argument that is just "-", it may be a reference to STDIN,
1850      but only if we have previously had -e or -f to define the patterns. */
1851    
1852      if (argv[i][1] == 0)
1853        {
1854        if (pattern_filename != NULL || pattern_count > 0) break;
1855          else exit(usage(2));
1856        }
1857    
1858      /* Handle a long name option, or -- to terminate the options */
1859    
1860    if (argv[i][1] == '-')    if (argv[i][1] == '-')
1861      {      {
1862      option_item *op;      char *arg = argv[i] + 2;
1863        char *argequals = strchr(arg, '=');
1864    
1865      if (strncmp(argv[i]+2, "file=", 5) == 0)      if (*arg == 0)    /* -- terminates options */
1866        {        {
1867        pattern_filename = argv[i] + 7;        i++;
1868        continue;        break;                /* out of the options-handling loop */
1869        }        }
1870    
1871        longop = TRUE;
1872    
1873        /* Some long options have data that follows after =, for example file=name.
1874        Some options have variations in the long name spelling: specifically, we
1875        allow "regexp" because GNU grep allows it, though I personally go along
1876        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1877        These options are entered in the table as "regex(p)". No option is in both
1878        these categories, fortunately. */
1879    
1880      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
1881        {        {
1882        if (strcmp(argv[i]+2, op->long_name) == 0)        char *opbra = strchr(op->long_name, '(');
1883          char *equals = strchr(op->long_name, '=');
1884          if (opbra == NULL)     /* Not a (p) case */
1885          {          {
1886          options = handle_option(op->one_char, options);          if (equals == NULL)  /* Not thing=data case */
1887          break;            {
1888              if (strcmp(arg, op->long_name) == 0) break;
1889              }
1890            else                 /* Special case xxx=data */
1891              {
1892              int oplen = equals - op->long_name;
1893              int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1894              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1895                {
1896                option_data = arg + arglen;
1897                if (*option_data == '=')
1898                  {
1899                  option_data++;
1900                  longopwasequals = TRUE;
1901                  }
1902                break;
1903                }
1904              }
1905            }
1906          else                   /* Special case xxxx(p) */
1907            {
1908            char buff1[24];
1909            char buff2[24];
1910            int baselen = opbra - op->long_name;
1911            sprintf(buff1, "%.*s", baselen, op->long_name);
1912            sprintf(buff2, "%s%.*s", buff1,
1913              (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1914            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1915              break;
1916          }          }
1917        }        }
1918    
1919      if (op->one_char == 0)      if (op->one_char == 0)
1920        {        {
1921        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
# Line 417  for (i = 1; i < argc; i++) Line 1923  for (i = 1; i < argc; i++)
1923        }        }
1924      }      }
1925    
1926    /* One-char options */  
1927      /* Jeffrey Friedl's debugging harness uses these additional options which
1928      are not in the right form for putting in the option table because they use
1929      only one hyphen, yet are more than one character long. By putting them
1930      separately here, they will not get displayed as part of the help() output,
1931      but I don't think Jeffrey will care about that. */
1932    
1933    #ifdef JFRIEDL_DEBUG
1934      else if (strcmp(argv[i], "-pre") == 0) {
1935              jfriedl_prefix = argv[++i];
1936              continue;
1937      } else if (strcmp(argv[i], "-post") == 0) {
1938              jfriedl_postfix = argv[++i];
1939              continue;
1940      } else if (strcmp(argv[i], "-XT") == 0) {
1941              sscanf(argv[++i], "%d", &jfriedl_XT);
1942              continue;
1943      } else if (strcmp(argv[i], "-XR") == 0) {
1944              sscanf(argv[++i], "%d", &jfriedl_XR);
1945              continue;
1946      }
1947    #endif
1948    
1949    
1950      /* One-char options; many that have no data may be in a single argument; we
1951      continue till we hit the last one or one that needs data. */
1952    
1953    else    else
1954      {      {
1955      char *s = argv[i] + 1;      char *s = argv[i] + 1;
1956        longop = FALSE;
1957      while (*s != 0)      while (*s != 0)
1958        {        {
1959        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
1960            { if (*s == op->one_char) break; }
1961          if (op->one_char == 0)
1962          {          {
1963          pattern_filename = s + 1;          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1964          if (pattern_filename[0] == 0)            *s, argv[i]);
1965            {          exit(usage(2));
1966            if (i >= argc - 1)          }
1967              {        if (op->type != OP_NODATA || s[1] == 0)
1968              fprintf(stderr, "pcregrep: File name missing after -f\n");          {
1969              exit(usage(2));          option_data = s+1;
             }  
           pattern_filename = argv[++i];  
           }  
1970          break;          break;
1971          }          }
1972        else options = handle_option(*s++, options);        pcre_options = handle_option(*s++, pcre_options);
1973        }        }
1974      }      }
   }  
1975    
1976  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));    /* At this point we should have op pointing to a matched option. If the type
1977  hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));    is NO_DATA, it means that there is no data, and the option might set
1978      something in the PCRE options. */
1979    
1980  if (pattern_list == NULL || hints_list == NULL)    if (op->type == OP_NODATA)
1981    {      {
1982    fprintf(stderr, "pcregrep: malloc failed\n");      pcre_options = handle_option(op->one_char, pcre_options);
1983    return 2;      continue;
1984    }      }
1985    
1986  /* Compile the regular expression(s). */    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1987      either has a value or defaults to something. It cannot have data in a
1988      separate item. At the moment, the only such options are "colo(u)r" and
1989      Jeffrey Friedl's special -S debugging option. */
1990    
1991  if (pattern_filename != NULL)    if (*option_data == 0 &&
1992    {        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
   FILE *f = fopen(pattern_filename, "r");  
   char buffer[BUFSIZ];  
   if (f == NULL)  
1993      {      {
1994      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      switch (op->one_char)
1995        strerror(errno));        {
1996      return 2;        case N_COLOUR:
1997          colour_option = (char *)"auto";
1998          break;
1999    #ifdef JFRIEDL_DEBUG
2000          case 'S':
2001          S_arg = 0;
2002          break;
2003    #endif
2004          }
2005        continue;
2006      }      }
2007    while (fgets(buffer, sizeof(buffer), f) != NULL)  
2008      /* Otherwise, find the data string for the option. */
2009    
2010      if (*option_data == 0)
2011      {      {
2012      char *s = buffer + (int)strlen(buffer);      if (i >= argc - 1 || longopwasequals)
2013      if (pattern_count >= MAX_PATTERN_COUNT)        {
2014          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2015          exit(usage(2));
2016          }
2017        option_data = argv[++i];
2018        }
2019    
2020      /* If the option type is OP_PATLIST, it's the -e option, which can be called
2021      multiple times to create a list of patterns. */
2022    
2023      if (op->type == OP_PATLIST)
2024        {
2025        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2026        {        {
2027        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2028          MAX_PATTERN_COUNT);          MAX_PATTERN_COUNT);
2029        return 2;        return 2;
2030        }        }
2031      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      patterns[cmd_pattern_count++] = option_data;
2032      if (s == buffer) continue;      }
2033      *s = 0;  
2034      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,    /* Otherwise, deal with single string or numeric data values. */
2035        &errptr, NULL);  
2036      if (pattern_list[pattern_count++] == NULL)    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2037        {
2038        *((char **)op->dataptr) = option_data;
2039        }
2040      else
2041        {
2042        char *endptr;
2043        int n = strtoul(option_data, &endptr, 10);
2044        if (*endptr != 0)
2045        {        {
2046        fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",        if (longop)
2047          pattern_count, errptr, error);          {
2048        return 2;          char *equals = strchr(op->long_name, '=');
2049            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2050              equals - op->long_name;
2051            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2052              option_data, nlen, op->long_name);
2053            }
2054          else
2055            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2056              option_data, op->one_char);
2057          exit(usage(2));
2058        }        }
2059        *((int *)op->dataptr) = n;
2060        }
2061      }
2062    
2063    /* Options have been decoded. If -C was used, its value is used as a default
2064    for -A and -B. */
2065    
2066    if (both_context > 0)
2067      {
2068      if (after_context == 0) after_context = both_context;
2069      if (before_context == 0) before_context = both_context;
2070      }
2071    
2072    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2073    However, the latter two set the only_matching flag. */
2074    
2075    if ((only_matching && (file_offsets || line_offsets)) ||
2076        (file_offsets && line_offsets))
2077      {
2078      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2079        "and/or --line-offsets\n");
2080      exit(usage(2));
2081      }
2082    
2083    if (file_offsets || line_offsets) only_matching = TRUE;
2084    
2085    /* If a locale has not been provided as an option, see if the LC_CTYPE or
2086    LC_ALL environment variable is set, and if so, use it. */
2087    
2088    if (locale == NULL)
2089      {
2090      locale = getenv("LC_ALL");
2091      locale_from = "LCC_ALL";
2092      }
2093    
2094    if (locale == NULL)
2095      {
2096      locale = getenv("LC_CTYPE");
2097      locale_from = "LC_CTYPE";
2098      }
2099    
2100    /* If a locale has been provided, set it, and generate the tables the PCRE
2101    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2102    
2103    if (locale != NULL)
2104      {
2105      if (setlocale(LC_CTYPE, locale) == NULL)
2106        {
2107        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2108          locale, locale_from);
2109        return 2;
2110        }
2111      pcretables = pcre_maketables();
2112      }
2113    
2114    /* Sort out colouring */
2115    
2116    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2117      {
2118      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2119      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2120      else
2121        {
2122        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2123          colour_option);
2124        return 2;
2125        }
2126      if (do_colour)
2127        {
2128        char *cs = getenv("PCREGREP_COLOUR");
2129        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2130        if (cs != NULL) colour_string = cs;
2131      }      }
   fclose(f);  
2132    }    }
2133    
2134  /* If no file name, a single regex must be given inline */  /* Interpret the newline type; the default settings are Unix-like. */
2135    
2136    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2137      {
2138      pcre_options |= PCRE_NEWLINE_CR;
2139      endlinetype = EL_CR;
2140      }
2141    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2142      {
2143      pcre_options |= PCRE_NEWLINE_LF;
2144      endlinetype = EL_LF;
2145      }
2146    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2147      {
2148      pcre_options |= PCRE_NEWLINE_CRLF;
2149      endlinetype = EL_CRLF;
2150      }
2151    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2152      {
2153      pcre_options |= PCRE_NEWLINE_ANY;
2154      endlinetype = EL_ANY;
2155      }
2156    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2157      {
2158      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2159      endlinetype = EL_ANYCRLF;
2160      }
2161  else  else
2162    {    {
2163    if (i >= argc) return usage(0);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2164    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);    return 2;
2165    if (pattern_list[0] == NULL)    }
2166    
2167    /* Interpret the text values for -d and -D */
2168    
2169    if (dee_option != NULL)
2170      {
2171      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2172      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2173      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2174      else
2175        {
2176        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2177        return 2;
2178        }
2179      }
2180    
2181    if (DEE_option != NULL)
2182      {
2183      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2184      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2185      else
2186      {      {
2187      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
       error);  
2188      return 2;      return 2;
2189      }      }
   pattern_count++;  
2190    }    }
2191    
2192  /* Study the regular expressions, as we will be running them may times */  /* Check the values for Jeffrey Friedl's debugging options. */
2193    
2194    #ifdef JFRIEDL_DEBUG
2195    if (S_arg > 9)
2196      {
2197      fprintf(stderr, "pcregrep: bad value for -S option\n");
2198      return 2;
2199      }
2200    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2201      {
2202      if (jfriedl_XT == 0) jfriedl_XT = 1;
2203      if (jfriedl_XR == 0) jfriedl_XR = 1;
2204      }
2205    #endif
2206    
2207    /* Get memory to store the pattern and hints lists. */
2208    
2209    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2210    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2211    
2212    if (pattern_list == NULL || hints_list == NULL)
2213      {
2214      fprintf(stderr, "pcregrep: malloc failed\n");
2215      goto EXIT2;
2216      }
2217    
2218    /* If no patterns were provided by -e, and there is no file provided by -f,
2219    the first argument is the one and only pattern, and it must exist. */
2220    
2221    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2222      {
2223      if (i >= argc) return usage(2);
2224      patterns[cmd_pattern_count++] = argv[i++];
2225      }
2226    
2227    /* Compile the patterns that were provided on the command line, either by
2228    multiple uses of -e or as a single unkeyed pattern. */
2229    
2230    for (j = 0; j < cmd_pattern_count; j++)
2231      {
2232      if (!compile_pattern(patterns[j], pcre_options, NULL,
2233           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2234        goto EXIT2;
2235      }
2236    
2237    /* Compile the regular expressions that are provided in a file. */
2238    
2239    if (pattern_filename != NULL)
2240      {
2241      int linenumber = 0;
2242      FILE *f;
2243      char *filename;
2244      char buffer[MBUFTHIRD];
2245    
2246      if (strcmp(pattern_filename, "-") == 0)
2247        {
2248        f = stdin;
2249        filename = stdin_name;
2250        }
2251      else
2252        {
2253        f = fopen(pattern_filename, "r");
2254        if (f == NULL)
2255          {
2256          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2257            strerror(errno));
2258          goto EXIT2;
2259          }
2260        filename = pattern_filename;
2261        }
2262    
2263      while (fgets(buffer, MBUFTHIRD, f) != NULL)
2264        {
2265        char *s = buffer + (int)strlen(buffer);
2266        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2267        *s = 0;
2268        linenumber++;
2269        if (buffer[0] == 0) continue;   /* Skip blank lines */
2270        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2271          goto EXIT2;
2272        }
2273    
2274      if (f != stdin) fclose(f);
2275      }
2276    
2277    /* Study the regular expressions, as we will be running them many times */
2278    
2279  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
2280    {    {
# Line 513  for (j = 0; j < pattern_count; j++) Line 2284  for (j = 0; j < pattern_count; j++)
2284      char s[16];      char s[16];
2285      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2286      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2287      return 2;      goto EXIT2;
2288        }
2289      hint_count++;
2290      }
2291    
2292    /* If there are include or exclude patterns, compile them. */
2293    
2294    if (exclude_pattern != NULL)
2295      {
2296      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2297        pcretables);
2298      if (exclude_compiled == NULL)
2299        {
2300        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2301          errptr, error);
2302        goto EXIT2;
2303        }
2304      }
2305    
2306    if (include_pattern != NULL)
2307      {
2308      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2309        pcretables);
2310      if (include_compiled == NULL)
2311        {
2312        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2313          errptr, error);
2314        goto EXIT2;
2315        }
2316      }
2317    
2318    if (exclude_dir_pattern != NULL)
2319      {
2320      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2321        pcretables);
2322      if (exclude_dir_compiled == NULL)
2323        {
2324        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2325          errptr, error);
2326        goto EXIT2;
2327        }
2328      }
2329    
2330    if (include_dir_pattern != NULL)
2331      {
2332      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2333        pcretables);
2334      if (include_dir_compiled == NULL)
2335        {
2336        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2337          errptr, error);
2338        goto EXIT2;
2339      }      }
2340    }    }
2341    
2342  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
2343    
2344  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc)
2345      {
2346      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2347      goto EXIT;
2348      }
2349    
2350  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2351  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
2352  the file name if the argument is not a directory. */  the file name if the argument is not a directory and filenames are not
2353    otherwise forced. */
2354    
2355  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
 if (filenames_only) filenames = TRUE;  
2356    
2357  for (; i < argc; i++)  for (; i < argc; i++)
2358    {    {
2359    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2360    if (frc == 0 && rc == 1) rc = 0;      only_one_at_top);
2361      if (frc > 1) rc = frc;
2362        else if (frc == 0 && rc == 1) rc = 0;
2363    }    }
2364    
2365    EXIT:
2366    if (pattern_list != NULL)
2367      {
2368      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2369      free(pattern_list);
2370      }
2371    if (hints_list != NULL)
2372      {
2373      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2374      free(hints_list);
2375      }
2376  return rc;  return rc;
2377    
2378    EXIT2:
2379    rc = 2;
2380    goto EXIT;
2381  }  }
2382    
2383  /* End */  /* End of pcregrep */

Legend:
Removed from v.53  
changed lines
  Added in v.377

  ViewVC Help
Powered by ViewVC 1.1.5