/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC | revision 379 by ph10, Mon Mar 2 20:30:05 2009 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix system it can recurse into directories. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2009 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55    #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 18  its pattern matching. On a Unix system i Line 70  its pattern matching. On a Unix system i
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "2.0 01-Aug-2001"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76    #if BUFSIZ > 8192
77    #define MBUFTHIRD BUFSIZ
78    #else
79    #define MBUFTHIRD 8192
80    #endif
81    
82    /* Values for the "filenames" variable, which specifies options for file name
83    output. The order is important; it is assumed that a file name is wanted for
84    all values greater than FN_DEFAULT. */
85    
86    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92    /* Actions for the -d and -D options */
93    
94    enum { dee_READ, dee_SKIP, dee_RECURSE };
95    enum { DEE_READ, DEE_SKIP };
96    
97    /* Actions for special processing options (flag bits) */
98    
99    #define PO_WORD_MATCH     0x0001
100    #define PO_LINE_MATCH     0x0002
101    #define PO_FIXED_STRINGS  0x0004
102    
103    /* Line ending types */
104    
105    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    
108    
109  /*************************************************  /*************************************************
110  *               Global variables                 *  *               Global variables                 *
111  *************************************************/  *************************************************/
112    
113    /* Jeffrey Friedl has some debugging requirements that are not part of the
114    regular code. */
115    
116    #ifdef JFRIEDL_DEBUG
117    static int S_arg = -1;
118    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
119    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
120    static const char *jfriedl_prefix = "";
121    static const char *jfriedl_postfix = "";
122    #endif
123    
124    static int  endlinetype;
125    
126    static char *colour_string = (char *)"1;31";
127    static char *colour_option = NULL;
128    static char *dee_option = NULL;
129    static char *DEE_option = NULL;
130    static char *newline = NULL;
131  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
132    static char *stdin_name = (char *)"(standard input)";
133    static char *locale = NULL;
134    
135    static const unsigned char *pcretables = NULL;
136    
137  static int  pattern_count = 0;  static int  pattern_count = 0;
138  static pcre **pattern_list;  static pcre **pattern_list = NULL;
139  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
140    
141    static char *include_pattern = NULL;
142    static char *exclude_pattern = NULL;
143    static char *include_dir_pattern = NULL;
144    static char *exclude_dir_pattern = NULL;
145    
146    static pcre *include_compiled = NULL;
147    static pcre *exclude_compiled = NULL;
148    static pcre *include_dir_compiled = NULL;
149    static pcre *exclude_dir_compiled = NULL;
150    
151    static int after_context = 0;
152    static int before_context = 0;
153    static int both_context = 0;
154    static int dee_action = dee_READ;
155    static int DEE_action = DEE_READ;
156    static int error_count = 0;
157    static int filenames = FN_DEFAULT;
158    static int process_options = 0;
159    
160  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
161  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
162  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
163    static BOOL hyphenpending = FALSE;
164  static BOOL invert = FALSE;  static BOOL invert = FALSE;
165    static BOOL line_offsets = FALSE;
166    static BOOL multiline = FALSE;
167  static BOOL number = FALSE;  static BOOL number = FALSE;
168  static BOOL recurse = FALSE;  static BOOL only_matching = FALSE;
169    static BOOL quiet = FALSE;
170  static BOOL silent = FALSE;  static BOOL silent = FALSE;
171  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
172    
173  /* Structure for options and list of them */  /* Structure for options and list of them */
174    
175    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
176           OP_PATLIST };
177    
178  typedef struct option_item {  typedef struct option_item {
179      int type;
180    int one_char;    int one_char;
181    char *long_name;    void *dataptr;
182    char *help_text;    const char *long_name;
183      const char *help_text;
184  } option_item;  } option_item;
185    
186    /* Options without a single-letter equivalent get a negative value. This can be
187    used to identify them. */
188    
189    #define N_COLOUR       (-1)
190    #define N_EXCLUDE      (-2)
191    #define N_EXCLUDE_DIR  (-3)
192    #define N_HELP         (-4)
193    #define N_INCLUDE      (-5)
194    #define N_INCLUDE_DIR  (-6)
195    #define N_LABEL        (-7)
196    #define N_LOCALE       (-8)
197    #define N_NULL         (-9)
198    #define N_LOFFSETS     (-10)
199    #define N_FOFFSETS     (-11)
200    
201  static option_item optionlist[] = {  static option_item optionlist[] = {
202    { -1,  "help",         "display this help and exit" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
203    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
204    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
205    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
206    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
207    { 'n', "line-number",  "print line number with output lines" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
208    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
209    { 's', "no-messages",  "suppress error messages" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
210    { 'V', "version",      "print version information and exit" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
211    { 'v', "invert-match", "select non-matching lines" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
212    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
213    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
214    { 0,    NULL,           NULL }    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
215      { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
216      { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
217      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
218      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
219      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
220      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
221      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
222      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
223      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
224      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
225      { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
226      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
227      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
228      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
229      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
230      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
231      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
232      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
233      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
234    #ifdef JFRIEDL_DEBUG
235      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
236    #endif
237      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
238      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
239      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
240      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
241      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
242      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
243      { OP_NODATA,    0,        NULL,               NULL,            NULL }
244  };  };
245    
246    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
247    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
248    that the combination of -w and -x has the same effect as -x on its own, so we
249    can treat them as the same. */
250    
251    static const char *prefix[] = {
252      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
253    
254    static const char *suffix[] = {
255      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
256    
257    /* UTF-8 tables - used only when the newline setting is "any". */
258    
259    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
260    
261    const char utf8_table4[] = {
262      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
263      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
264      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
265      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
266    
267    
268    
269  /*************************************************  /*************************************************
270  *       Functions for directory scanning         *  *            OS-specific functions               *
271  *************************************************/  *************************************************/
272    
273  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
274  although at present the only ones are for Unix, and for "no directory recursion  although at present the only ones are for Unix, Win32, and for "no support". */
 support". */  
275    
276    
277  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
278    
279  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
280  #include <sys/types.h>  #include <sys/types.h>
281  #include <sys/stat.h>  #include <sys/stat.h>
282  #include <dirent.h>  #include <dirent.h>
283    
284  typedef DIR directory_type;  typedef DIR directory_type;
285    
286  int  static int
287  isdirectory(char *filename)  isdirectory(char *filename)
288  {  {
289  struct stat statbuf;  struct stat statbuf;
# Line 92  if (stat(filename, &statbuf) < 0) Line 292  if (stat(filename, &statbuf) < 0)
292  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
293  }  }
294    
295  directory_type *  static directory_type *
296  opendirectory(char *filename)  opendirectory(char *filename)
297  {  {
298  return opendir(filename);  return opendir(filename);
299  }  }
300    
301  char *  static char *
302  readdirectory(directory_type *dir)  readdirectory(directory_type *dir)
303  {  {
304  for (;;)  for (;;)
# Line 108  for (;;) Line 308  for (;;)
308    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
309      return dent->d_name;      return dent->d_name;
310    }    }
311    /* Control never reaches here */
312    }
313    
314    static void
315    closedirectory(directory_type *dir)
316    {
317    closedir(dir);
318    }
319    
320    
321    /************* Test for regular file in Unix **********/
322    
323    static int
324    isregfile(char *filename)
325    {
326    struct stat statbuf;
327    if (stat(filename, &statbuf) < 0)
328      return 1;        /* In the expectation that opening as a file will fail */
329    return (statbuf.st_mode & S_IFMT) == S_IFREG;
330    }
331    
332    
333    /************* Test stdout for being a terminal in Unix **********/
334    
335    static BOOL
336    is_stdout_tty(void)
337    {
338    return isatty(fileno(stdout));
339    }
340    
341    
342    /************* Directory scanning in Win32 ***********/
343    
344    /* I (Philip Hazel) have no means of testing this code. It was contributed by
345    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
346    when it did not exist. David Byron added a patch that moved the #include of
347    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
348    */
349    
350    #elif HAVE_WINDOWS_H
351    
352    #ifndef STRICT
353    # define STRICT
354    #endif
355    #ifndef WIN32_LEAN_AND_MEAN
356    # define WIN32_LEAN_AND_MEAN
357    #endif
358    
359    #include <windows.h>
360    
361    #ifndef INVALID_FILE_ATTRIBUTES
362    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
363    #endif
364    
365    typedef struct directory_type
366    {
367    HANDLE handle;
368    BOOL first;
369    WIN32_FIND_DATA data;
370    } directory_type;
371    
372    int
373    isdirectory(char *filename)
374    {
375    DWORD attr = GetFileAttributes(filename);
376    if (attr == INVALID_FILE_ATTRIBUTES)
377      return 0;
378    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
379    }
380    
381    directory_type *
382    opendirectory(char *filename)
383    {
384    size_t len;
385    char *pattern;
386    directory_type *dir;
387    DWORD err;
388    len = strlen(filename);
389    pattern = (char *) malloc(len + 3);
390    dir = (directory_type *) malloc(sizeof(*dir));
391    if ((pattern == NULL) || (dir == NULL))
392      {
393      fprintf(stderr, "pcregrep: malloc failed\n");
394      exit(2);
395      }
396    memcpy(pattern, filename, len);
397    memcpy(&(pattern[len]), "\\*", 3);
398    dir->handle = FindFirstFile(pattern, &(dir->data));
399    if (dir->handle != INVALID_HANDLE_VALUE)
400      {
401      free(pattern);
402      dir->first = TRUE;
403      return dir;
404      }
405    err = GetLastError();
406    free(pattern);
407    free(dir);
408    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
409    return NULL;
410    }
411    
412    char *
413    readdirectory(directory_type *dir)
414    {
415    for (;;)
416      {
417      if (!dir->first)
418        {
419        if (!FindNextFile(dir->handle, &(dir->data)))
420          return NULL;
421        }
422      else
423        {
424        dir->first = FALSE;
425        }
426      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
427        return dir->data.cFileName;
428      }
429    #ifndef _MSC_VER
430  return NULL;   /* Keep compiler happy; never executed */  return NULL;   /* Keep compiler happy; never executed */
431    #endif
432  }  }
433    
434  void  void
435  closedirectory(directory_type *dir)  closedirectory(directory_type *dir)
436  {  {
437  closedir(dir);  FindClose(dir->handle);
438    free(dir);
439  }  }
440    
441    
442  #else  /************* Test for regular file in Win32 **********/
443    
444    /* I don't know how to do this, or if it can be done; assume all paths are
445    regular if they are not directories. */
446    
447    int isregfile(char *filename)
448    {
449    return !isdirectory(filename);
450    }
451    
452    
453    /************* Test stdout for being a terminal in Win32 **********/
454    
455    /* I don't know how to do this; assume never */
456    
457    static BOOL
458    is_stdout_tty(void)
459    {
460    return FALSE;
461    }
462    
463    
464  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
465    
466  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
467    
468    #else
469    
470  typedef void directory_type;  typedef void directory_type;
471    
472  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
473  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
474  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
475  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
476    
477    
478    /************* Test for regular file when we can't do it **********/
479    
480    /* Assume all files are regular. */
481    
482    int isregfile(char *filename) { return 1; }
483    
484    
485    /************* Test stdout for being a terminal when we can't do it **********/
486    
487    static BOOL
488    is_stdout_tty(void)
489    {
490    return FALSE;
491    }
492    
493    
494  #endif  #endif
495    
496    
497    
498  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
499  /*************************************************  /*************************************************
500  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
501  *************************************************/  *************************************************/
# Line 159  return sys_errlist[n]; Line 518  return sys_errlist[n];
518    
519    
520  /*************************************************  /*************************************************
521  *              Grep an individual file           *  *             Find end of line                   *
522  *************************************************/  *************************************************/
523    
524  static int  /* The length of the endline sequence that is found is set via lenptr. This may
525  pcregrep(FILE *in, char *name)  be zero at the very end of the file if there is no line-ending sequence there.
526  {  
527  int rc = 1;  Arguments:
528  int linenumber = 0;    p         current position in line
529  int count = 0;    endptr    end of available data
530  int offsets[99];    lenptr    where to put the length of the eol sequence
 char buffer[BUFSIZ];  
531    
532  while (fgets(buffer, sizeof(buffer), in) != NULL)  Returns:    pointer to the last byte of the line
533    */
534    
535    static char *
536    end_of_line(char *p, char *endptr, int *lenptr)
537    {
538    switch(endlinetype)
539    {    {
540    BOOL match = FALSE;    default:      /* Just in case */
541    int i;    case EL_LF:
542    int length = (int)strlen(buffer);    while (p < endptr && *p != '\n') p++;
543    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    if (p < endptr)
544    linenumber++;      {
545        *lenptr = 1;
546        return p + 1;
547        }
548      *lenptr = 0;
549      return endptr;
550    
551    for (i = 0; !match && i < pattern_count; i++)    case EL_CR:
552      while (p < endptr && *p != '\r') p++;
553      if (p < endptr)
554      {      {
555      match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,      *lenptr = 1;
556        offsets, 99) >= 0;      return p + 1;
     if (match && whole_lines && offsets[1] != length) match = FALSE;  
557      }      }
558      *lenptr = 0;
559      return endptr;
560    
561    if (match != invert)    case EL_CRLF:
562      for (;;)
563      {      {
564      if (count_only) count++;      while (p < endptr && *p != '\r') p++;
565        if (++p >= endptr)
566          {
567          *lenptr = 0;
568          return endptr;
569          }
570        if (*p == '\n')
571          {
572          *lenptr = 2;
573          return p + 1;
574          }
575        }
576      break;
577    
578      case EL_ANYCRLF:
579      while (p < endptr)
580        {
581        int extra = 0;
582        register int c = *((unsigned char *)p);
583    
584      else if (filenames_only)      if (utf8 && c >= 0xc0)
585        {        {
586        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        int gcii, gcss;
587        return 0;        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
588          gcss = 6*extra;
589          c = (c & utf8_table3[extra]) << gcss;
590          for (gcii = 1; gcii <= extra; gcii++)
591            {
592            gcss -= 6;
593            c |= (p[gcii] & 0x3f) << gcss;
594            }
595        }        }
596    
597      else if (silent) return 0;      p += 1 + extra;
598    
599      else      switch (c)
600        {        {
601        if (name != NULL) fprintf(stdout, "%s:", name);        case 0x0a:    /* LF */
602        if (number) fprintf(stdout, "%d:", linenumber);        *lenptr = 1;
603        fprintf(stdout, "%s\n", buffer);        return p;
604    
605          case 0x0d:    /* CR */
606          if (p < endptr && *p == 0x0a)
607            {
608            *lenptr = 2;
609            p++;
610            }
611          else *lenptr = 1;
612          return p;
613    
614          default:
615          break;
616        }        }
617        }   /* End of loop for ANYCRLF case */
618    
619      rc = 0;    *lenptr = 0;  /* Must have hit the end */
620      }    return endptr;
   }  
621    
622  if (count_only)    case EL_ANY:
623    {    while (p < endptr)
624    if (name != NULL) fprintf(stdout, "%s:", name);      {
625    fprintf(stdout, "%d\n", count);      int extra = 0;
626    }      register int c = *((unsigned char *)p);
627    
628  return rc;      if (utf8 && c >= 0xc0)
629  }        {
630          int gcii, gcss;
631          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
632          gcss = 6*extra;
633          c = (c & utf8_table3[extra]) << gcss;
634          for (gcii = 1; gcii <= extra; gcii++)
635            {
636            gcss -= 6;
637            c |= (p[gcii] & 0x3f) << gcss;
638            }
639          }
640    
641        p += 1 + extra;
642    
643        switch (c)
644          {
645          case 0x0a:    /* LF */
646          case 0x0b:    /* VT */
647          case 0x0c:    /* FF */
648          *lenptr = 1;
649          return p;
650    
651          case 0x0d:    /* CR */
652          if (p < endptr && *p == 0x0a)
653            {
654            *lenptr = 2;
655            p++;
656            }
657          else *lenptr = 1;
658          return p;
659    
660          case 0x85:    /* NEL */
661          *lenptr = utf8? 2 : 1;
662          return p;
663    
664          case 0x2028:  /* LS */
665          case 0x2029:  /* PS */
666          *lenptr = 3;
667          return p;
668    
669          default:
670          break;
671          }
672        }   /* End of loop for ANY case */
673    
674      *lenptr = 0;  /* Must have hit the end */
675      return endptr;
676      }     /* End of overall switch */
677    }
678    
679    
680    
681  /*************************************************  /*************************************************
682  *     Grep a file or recurse into a directory    *  *         Find start of previous line            *
683  *************************************************/  *************************************************/
684    
685  static int  /* This is called when looking back for before lines to print.
 grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,  
   BOOL only_one_at_top)  
 {  
 int rc = 1;  
 int sep;  
 FILE *in;  
686    
687  /* If the file is a directory and we are recursing, scan each file within it.  Arguments:
688  The scanning code is localized so it can be made system-specific. */    p         start of the subsequent line
689      startptr  start of available data
690    
691  if ((sep = isdirectory(filename)) != 0 && recurse)  Returns:    pointer to the start of the previous line
692    */
693    
694    static char *
695    previous_line(char *p, char *startptr)
696    {
697    switch(endlinetype)
698    {    {
699    char buffer[1024];    default:      /* Just in case */
700    char *nextfile;    case EL_LF:
701    directory_type *dir = opendirectory(filename);    p--;
702      while (p > startptr && p[-1] != '\n') p--;
703      return p;
704    
705      case EL_CR:
706      p--;
707      while (p > startptr && p[-1] != '\n') p--;
708      return p;
709    
710    if (dir == NULL)    case EL_CRLF:
711      for (;;)
712      {      {
713      fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,      p -= 2;
714        strerror(errno));      while (p > startptr && p[-1] != '\n') p--;
715      return 2;      if (p <= startptr + 1 || p[-2] == '\r') return p;
716      }      }
717      return p;   /* But control should never get here */
718    
719    while ((nextfile = readdirectory(dir)) != NULL)    case EL_ANY:
720      case EL_ANYCRLF:
721      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
722      if (utf8) while ((*p & 0xc0) == 0x80) p--;
723    
724      while (p > startptr)
725      {      {
726      int frc;      register int c;
727      sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);      char *pp = p - 1;
     frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);  
     if (frc == 0 && rc == 1) rc = 0;  
     }  
728    
729    closedirectory(dir);      if (utf8)
730    return rc;        {
731    }        int extra = 0;
732          while ((*pp & 0xc0) == 0x80) pp--;
733          c = *((unsigned char *)pp);
734          if (c >= 0xc0)
735            {
736            int gcii, gcss;
737            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
738            gcss = 6*extra;
739            c = (c & utf8_table3[extra]) << gcss;
740            for (gcii = 1; gcii <= extra; gcii++)
741              {
742              gcss -= 6;
743              c |= (pp[gcii] & 0x3f) << gcss;
744              }
745            }
746          }
747        else c = *((unsigned char *)pp);
748    
749  /* If the file is not a directory, or we are not recursing, scan it. If this is      if (endlinetype == EL_ANYCRLF) switch (c)
750  the first and only argument at top level, we don't show the file name.        {
751  Otherwise, control is via the show_filenames variable. */        case 0x0a:    /* LF */
752          case 0x0d:    /* CR */
753          return p;
754    
755  in = fopen(filename, "r");        default:
756  if (in == NULL)        break;
757    {        }
   fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));  
   return 2;  
   }  
758    
759  rc = pcregrep(in, (show_filenames && !only_one_at_top)? filename : NULL);      else switch (c)
760  fclose(in);        {
761  return rc;        case 0x0a:    /* LF */
762          case 0x0b:    /* VT */
763          case 0x0c:    /* FF */
764          case 0x0d:    /* CR */
765          case 0x85:    /* NEL */
766          case 0x2028:  /* LS */
767          case 0x2029:  /* PS */
768          return p;
769    
770          default:
771          break;
772          }
773    
774        p = pp;  /* Back one character */
775        }        /* End of loop for ANY case */
776    
777      return startptr;  /* Hit start of data */
778      }     /* End of overall switch */
779  }  }
780    
781    
782    
783    
784    
785  /*************************************************  /*************************************************
786  *                Usage function                  *  *       Print the previous "after" lines         *
787  *************************************************/  *************************************************/
788    
789  static int  /* This is called if we are about to lose said lines because of buffer filling,
790  usage(int rc)  and at the end of the file. The data in the line is written using fwrite() so
791    that a binary zero does not terminate it.
792    
793    Arguments:
794      lastmatchnumber   the number of the last matching line, plus one
795      lastmatchrestart  where we restarted after the last match
796      endptr            end of available data
797      printname         filename for printing
798    
799    Returns:            nothing
800    */
801    
802    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
803      char *endptr, char *printname)
804  {  {
805  fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n");  if (after_context > 0 && lastmatchnumber > 0)
806  fprintf(stderr, "Type `pcregrep --help' for more information.\n");    {
807  return rc;    int count = 0;
808      while (lastmatchrestart < endptr && count++ < after_context)
809        {
810        int ellength;
811        char *pp = lastmatchrestart;
812        if (printname != NULL) fprintf(stdout, "%s-", printname);
813        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
814        pp = end_of_line(pp, endptr, &ellength);
815        fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
816        lastmatchrestart = pp;
817        }
818      hyphenpending = TRUE;
819      }
820  }  }
821    
822    
823    
   
824  /*************************************************  /*************************************************
825  *                Help function                   *  *   Apply patterns to subject till one matches   *
826  *************************************************/  *************************************************/
827    
828  static void  /* This function is called to run through all patterns, looking for a match. It
829  help(void)  is used multiple times for the same subject when colouring is enabled, in order
830  {  to find all possible matches.
831  option_item *op;  
832    Arguments:
833  printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n");    matchptr    the start of the subject
834  printf("Search for PATTERN in each FILE or standard input.\n");    length      the length of the subject to match
835  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");    offsets     the offets vector to fill in
836      mrc         address of where to put the result of pcre_exec()
837  printf("Options:\n");  
838    Returns:      TRUE if there was a match
839                  FALSE if there was no match
840                  invert if there was a non-fatal error
841    */
842    
843  for (op = optionlist; op->one_char != 0; op++)  static BOOL
844    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
845    {
846    int i;
847    for (i = 0; i < pattern_count; i++)
848    {    {
849    int n;    *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0,
850    char s[4];      PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
851    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (*mrc >= 0) return TRUE;
852    printf("  %s --%s%n", s, op->long_name, &n);    if (*mrc == PCRE_ERROR_NOMATCH) continue;
853    n = 30 - n;    fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
854    if (n < 1) n = 1;    if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
855    printf("%.*s%s\n", n, "                    ", op->help_text);    fprintf(stderr, "this text:\n");
856      fwrite(matchptr, 1, length, stderr);  /* In case binary zero included */
857      fprintf(stderr, "\n");
858      if (error_count == 0 &&
859          (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
860        {
861        fprintf(stderr, "pcregrep: error %d means that a resource limit "
862          "was exceeded\n", *mrc);
863        fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
864        }
865      if (error_count++ > 20)
866        {
867        fprintf(stderr, "pcregrep: too many errors - abandoned\n");
868        exit(2);
869        }
870      return invert;    /* No more matching; don't show the line again */
871    }    }
872    
873  printf("\n  -f<filename>  or  --file=<filename>\n");  return FALSE;  /* No match, no errors */
 printf("    Read patterns from <filename> instead of using a command line option.\n");  
 printf("    Trailing white space is removed; blanks lines are ignored.\n");  
 printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
   
 printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  
 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  
874  }  }
875    
876    
877    
   
878  /*************************************************  /*************************************************
879  *                Handle an option                *  *            Grep an individual file             *
880  *************************************************/  *************************************************/
881    
882    /* This is called from grep_or_recurse() below. It uses a buffer that is three
883    times the value of MBUFTHIRD. The matching point is never allowed to stray into
884    the top third of the buffer, thus keeping more of the file available for
885    context printing or for multiline scanning. For large files, the pointer will
886    be in the middle third most of the time, so the bottom third is available for
887    "before" context printing.
888    
889    Arguments:
890      handle       the fopened FILE stream for a normal file
891                   the gzFile pointer when reading is via libz
892                   the BZFILE pointer when reading is via libbz2
893      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
894      printname    the file name if it is to be printed for each match
895                   or NULL if the file name is not to be printed
896                   it cannot be NULL if filenames[_nomatch]_only is set
897    
898    Returns:       0 if there was at least one match
899                   1 otherwise (no matches)
900                   2 if there is a read error on a .bz2 file
901    */
902    
903  static int  static int
904  handle_option(int letter, int options)  pcregrep(void *handle, int frtype, char *printname)
905  {  {
906  switch(letter)  int rc = 1;
907    {  int linenumber = 1;
908    case -1:  help(); exit(0);  int lastmatchnumber = 0;
909    case 'c': count_only = TRUE; break;  int count = 0;
910    case 'h': filenames = FALSE; break;  int filepos = 0;
911    case 'i': options |= PCRE_CASELESS; break;  int offsets[OFFSET_SIZE];
912    case 'l': filenames_only = TRUE;  char *lastmatchrestart = NULL;
913    case 'n': number = TRUE; break;  char buffer[3*MBUFTHIRD];
914    case 'r': recurse = TRUE; break;  char *ptr = buffer;
915    char *endptr;
916    size_t bufflength;
917    BOOL endhyphenpending = FALSE;
918    FILE *in = NULL;                    /* Ensure initialized */
919    
920    #ifdef SUPPORT_LIBZ
921    gzFile ingz = NULL;
922    #endif
923    
924    #ifdef SUPPORT_LIBBZ2
925    BZFILE *inbz2 = NULL;
926    #endif
927    
928    
929    /* Do the first read into the start of the buffer and set up the pointer to end
930    of what we have. In the case of libz, a non-zipped .gz file will be read as a
931    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
932    fail. */
933    
934    #ifdef SUPPORT_LIBZ
935    if (frtype == FR_LIBZ)
936      {
937      ingz = (gzFile)handle;
938      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
939      }
940    else
941    #endif
942    
943    #ifdef SUPPORT_LIBBZ2
944    if (frtype == FR_LIBBZ2)
945      {
946      inbz2 = (BZFILE *)handle;
947      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
948      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
949      }                                    /* without the cast it is unsigned. */
950    else
951    #endif
952    
953      {
954      in = (FILE *)handle;
955      bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
956      }
957    
958    endptr = buffer + bufflength;
959    
960    /* Loop while the current pointer is not at the end of the file. For large
961    files, endptr will be at the end of the buffer when we are in the middle of the
962    file, but ptr will never get there, because as soon as it gets over 2/3 of the
963    way, the buffer is shifted left and re-filled. */
964    
965    while (ptr < endptr)
966      {
967      int endlinelength;
968      int mrc = 0;
969      BOOL match;
970      char *matchptr = ptr;
971      char *t = ptr;
972      size_t length, linelength;
973    
974      /* At this point, ptr is at the start of a line. We need to find the length
975      of the subject string to pass to pcre_exec(). In multiline mode, it is the
976      length remainder of the data in the buffer. Otherwise, it is the length of
977      the next line, excluding the terminating newline. After matching, we always
978      advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
979      option is used for compiling, so that any match is constrained to be in the
980      first line. */
981    
982      t = end_of_line(t, endptr, &endlinelength);
983      linelength = t - ptr - endlinelength;
984      length = multiline? (size_t)(endptr - ptr) : linelength;
985    
986      /* Extra processing for Jeffrey Friedl's debugging. */
987    
988    #ifdef JFRIEDL_DEBUG
989      if (jfriedl_XT || jfriedl_XR)
990      {
991          #include <sys/time.h>
992          #include <time.h>
993          struct timeval start_time, end_time;
994          struct timezone dummy;
995          int i;
996    
997          if (jfriedl_XT)
998          {
999              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1000              const char *orig = ptr;
1001              ptr = malloc(newlen + 1);
1002              if (!ptr) {
1003                      printf("out of memory");
1004                      exit(2);
1005              }
1006              endptr = ptr;
1007              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1008              for (i = 0; i < jfriedl_XT; i++) {
1009                      strncpy(endptr, orig,  length);
1010                      endptr += length;
1011              }
1012              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1013              length = newlen;
1014          }
1015    
1016          if (gettimeofday(&start_time, &dummy) != 0)
1017                  perror("bad gettimeofday");
1018    
1019    
1020          for (i = 0; i < jfriedl_XR; i++)
1021              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1022                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1023    
1024          if (gettimeofday(&end_time, &dummy) != 0)
1025                  perror("bad gettimeofday");
1026    
1027          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1028                          -
1029                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1030    
1031          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1032          return 0;
1033      }
1034    #endif
1035    
1036      /* We come back here after a match when the -o option (only_matching) is set,
1037      in order to find any further matches in the same line. */
1038    
1039      ONLY_MATCHING_RESTART:
1040    
1041      /* Run through all the patterns until one matches or there is an error other
1042      than NOMATCH. This code is in a subroutine so that it can be re-used for
1043      finding subsequent matches when colouring matched lines. */
1044    
1045      match = match_patterns(matchptr, length, offsets, &mrc);
1046    
1047      /* If it's a match or a not-match (as required), do what's wanted. */
1048    
1049      if (match != invert)
1050        {
1051        BOOL hyphenprinted = FALSE;
1052    
1053        /* We've failed if we want a file that doesn't have any matches. */
1054    
1055        if (filenames == FN_NOMATCH_ONLY) return 1;
1056    
1057        /* Just count if just counting is wanted. */
1058    
1059        if (count_only) count++;
1060    
1061        /* If all we want is a file name, there is no need to scan any more lines
1062        in the file. */
1063    
1064        else if (filenames == FN_ONLY)
1065          {
1066          fprintf(stdout, "%s\n", printname);
1067          return 0;
1068          }
1069    
1070        /* Likewise, if all we want is a yes/no answer. */
1071    
1072        else if (quiet) return 0;
1073    
1074        /* The --only-matching option prints just the substring that matched, and
1075        the --file-offsets and --line-offsets options output offsets for the
1076        matching substring (they both force --only-matching). None of these options
1077        prints any context. Afterwards, adjust the start and length, and then jump
1078        back to look for further matches in the same line. If we are in invert
1079        mode, however, nothing is printed - this could be still useful because the
1080        return code is set. */
1081    
1082        else if (only_matching)
1083          {
1084          if (!invert)
1085            {
1086            if (printname != NULL) fprintf(stdout, "%s:", printname);
1087            if (number) fprintf(stdout, "%d:", linenumber);
1088            if (line_offsets)
1089              fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1090                offsets[1] - offsets[0]);
1091            else if (file_offsets)
1092              fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1093                offsets[1] - offsets[0]);
1094            else
1095              {
1096              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1097              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1098              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1099              }
1100            fprintf(stdout, "\n");
1101            matchptr += offsets[1];
1102            length -= offsets[1];
1103            match = FALSE;
1104            goto ONLY_MATCHING_RESTART;
1105            }
1106          }
1107    
1108        /* This is the default case when none of the above options is set. We print
1109        the matching lines(s), possibly preceded and/or followed by other lines of
1110        context. */
1111    
1112        else
1113          {
1114          /* See if there is a requirement to print some "after" lines from a
1115          previous match. We never print any overlaps. */
1116    
1117          if (after_context > 0 && lastmatchnumber > 0)
1118            {
1119            int ellength;
1120            int linecount = 0;
1121            char *p = lastmatchrestart;
1122    
1123            while (p < ptr && linecount < after_context)
1124              {
1125              p = end_of_line(p, ptr, &ellength);
1126              linecount++;
1127              }
1128    
1129            /* It is important to advance lastmatchrestart during this printing so
1130            that it interacts correctly with any "before" printing below. Print
1131            each line's data using fwrite() in case there are binary zeroes. */
1132    
1133            while (lastmatchrestart < p)
1134              {
1135              char *pp = lastmatchrestart;
1136              if (printname != NULL) fprintf(stdout, "%s-", printname);
1137              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1138              pp = end_of_line(pp, endptr, &ellength);
1139              fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1140              lastmatchrestart = pp;
1141              }
1142            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1143            }
1144    
1145          /* If there were non-contiguous lines printed above, insert hyphens. */
1146    
1147          if (hyphenpending)
1148            {
1149            fprintf(stdout, "--\n");
1150            hyphenpending = FALSE;
1151            hyphenprinted = TRUE;
1152            }
1153    
1154          /* See if there is a requirement to print some "before" lines for this
1155          match. Again, don't print overlaps. */
1156    
1157          if (before_context > 0)
1158            {
1159            int linecount = 0;
1160            char *p = ptr;
1161    
1162            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1163                   linecount < before_context)
1164              {
1165              linecount++;
1166              p = previous_line(p, buffer);
1167              }
1168    
1169            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1170              fprintf(stdout, "--\n");
1171    
1172            while (p < ptr)
1173              {
1174              int ellength;
1175              char *pp = p;
1176              if (printname != NULL) fprintf(stdout, "%s-", printname);
1177              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1178              pp = end_of_line(pp, endptr, &ellength);
1179              fwrite(p, 1, pp - p, stdout);
1180              p = pp;
1181              }
1182            }
1183    
1184          /* Now print the matching line(s); ensure we set hyphenpending at the end
1185          of the file if any context lines are being output. */
1186    
1187          if (after_context > 0 || before_context > 0)
1188            endhyphenpending = TRUE;
1189    
1190          if (printname != NULL) fprintf(stdout, "%s:", printname);
1191          if (number) fprintf(stdout, "%d:", linenumber);
1192    
1193          /* In multiline mode, we want to print to the end of the line in which
1194          the end of the matched string is found, so we adjust linelength and the
1195          line number appropriately, but only when there actually was a match
1196          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1197          the match will always be before the first newline sequence. */
1198    
1199          if (multiline)
1200            {
1201            int ellength;
1202            char *endmatch = ptr;
1203            if (!invert)
1204              {
1205              endmatch += offsets[1];
1206              t = ptr;
1207              while (t < endmatch)
1208                {
1209                t = end_of_line(t, endptr, &ellength);
1210                if (t <= endmatch) linenumber++; else break;
1211                }
1212              }
1213            endmatch = end_of_line(endmatch, endptr, &ellength);
1214            linelength = endmatch - ptr - ellength;
1215            }
1216    
1217          /*** NOTE: Use only fwrite() to output the data line, so that binary
1218          zeroes are treated as just another data character. */
1219    
1220          /* This extra option, for Jeffrey Friedl's debugging requirements,
1221          replaces the matched string, or a specific captured string if it exists,
1222          with X. When this happens, colouring is ignored. */
1223    
1224    #ifdef JFRIEDL_DEBUG
1225          if (S_arg >= 0 && S_arg < mrc)
1226            {
1227            int first = S_arg * 2;
1228            int last  = first + 1;
1229            fwrite(ptr, 1, offsets[first], stdout);
1230            fprintf(stdout, "X");
1231            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1232            }
1233          else
1234    #endif
1235    
1236          /* We have to split the line(s) up if colouring, and search for further
1237          matches. */
1238    
1239          if (do_colour)
1240            {
1241            int last_offset = 0;
1242            fwrite(ptr, 1, offsets[0], stdout);
1243            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1244            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1245            fprintf(stdout, "%c[00m", 0x1b);
1246            for (;;)
1247              {
1248              last_offset += offsets[1];
1249              matchptr += offsets[1];
1250              length -= offsets[1];
1251              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1252              fwrite(matchptr, 1, offsets[0], stdout);
1253              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1254              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1255              fprintf(stdout, "%c[00m", 0x1b);
1256              }
1257            fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,
1258              stdout);
1259            }
1260    
1261          /* Not colouring; no need to search for further matches */
1262    
1263          else fwrite(ptr, 1, linelength + endlinelength, stdout);
1264          }
1265    
1266        /* End of doing what has to be done for a match */
1267    
1268        rc = 0;    /* Had some success */
1269    
1270        /* Remember where the last match happened for after_context. We remember
1271        where we are about to restart, and that line's number. */
1272    
1273        lastmatchrestart = ptr + linelength + endlinelength;
1274        lastmatchnumber = linenumber + 1;
1275        }
1276    
1277      /* For a match in multiline inverted mode (which of course did not cause
1278      anything to be printed), we have to move on to the end of the match before
1279      proceeding. */
1280    
1281      if (multiline && invert && match)
1282        {
1283        int ellength;
1284        char *endmatch = ptr + offsets[1];
1285        t = ptr;
1286        while (t < endmatch)
1287          {
1288          t = end_of_line(t, endptr, &ellength);
1289          if (t <= endmatch) linenumber++; else break;
1290          }
1291        endmatch = end_of_line(endmatch, endptr, &ellength);
1292        linelength = endmatch - ptr - ellength;
1293        }
1294    
1295      /* Advance to after the newline and increment the line number. The file
1296      offset to the current line is maintained in filepos. */
1297    
1298      ptr += linelength + endlinelength;
1299      filepos += linelength + endlinelength;
1300      linenumber++;
1301    
1302      /* If we haven't yet reached the end of the file (the buffer is full), and
1303      the current point is in the top 1/3 of the buffer, slide the buffer down by
1304      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1305      about to be lost, print them. */
1306    
1307      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1308        {
1309        if (after_context > 0 &&
1310            lastmatchnumber > 0 &&
1311            lastmatchrestart < buffer + MBUFTHIRD)
1312          {
1313          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1314          lastmatchnumber = 0;
1315          }
1316    
1317        /* Now do the shuffle */
1318    
1319        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1320        ptr -= MBUFTHIRD;
1321    
1322    #ifdef SUPPORT_LIBZ
1323        if (frtype == FR_LIBZ)
1324          bufflength = 2*MBUFTHIRD +
1325            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1326        else
1327    #endif
1328    
1329    #ifdef SUPPORT_LIBBZ2
1330        if (frtype == FR_LIBBZ2)
1331          bufflength = 2*MBUFTHIRD +
1332            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1333        else
1334    #endif
1335    
1336        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1337    
1338        endptr = buffer + bufflength;
1339    
1340        /* Adjust any last match point */
1341    
1342        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1343        }
1344      }     /* Loop through the whole file */
1345    
1346    /* End of file; print final "after" lines if wanted; do_after_lines sets
1347    hyphenpending if it prints something. */
1348    
1349    if (!only_matching && !count_only)
1350      {
1351      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1352      hyphenpending |= endhyphenpending;
1353      }
1354    
1355    /* Print the file name if we are looking for those without matches and there
1356    were none. If we found a match, we won't have got this far. */
1357    
1358    if (filenames == FN_NOMATCH_ONLY)
1359      {
1360      fprintf(stdout, "%s\n", printname);
1361      return 0;
1362      }
1363    
1364    /* Print the match count if wanted */
1365    
1366    if (count_only)
1367      {
1368      if (printname != NULL) fprintf(stdout, "%s:", printname);
1369      fprintf(stdout, "%d\n", count);
1370      }
1371    
1372    return rc;
1373    }
1374    
1375    
1376    
1377    /*************************************************
1378    *     Grep a file or recurse into a directory    *
1379    *************************************************/
1380    
1381    /* Given a path name, if it's a directory, scan all the files if we are
1382    recursing; if it's a file, grep it.
1383    
1384    Arguments:
1385      pathname          the path to investigate
1386      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1387      only_one_at_top   TRUE if the path is the only one at toplevel
1388    
1389    Returns:   0 if there was at least one match
1390               1 if there were no matches
1391               2 there was some kind of error
1392    
1393    However, file opening failures are suppressed if "silent" is set.
1394    */
1395    
1396    static int
1397    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1398    {
1399    int rc = 1;
1400    int sep;
1401    int frtype;
1402    int pathlen;
1403    void *handle;
1404    FILE *in = NULL;           /* Ensure initialized */
1405    
1406    #ifdef SUPPORT_LIBZ
1407    gzFile ingz = NULL;
1408    #endif
1409    
1410    #ifdef SUPPORT_LIBBZ2
1411    BZFILE *inbz2 = NULL;
1412    #endif
1413    
1414    /* If the file name is "-" we scan stdin */
1415    
1416    if (strcmp(pathname, "-") == 0)
1417      {
1418      return pcregrep(stdin, FR_PLAIN,
1419        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1420          stdin_name : NULL);
1421      }
1422    
1423    /* If the file is a directory, skip if skipping or if we are recursing, scan
1424    each file and directory within it, subject to any include or exclude patterns
1425    that were set. The scanning code is localized so it can be made
1426    system-specific. */
1427    
1428    if ((sep = isdirectory(pathname)) != 0)
1429      {
1430      if (dee_action == dee_SKIP) return 1;
1431      if (dee_action == dee_RECURSE)
1432        {
1433        char buffer[1024];
1434        char *nextfile;
1435        directory_type *dir = opendirectory(pathname);
1436    
1437        if (dir == NULL)
1438          {
1439          if (!silent)
1440            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1441              strerror(errno));
1442          return 2;
1443          }
1444    
1445        while ((nextfile = readdirectory(dir)) != NULL)
1446          {
1447          int frc, nflen;
1448          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1449          nflen = strlen(nextfile);
1450    
1451          if (isdirectory(buffer))
1452            {
1453            if (exclude_dir_compiled != NULL &&
1454                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1455              continue;
1456    
1457            if (include_dir_compiled != NULL &&
1458                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1459              continue;
1460            }
1461          else
1462            {
1463            if (exclude_compiled != NULL &&
1464                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1465              continue;
1466    
1467            if (include_compiled != NULL &&
1468                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1469              continue;
1470            }
1471    
1472          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1473          if (frc > 1) rc = frc;
1474           else if (frc == 0 && rc == 1) rc = 0;
1475          }
1476    
1477        closedirectory(dir);
1478        return rc;
1479        }
1480      }
1481    
1482    /* If the file is not a directory and not a regular file, skip it if that's
1483    been requested. */
1484    
1485    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1486    
1487    /* Control reaches here if we have a regular file, or if we have a directory
1488    and recursion or skipping was not requested, or if we have anything else and
1489    skipping was not requested. The scan proceeds. If this is the first and only
1490    argument at top level, we don't show the file name, unless we are only showing
1491    the file name, or the filename was forced (-H). */
1492    
1493    pathlen = strlen(pathname);
1494    
1495    /* Open using zlib if it is supported and the file name ends with .gz. */
1496    
1497    #ifdef SUPPORT_LIBZ
1498    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1499      {
1500      ingz = gzopen(pathname, "rb");
1501      if (ingz == NULL)
1502        {
1503        if (!silent)
1504          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1505            strerror(errno));
1506        return 2;
1507        }
1508      handle = (void *)ingz;
1509      frtype = FR_LIBZ;
1510      }
1511    else
1512    #endif
1513    
1514    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1515    
1516    #ifdef SUPPORT_LIBBZ2
1517    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1518      {
1519      inbz2 = BZ2_bzopen(pathname, "rb");
1520      handle = (void *)inbz2;
1521      frtype = FR_LIBBZ2;
1522      }
1523    else
1524    #endif
1525    
1526    /* Otherwise use plain fopen(). The label is so that we can come back here if
1527    an attempt to read a .bz2 file indicates that it really is a plain file. */
1528    
1529    #ifdef SUPPORT_LIBBZ2
1530    PLAIN_FILE:
1531    #endif
1532      {
1533      in = fopen(pathname, "r");
1534      handle = (void *)in;
1535      frtype = FR_PLAIN;
1536      }
1537    
1538    /* All the opening methods return errno when they fail. */
1539    
1540    if (handle == NULL)
1541      {
1542      if (!silent)
1543        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1544          strerror(errno));
1545      return 2;
1546      }
1547    
1548    /* Now grep the file */
1549    
1550    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1551      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1552    
1553    /* Close in an appropriate manner. */
1554    
1555    #ifdef SUPPORT_LIBZ
1556    if (frtype == FR_LIBZ)
1557      gzclose(ingz);
1558    else
1559    #endif
1560    
1561    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1562    read failed. If the error indicates that the file isn't in fact bzipped, try
1563    again as a normal file. */
1564    
1565    #ifdef SUPPORT_LIBBZ2
1566    if (frtype == FR_LIBBZ2)
1567      {
1568      if (rc == 2)
1569        {
1570        int errnum;
1571        const char *err = BZ2_bzerror(inbz2, &errnum);
1572        if (errnum == BZ_DATA_ERROR_MAGIC)
1573          {
1574          BZ2_bzclose(inbz2);
1575          goto PLAIN_FILE;
1576          }
1577        else if (!silent)
1578          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1579            pathname, err);
1580        }
1581      BZ2_bzclose(inbz2);
1582      }
1583    else
1584    #endif
1585    
1586    /* Normal file close */
1587    
1588    fclose(in);
1589    
1590    /* Pass back the yield from pcregrep(). */
1591    
1592    return rc;
1593    }
1594    
1595    
1596    
1597    
1598    /*************************************************
1599    *                Usage function                  *
1600    *************************************************/
1601    
1602    static int
1603    usage(int rc)
1604    {
1605    option_item *op;
1606    fprintf(stderr, "Usage: pcregrep [-");
1607    for (op = optionlist; op->one_char != 0; op++)
1608      {
1609      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1610      }
1611    fprintf(stderr, "] [long options] [pattern] [files]\n");
1612    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1613      "options.\n");
1614    return rc;
1615    }
1616    
1617    
1618    
1619    
1620    /*************************************************
1621    *                Help function                   *
1622    *************************************************/
1623    
1624    static void
1625    help(void)
1626    {
1627    option_item *op;
1628    
1629    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1630    printf("Search for PATTERN in each FILE or standard input.\n");
1631    printf("PATTERN must be present if neither -e nor -f is used.\n");
1632    printf("\"-\" can be used as a file name to mean STDIN.\n");
1633    
1634    #ifdef SUPPORT_LIBZ
1635    printf("Files whose names end in .gz are read using zlib.\n");
1636    #endif
1637    
1638    #ifdef SUPPORT_LIBBZ2
1639    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1640    #endif
1641    
1642    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1643    printf("Other files and the standard input are read as plain files.\n\n");
1644    #else
1645    printf("All files are read as plain files, without any interpretation.\n\n");
1646    #endif
1647    
1648    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1649    printf("Options:\n");
1650    
1651    for (op = optionlist; op->one_char != 0; op++)
1652      {
1653      int n;
1654      char s[4];
1655      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1656      n = 30 - printf("  %s --%s", s, op->long_name);
1657      if (n < 1) n = 1;
1658      printf("%.*s%s\n", n, "                    ", op->help_text);
1659      }
1660    
1661    printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1662    printf("trailing white space is removed and blank lines are ignored.\n");
1663    printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1664    
1665    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1666    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1667    }
1668    
1669    
1670    
1671    
1672    /*************************************************
1673    *    Handle a single-letter, no data option      *
1674    *************************************************/
1675    
1676    static int
1677    handle_option(int letter, int options)
1678    {
1679    switch(letter)
1680      {
1681      case N_FOFFSETS: file_offsets = TRUE; break;
1682      case N_HELP: help(); exit(0);
1683      case N_LOFFSETS: line_offsets = number = TRUE; break;
1684      case 'c': count_only = TRUE; break;
1685      case 'F': process_options |= PO_FIXED_STRINGS; break;
1686      case 'H': filenames = FN_FORCE; break;
1687      case 'h': filenames = FN_NONE; break;
1688      case 'i': options |= PCRE_CASELESS; break;
1689      case 'l': filenames = FN_ONLY; break;
1690      case 'L': filenames = FN_NOMATCH_ONLY; break;
1691      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1692      case 'n': number = TRUE; break;
1693      case 'o': only_matching = TRUE; break;
1694      case 'q': quiet = TRUE; break;
1695      case 'r': dee_action = dee_RECURSE; break;
1696    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1697      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1698    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1699    case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1700      case 'x': process_options |= PO_LINE_MATCH; break;
1701    
1702      case 'V':
1703      fprintf(stderr, "pcregrep version %s\n", pcre_version());
1704      exit(0);
1705      break;
1706    
1707      default:
1708      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1709      exit(usage(2));
1710      }
1711    
1712    return options;
1713    }
1714    
1715    
1716    
1717    
1718    /*************************************************
1719    *          Construct printed ordinal             *
1720    *************************************************/
1721    
/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12 or 13 take "th" ("11th", "112th") rather than the ending that the
final digit alone would select. The result is built in a static buffer, so it
is overwritten by the next call.

Argument:   n     the number
Returns:          pointer to the static buffer holding the ordinal text
*/

static char *
ordin(int n)
{
/* About three decimal digits per byte of int, plus room for a sign, the
two-character ending and the terminating zero. */

static char buffer[sizeof(int) * 3 + 5];
char *p = buffer;
const char *ending = "th";
int last_two = n % 100;

p += sprintf(p, "%d", n);

/* Only outside the "teens" does the final digit choose the ending. */

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

strcpy(p, ending);
return buffer;
}
1740    
1741  return options;  
1742    
1743    /*************************************************
1744    *          Compile a single pattern              *
1745    *************************************************/
1746    
1747    /* When the -F option has been used, this is called for each substring.
1748    Otherwise it's called for each supplied pattern.
1749    
1750    Arguments:
1751      pattern        the pattern string
1752      options        the PCRE options
1753      filename       the file name, or NULL for a command-line pattern
1754      count          0 if this is the only command line pattern, or
1755                     number of the command line pattern, or
1756                     linenumber for a pattern from a file
1757    
1758    Returns:         TRUE on success, FALSE after an error
1759    */
1760    
1761    static BOOL
1762    compile_single_pattern(char *pattern, int options, char *filename, int count)
1763    {
1764    char buffer[MBUFTHIRD + 16];
1765    const char *error;
1766    int errptr;
1767    
1768    if (pattern_count >= MAX_PATTERN_COUNT)
1769      {
1770      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1771        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1772      return FALSE;
1773      }
1774    
1775    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1776      suffix[process_options]);
1777    pattern_list[pattern_count] =
1778      pcre_compile(buffer, options, &error, &errptr, pcretables);
1779    if (pattern_list[pattern_count] != NULL)
1780      {
1781      pattern_count++;
1782      return TRUE;
1783      }
1784    
1785    /* Handle compile errors */
1786    
1787    errptr -= (int)strlen(prefix[process_options]);
1788    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1789    
1790    if (filename == NULL)
1791      {
1792      if (count == 0)
1793        fprintf(stderr, "pcregrep: Error in command-line regex "
1794          "at offset %d: %s\n", errptr, error);
1795      else
1796        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1797          "at offset %d: %s\n", ordin(count), errptr, error);
1798      }
1799    else
1800      {
1801      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1802        "at offset %d: %s\n", count, filename, errptr, error);
1803      }
1804    
1805    return FALSE;
1806  }  }
1807    
1808    
1809    
1810    /*************************************************
1811    *           Compile one supplied pattern         *
1812    *************************************************/
1813    
1814    /* When the -F option has been used, each string may be a list of strings,
1815    separated by line breaks. They will be matched literally.
1816    
1817    Arguments:
1818      pattern        the pattern string
1819      options        the PCRE options
1820      filename       the file name, or NULL for a command-line pattern
1821      count          0 if this is the only command line pattern, or
1822                     number of the command line pattern, or
1823                     linenumber for a pattern from a file
1824    
1825    Returns:         TRUE on success, FALSE after an error
1826    */
1827    
1828    static BOOL
1829    compile_pattern(char *pattern, int options, char *filename, int count)
1830    {
1831    if ((process_options & PO_FIXED_STRINGS) != 0)
1832      {
1833      char *eop = pattern + strlen(pattern);
1834      char buffer[MBUFTHIRD];
1835      for(;;)
1836        {
1837        int ellength;
1838        char *p = end_of_line(pattern, eop, &ellength);
1839        if (ellength == 0)
1840          return compile_single_pattern(pattern, options, filename, count);
1841        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1842        pattern = p;
1843        if (!compile_single_pattern(buffer, options, filename, count))
1844          return FALSE;
1845        }
1846      }
1847    else return compile_single_pattern(pattern, options, filename, count);
1848    }
1849    
1850    
1851    
1852  /*************************************************  /*************************************************
1853  *                Main program                    *  *                Main program                    *
1854  *************************************************/  *************************************************/
1855    
1856    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1857    
1858  int  int
1859  main(int argc, char **argv)  main(int argc, char **argv)
1860  {  {
1861  int i, j;  int i, j;
1862  int rc = 1;  int rc = 1;
1863  int options = 0;  int pcre_options = 0;
1864    int cmd_pattern_count = 0;
1865    int hint_count = 0;
1866  int errptr;  int errptr;
 const char *error;  
1867  BOOL only_one_at_top;  BOOL only_one_at_top;
1868    char *patterns[MAX_PATTERN_COUNT];
1869    const char *locale_from = "--locale";
1870    const char *error;
1871    
1872    /* Set the default line ending value from the default in the PCRE library;
1873    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1874    */
1875    
1876    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1877    switch(i)
1878      {
1879      default:                 newline = (char *)"lf"; break;
1880      case '\r':               newline = (char *)"cr"; break;
1881      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1882      case -1:                 newline = (char *)"any"; break;
1883      case -2:                 newline = (char *)"anycrlf"; break;
1884      }
1885    
1886  /* Process the options */  /* Process the options */
1887    
1888  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
1889    {    {
1890      option_item *op = NULL;
1891      char *option_data = (char *)"";    /* default to keep compiler happy */
1892      BOOL longop;
1893      BOOL longopwasequals = FALSE;
1894    
1895    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
1896    
1897    /* Long name options */    /* If we hit an argument that is just "-", it may be a reference to STDIN,
1898      but only if we have previously had -e or -f to define the patterns. */
1899    
1900      if (argv[i][1] == 0)
1901        {
1902        if (pattern_filename != NULL || pattern_count > 0) break;
1903          else exit(usage(2));
1904        }
1905    
1906      /* Handle a long name option, or -- to terminate the options */
1907    
1908    if (argv[i][1] == '-')    if (argv[i][1] == '-')
1909      {      {
1910      option_item *op;      char *arg = argv[i] + 2;
1911        char *argequals = strchr(arg, '=');
1912    
1913      if (strncmp(argv[i]+2, "file=", 5) == 0)      if (*arg == 0)    /* -- terminates options */
1914        {        {
1915        pattern_filename = argv[i] + 7;        i++;
1916        continue;        break;                /* out of the options-handling loop */
1917        }        }
1918    
1919        longop = TRUE;
1920    
1921        /* Some long options have data that follows after =, for example file=name.
1922        Some options have variations in the long name spelling: specifically, we
1923        allow "regexp" because GNU grep allows it, though I personally go along
1924        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1925        These options are entered in the table as "regex(p)". No option is in both
1926        these categories, fortunately. */
1927    
1928      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
1929        {        {
1930        if (strcmp(argv[i]+2, op->long_name) == 0)        char *opbra = strchr(op->long_name, '(');
1931          char *equals = strchr(op->long_name, '=');
1932          if (opbra == NULL)     /* Not a (p) case */
1933          {          {
1934          options = handle_option(op->one_char, options);          if (equals == NULL)  /* Not thing=data case */
1935          break;            {
1936              if (strcmp(arg, op->long_name) == 0) break;
1937              }
1938            else                 /* Special case xxx=data */
1939              {
1940              int oplen = equals - op->long_name;
1941              int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1942              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1943                {
1944                option_data = arg + arglen;
1945                if (*option_data == '=')
1946                  {
1947                  option_data++;
1948                  longopwasequals = TRUE;
1949                  }
1950                break;
1951                }
1952              }
1953            }
1954          else                   /* Special case xxxx(p) */
1955            {
1956            char buff1[24];
1957            char buff2[24];
1958            int baselen = opbra - op->long_name;
1959            sprintf(buff1, "%.*s", baselen, op->long_name);
1960            sprintf(buff2, "%s%.*s", buff1,
1961              (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1962            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1963              break;
1964          }          }
1965        }        }
1966    
1967      if (op->one_char == 0)      if (op->one_char == 0)
1968        {        {
1969        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
# Line 417  for (i = 1; i < argc; i++) Line 1971  for (i = 1; i < argc; i++)
1971        }        }
1972      }      }
1973    
1974    /* One-char options */  
1975      /* Jeffrey Friedl's debugging harness uses these additional options which
1976      are not in the right form for putting in the option table because they use
1977      only one hyphen, yet are more than one character long. By putting them
1978      separately here, they will not get displayed as part of the help() output,
1979      but I don't think Jeffrey will care about that. */
1980    
1981    #ifdef JFRIEDL_DEBUG
1982      else if (strcmp(argv[i], "-pre") == 0) {
1983              jfriedl_prefix = argv[++i];
1984              continue;
1985      } else if (strcmp(argv[i], "-post") == 0) {
1986              jfriedl_postfix = argv[++i];
1987              continue;
1988      } else if (strcmp(argv[i], "-XT") == 0) {
1989              sscanf(argv[++i], "%d", &jfriedl_XT);
1990              continue;
1991      } else if (strcmp(argv[i], "-XR") == 0) {
1992              sscanf(argv[++i], "%d", &jfriedl_XR);
1993              continue;
1994      }
1995    #endif
1996    
1997    
1998      /* One-char options; many that have no data may be in a single argument; we
1999      continue till we hit the last one or one that needs data. */
2000    
2001    else    else
2002      {      {
2003      char *s = argv[i] + 1;      char *s = argv[i] + 1;
2004        longop = FALSE;
2005      while (*s != 0)      while (*s != 0)
2006        {        {
2007        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
2008            { if (*s == op->one_char) break; }
2009          if (op->one_char == 0)
2010          {          {
2011          pattern_filename = s + 1;          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2012          if (pattern_filename[0] == 0)            *s, argv[i]);
2013            {          exit(usage(2));
2014            if (i >= argc - 1)          }
2015              {        if (op->type != OP_NODATA || s[1] == 0)
2016              fprintf(stderr, "pcregrep: File name missing after -f\n");          {
2017              exit(usage(2));          option_data = s+1;
             }  
           pattern_filename = argv[++i];  
           }  
2018          break;          break;
2019          }          }
2020        else options = handle_option(*s++, options);        pcre_options = handle_option(*s++, pcre_options);
2021        }        }
2022      }      }
   }  
2023    
2024  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));    /* At this point we should have op pointing to a matched option. If the type
2025  hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));    is NO_DATA, it means that there is no data, and the option might set
2026      something in the PCRE options. */
2027    
2028  if (pattern_list == NULL || hints_list == NULL)    if (op->type == OP_NODATA)
2029    {      {
2030    fprintf(stderr, "pcregrep: malloc failed\n");      pcre_options = handle_option(op->one_char, pcre_options);
2031    return 2;      continue;
2032    }      }
2033    
2034  /* Compile the regular expression(s). */    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2035      either has a value or defaults to something. It cannot have data in a
2036      separate item. At the moment, the only such options are "colo(u)r" and
2037      Jeffrey Friedl's special -S debugging option. */
2038    
2039  if (pattern_filename != NULL)    if (*option_data == 0 &&
2040    {        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
   FILE *f = fopen(pattern_filename, "r");  
   char buffer[BUFSIZ];  
   if (f == NULL)  
2041      {      {
2042      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      switch (op->one_char)
2043        strerror(errno));        {
2044      return 2;        case N_COLOUR:
2045          colour_option = (char *)"auto";
2046          break;
2047    #ifdef JFRIEDL_DEBUG
2048          case 'S':
2049          S_arg = 0;
2050          break;
2051    #endif
2052          }
2053        continue;
2054      }      }
2055    while (fgets(buffer, sizeof(buffer), f) != NULL)  
2056      /* Otherwise, find the data string for the option. */
2057    
2058      if (*option_data == 0)
2059      {      {
2060      char *s = buffer + (int)strlen(buffer);      if (i >= argc - 1 || longopwasequals)
2061      if (pattern_count >= MAX_PATTERN_COUNT)        {
2062          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2063          exit(usage(2));
2064          }
2065        option_data = argv[++i];
2066        }
2067    
2068      /* If the option type is OP_PATLIST, it's the -e option, which can be called
2069      multiple times to create a list of patterns. */
2070    
2071      if (op->type == OP_PATLIST)
2072        {
2073        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2074        {        {
2075        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2076          MAX_PATTERN_COUNT);          MAX_PATTERN_COUNT);
2077        return 2;        return 2;
2078        }        }
2079      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      patterns[cmd_pattern_count++] = option_data;
2080      if (s == buffer) continue;      }
2081      *s = 0;  
2082      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,    /* Otherwise, deal with single string or numeric data values. */
2083        &errptr, NULL);  
2084      if (pattern_list[pattern_count++] == NULL)    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2085        {
2086        *((char **)op->dataptr) = option_data;
2087        }
2088      else
2089        {
2090        char *endptr;
2091        int n = strtoul(option_data, &endptr, 10);
2092        if (*endptr != 0)
2093        {        {
2094        fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",        if (longop)
2095          pattern_count, errptr, error);          {
2096        return 2;          char *equals = strchr(op->long_name, '=');
2097            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2098              equals - op->long_name;
2099            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2100              option_data, nlen, op->long_name);
2101            }
2102          else
2103            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2104              option_data, op->one_char);
2105          exit(usage(2));
2106        }        }
2107        *((int *)op->dataptr) = n;
2108        }
2109      }
2110    
2111    /* Options have been decoded. If -C was used, its value is used as a default
2112    for -A and -B. */
2113    
2114    if (both_context > 0)
2115      {
2116      if (after_context == 0) after_context = both_context;
2117      if (before_context == 0) before_context = both_context;
2118      }
2119    
2120    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2121    However, the latter two set the only_matching flag. */
2122    
2123    if ((only_matching && (file_offsets || line_offsets)) ||
2124        (file_offsets && line_offsets))
2125      {
2126      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2127        "and/or --line-offsets\n");
2128      exit(usage(2));
2129      }
2130    
2131    if (file_offsets || line_offsets) only_matching = TRUE;
2132    
2133    /* If a locale has not been provided as an option, see if the LC_CTYPE or
2134    LC_ALL environment variable is set, and if so, use it. */
2135    
2136    if (locale == NULL)
2137      {
2138      locale = getenv("LC_ALL");
2139      locale_from = "LCC_ALL";
2140      }
2141    
2142    if (locale == NULL)
2143      {
2144      locale = getenv("LC_CTYPE");
2145      locale_from = "LC_CTYPE";
2146      }
2147    
2148    /* If a locale has been provided, set it, and generate the tables the PCRE
2149    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2150    
2151    if (locale != NULL)
2152      {
2153      if (setlocale(LC_CTYPE, locale) == NULL)
2154        {
2155        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2156          locale, locale_from);
2157        return 2;
2158        }
2159      pcretables = pcre_maketables();
2160      }
2161    
2162    /* Sort out colouring */
2163    
2164    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2165      {
2166      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2167      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2168      else
2169        {
2170        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2171          colour_option);
2172        return 2;
2173        }
2174      if (do_colour)
2175        {
2176        char *cs = getenv("PCREGREP_COLOUR");
2177        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2178        if (cs != NULL) colour_string = cs;
2179      }      }
   fclose(f);  
2180    }    }
2181    
2182  /* If no file name, a single regex must be given inline */  /* Interpret the newline type; the default settings are Unix-like. */
2183    
2184    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2185      {
2186      pcre_options |= PCRE_NEWLINE_CR;
2187      endlinetype = EL_CR;
2188      }
2189    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2190      {
2191      pcre_options |= PCRE_NEWLINE_LF;
2192      endlinetype = EL_LF;
2193      }
2194    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2195      {
2196      pcre_options |= PCRE_NEWLINE_CRLF;
2197      endlinetype = EL_CRLF;
2198      }
2199    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2200      {
2201      pcre_options |= PCRE_NEWLINE_ANY;
2202      endlinetype = EL_ANY;
2203      }
2204    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2205      {
2206      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2207      endlinetype = EL_ANYCRLF;
2208      }
2209  else  else
2210    {    {
2211    if (i >= argc) return usage(0);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2212    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);    return 2;
2213    if (pattern_list[0] == NULL)    }
2214    
2215    /* Interpret the text values for -d and -D */
2216    
2217    if (dee_option != NULL)
2218      {
2219      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2220      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2221      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2222      else
2223        {
2224        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2225        return 2;
2226        }
2227      }
2228    
2229    if (DEE_option != NULL)
2230      {
2231      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2232      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2233      else
2234      {      {
2235      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
       error);  
2236      return 2;      return 2;
2237      }      }
   pattern_count++;  
2238    }    }
2239    
2240  /* Study the regular expressions, as we will be running them may times */  /* Check the values for Jeffrey Friedl's debugging options. */
2241    
2242    #ifdef JFRIEDL_DEBUG
2243    if (S_arg > 9)
2244      {
2245      fprintf(stderr, "pcregrep: bad value for -S option\n");
2246      return 2;
2247      }
2248    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2249      {
2250      if (jfriedl_XT == 0) jfriedl_XT = 1;
2251      if (jfriedl_XR == 0) jfriedl_XR = 1;
2252      }
2253    #endif
2254    
2255    /* Get memory to store the pattern and hints lists. */
2256    
2257    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2258    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2259    
2260    if (pattern_list == NULL || hints_list == NULL)
2261      {
2262      fprintf(stderr, "pcregrep: malloc failed\n");
2263      goto EXIT2;
2264      }
2265    
2266    /* If no patterns were provided by -e, and there is no file provided by -f,
2267    the first argument is the one and only pattern, and it must exist. */
2268    
2269    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2270      {
2271      if (i >= argc) return usage(2);
2272      patterns[cmd_pattern_count++] = argv[i++];
2273      }
2274    
2275    /* Compile the patterns that were provided on the command line, either by
2276    multiple uses of -e or as a single unkeyed pattern. */
2277    
2278    for (j = 0; j < cmd_pattern_count; j++)
2279      {
2280      if (!compile_pattern(patterns[j], pcre_options, NULL,
2281           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2282        goto EXIT2;
2283      }
2284    
2285    /* Compile the regular expressions that are provided in a file. */
2286    
2287    if (pattern_filename != NULL)
2288      {
2289      int linenumber = 0;
2290      FILE *f;
2291      char *filename;
2292      char buffer[MBUFTHIRD];
2293    
2294      if (strcmp(pattern_filename, "-") == 0)
2295        {
2296        f = stdin;
2297        filename = stdin_name;
2298        }
2299      else
2300        {
2301        f = fopen(pattern_filename, "r");
2302        if (f == NULL)
2303          {
2304          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2305            strerror(errno));
2306          goto EXIT2;
2307          }
2308        filename = pattern_filename;
2309        }
2310    
2311      while (fgets(buffer, MBUFTHIRD, f) != NULL)
2312        {
2313        char *s = buffer + (int)strlen(buffer);
2314        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2315        *s = 0;
2316        linenumber++;
2317        if (buffer[0] == 0) continue;   /* Skip blank lines */
2318        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2319          goto EXIT2;
2320        }
2321    
2322      if (f != stdin) fclose(f);
2323      }
2324    
2325    /* Study the regular expressions, as we will be running them many times */
2326    
2327  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
2328    {    {
# Line 513  for (j = 0; j < pattern_count; j++) Line 2332  for (j = 0; j < pattern_count; j++)
2332      char s[16];      char s[16];
2333      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2334      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2335      return 2;      goto EXIT2;
2336        }
2337      hint_count++;
2338      }
2339    
2340    /* If there are include or exclude patterns, compile them. */
2341    
2342    if (exclude_pattern != NULL)
2343      {
2344      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2345        pcretables);
2346      if (exclude_compiled == NULL)
2347        {
2348        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2349          errptr, error);
2350        goto EXIT2;
2351        }
2352      }
2353    
2354    if (include_pattern != NULL)
2355      {
2356      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2357        pcretables);
2358      if (include_compiled == NULL)
2359        {
2360        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2361          errptr, error);
2362        goto EXIT2;
2363        }
2364      }
2365    
2366    if (exclude_dir_pattern != NULL)
2367      {
2368      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2369        pcretables);
2370      if (exclude_dir_compiled == NULL)
2371        {
2372        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2373          errptr, error);
2374        goto EXIT2;
2375        }
2376      }
2377    
2378    if (include_dir_pattern != NULL)
2379      {
2380      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2381        pcretables);
2382      if (include_dir_compiled == NULL)
2383        {
2384        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2385          errptr, error);
2386        goto EXIT2;
2387      }      }
2388    }    }
2389    
2390  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
2391    
2392  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc)
2393      {
2394      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2395      goto EXIT;
2396      }
2397    
2398  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2399  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
2400  the file name if the argument is not a directory. */  the file name if the argument is not a directory and filenames are not
2401    otherwise forced. */
2402    
2403  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
 if (filenames_only) filenames = TRUE;  
2404    
2405  for (; i < argc; i++)  for (; i < argc; i++)
2406    {    {
2407    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2408    if (frc == 0 && rc == 1) rc = 0;      only_one_at_top);
2409      if (frc > 1) rc = frc;
2410        else if (frc == 0 && rc == 1) rc = 0;
2411    }    }
2412    
2413    EXIT:
2414    if (pattern_list != NULL)
2415      {
2416      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2417      free(pattern_list);
2418      }
2419    if (hints_list != NULL)
2420      {
2421      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2422      free(hints_list);
2423      }
2424  return rc;  return rc;
2425    
2426    EXIT2:
2427    rc = 2;
2428    goto EXIT;
2429  }  }
2430    
2431  /* End */  /* End of pcregrep */

Legend:
Removed from v.53  
changed lines
  Added in v.379

  ViewVC Help
Powered by ViewVC 1.1.5