/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC revision 421 by ph10, Fri Aug 14 15:43:27 2009 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix system it can recurse into directories. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2009 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55    #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 18  its pattern matching. On a Unix system i Line 70  its pattern matching. On a Unix system i
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "2.0 01-Aug-2001"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76    #if BUFSIZ > 8192
77    #define MBUFTHIRD BUFSIZ
78    #else
79    #define MBUFTHIRD 8192
80    #endif
81    
82    /* Values for the "filenames" variable, which specifies options for file name
83    output. The order is important; it is assumed that a file name is wanted for
84    all values greater than FN_DEFAULT. */
85    
86    enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92    /* Actions for the -d and -D options */
93    
94    enum { dee_READ, dee_SKIP, dee_RECURSE };
95    enum { DEE_READ, DEE_SKIP };
96    
97    /* Actions for special processing options (flag bits) */
98    
99    #define PO_WORD_MATCH     0x0001
100    #define PO_LINE_MATCH     0x0002
101    #define PO_FIXED_STRINGS  0x0004
102    
103    /* Line ending types */
104    
105    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    
108    
109  /*************************************************  /*************************************************
110  *               Global variables                 *  *               Global variables                 *
111  *************************************************/  *************************************************/
112    
113    /* Jeffrey Friedl has some debugging requirements that are not part of the
114    regular code. */
115    
116    #ifdef JFRIEDL_DEBUG
117    static int S_arg = -1;
118    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
119    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
120    static const char *jfriedl_prefix = "";
121    static const char *jfriedl_postfix = "";
122    #endif
123    
124    static int  endlinetype;
125    
126    static char *colour_string = (char *)"1;31";
127    static char *colour_option = NULL;
128    static char *dee_option = NULL;
129    static char *DEE_option = NULL;
130    static char *newline = NULL;
131  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
132    static char *stdin_name = (char *)"(standard input)";
133    static char *locale = NULL;
134    
135    static const unsigned char *pcretables = NULL;
136    
137  static int  pattern_count = 0;  static int  pattern_count = 0;
138  static pcre **pattern_list;  static pcre **pattern_list = NULL;
139  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
140    
141    static char *include_pattern = NULL;
142    static char *exclude_pattern = NULL;
143    static char *include_dir_pattern = NULL;
144    static char *exclude_dir_pattern = NULL;
145    
146    static pcre *include_compiled = NULL;
147    static pcre *exclude_compiled = NULL;
148    static pcre *include_dir_compiled = NULL;
149    static pcre *exclude_dir_compiled = NULL;
150    
151    static int after_context = 0;
152    static int before_context = 0;
153    static int both_context = 0;
154    static int dee_action = dee_READ;
155    static int DEE_action = DEE_READ;
156    static int error_count = 0;
157    static int filenames = FN_DEFAULT;
158    static int process_options = 0;
159    
160  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
161  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
162  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
163    static BOOL hyphenpending = FALSE;
164  static BOOL invert = FALSE;  static BOOL invert = FALSE;
165    static BOOL line_offsets = FALSE;
166    static BOOL multiline = FALSE;
167  static BOOL number = FALSE;  static BOOL number = FALSE;
168  static BOOL recurse = FALSE;  static BOOL omit_zero_count = FALSE;
169    static BOOL only_matching = FALSE;
170    static BOOL quiet = FALSE;
171  static BOOL silent = FALSE;  static BOOL silent = FALSE;
172  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
173    
174  /* Structure for options and list of them */  /* Structure for options and list of them */
175    
176    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
177           OP_PATLIST };
178    
179  typedef struct option_item {  typedef struct option_item {
180      int type;
181    int one_char;    int one_char;
182    char *long_name;    void *dataptr;
183    char *help_text;    const char *long_name;
184      const char *help_text;
185  } option_item;  } option_item;
186    
187    /* Options without a single-letter equivalent get a negative value. This can be
188    used to identify them. */
189    
190    #define N_COLOUR       (-1)
191    #define N_EXCLUDE      (-2)
192    #define N_EXCLUDE_DIR  (-3)
193    #define N_HELP         (-4)
194    #define N_INCLUDE      (-5)
195    #define N_INCLUDE_DIR  (-6)
196    #define N_LABEL        (-7)
197    #define N_LOCALE       (-8)
198    #define N_NULL         (-9)
199    #define N_LOFFSETS     (-10)
200    #define N_FOFFSETS     (-11)
201    
202  static option_item optionlist[] = {  static option_item optionlist[] = {
203    { -1,  "help",         "display this help and exit" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
204    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
205    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
206    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
207    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
208    { 'n', "line-number",  "print line number with output lines" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
209    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
210    { 's', "no-messages",  "suppress error messages" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
211    { 'V', "version",      "print version information and exit" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
212    { 'v', "invert-match", "select non-matching lines" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
213    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
214    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_NODATA,    'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
215    { 0,    NULL,           NULL }    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
216      { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
217      { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
218      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
219      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
220      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
221      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
222      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
223      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
224      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
225      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
226      { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
227      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
228      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
229      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
230      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
231      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
232      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
233      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
234      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
235    #ifdef JFRIEDL_DEBUG
236      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
237    #endif
238      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
239      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
240      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
241      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
242      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
243      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
244      { OP_NODATA,    0,        NULL,               NULL,            NULL }
245  };  };
246    
247    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
248    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
249    that the combination of -w and -x has the same effect as -x on its own, so we
250    can treat them as the same. */
251    
252    static const char *prefix[] = {
253      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
254    
255    static const char *suffix[] = {
256      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
257    
258    /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
259    
260    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
261    
262    const char utf8_table4[] = {
263      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
264      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
265      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
266      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
267    
268    
269    
270  /*************************************************  /*************************************************
271  *       Functions for directory scanning         *  *            OS-specific functions               *
272  *************************************************/  *************************************************/
273    
274  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
275  although at present the only ones are for Unix, and for "no directory recursion  although at present the only ones are for Unix, Win32, and for "no support". */
 support". */  
276    
277    
278  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
279    
280  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
281  #include <sys/types.h>  #include <sys/types.h>
282  #include <sys/stat.h>  #include <sys/stat.h>
283  #include <dirent.h>  #include <dirent.h>
284    
285  typedef DIR directory_type;  typedef DIR directory_type;
286    
287  int  static int
288  isdirectory(char *filename)  isdirectory(char *filename)
289  {  {
290  struct stat statbuf;  struct stat statbuf;
# Line 92  if (stat(filename, &statbuf) < 0) Line 293  if (stat(filename, &statbuf) < 0)
293  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
294  }  }
295    
296  directory_type *  static directory_type *
297  opendirectory(char *filename)  opendirectory(char *filename)
298  {  {
299  return opendir(filename);  return opendir(filename);
300  }  }
301    
302  char *  static char *
303  readdirectory(directory_type *dir)  readdirectory(directory_type *dir)
304  {  {
305  for (;;)  for (;;)
# Line 108  for (;;) Line 309  for (;;)
309    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
310      return dent->d_name;      return dent->d_name;
311    }    }
312    /* Control never reaches here */
313    }
314    
315    static void
316    closedirectory(directory_type *dir)
317    {
318    closedir(dir);
319    }
320    
321    
322    /************* Test for regular file in Unix **********/
323    
324    static int
325    isregfile(char *filename)
326    {
327    struct stat statbuf;
328    if (stat(filename, &statbuf) < 0)
329      return 1;        /* In the expectation that opening as a file will fail */
330    return (statbuf.st_mode & S_IFMT) == S_IFREG;
331    }
332    
333    
334    /************* Test stdout for being a terminal in Unix **********/
335    
336    static BOOL
337    is_stdout_tty(void)
338    {
339    return isatty(fileno(stdout));
340    }
341    
342    
343    /************* Directory scanning in Win32 ***********/
344    
345    /* I (Philip Hazel) have no means of testing this code. It was contributed by
346    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
347    when it did not exist. David Byron added a patch that moved the #include of
348    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
349    */
350    
351    #elif HAVE_WINDOWS_H
352    
353    #ifndef STRICT
354    # define STRICT
355    #endif
356    #ifndef WIN32_LEAN_AND_MEAN
357    # define WIN32_LEAN_AND_MEAN
358    #endif
359    
360    #include <windows.h>
361    
362    #ifndef INVALID_FILE_ATTRIBUTES
363    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
364    #endif
365    
366    typedef struct directory_type
367    {
368    HANDLE handle;
369    BOOL first;
370    WIN32_FIND_DATA data;
371    } directory_type;
372    
373    int
374    isdirectory(char *filename)
375    {
376    DWORD attr = GetFileAttributes(filename);
377    if (attr == INVALID_FILE_ATTRIBUTES)
378      return 0;
379    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
380    }
381    
382    directory_type *
383    opendirectory(char *filename)
384    {
385    size_t len;
386    char *pattern;
387    directory_type *dir;
388    DWORD err;
389    len = strlen(filename);
390    pattern = (char *) malloc(len + 3);
391    dir = (directory_type *) malloc(sizeof(*dir));
392    if ((pattern == NULL) || (dir == NULL))
393      {
394      fprintf(stderr, "pcregrep: malloc failed\n");
395      exit(2);
396      }
397    memcpy(pattern, filename, len);
398    memcpy(&(pattern[len]), "\\*", 3);
399    dir->handle = FindFirstFile(pattern, &(dir->data));
400    if (dir->handle != INVALID_HANDLE_VALUE)
401      {
402      free(pattern);
403      dir->first = TRUE;
404      return dir;
405      }
406    err = GetLastError();
407    free(pattern);
408    free(dir);
409    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
410    return NULL;
411    }
412    
413    char *
414    readdirectory(directory_type *dir)
415    {
416    for (;;)
417      {
418      if (!dir->first)
419        {
420        if (!FindNextFile(dir->handle, &(dir->data)))
421          return NULL;
422        }
423      else
424        {
425        dir->first = FALSE;
426        }
427      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
428        return dir->data.cFileName;
429      }
430    #ifndef _MSC_VER
431  return NULL;   /* Keep compiler happy; never executed */  return NULL;   /* Keep compiler happy; never executed */
432    #endif
433  }  }
434    
435  void  void
436  closedirectory(directory_type *dir)  closedirectory(directory_type *dir)
437  {  {
438  closedir(dir);  FindClose(dir->handle);
439    free(dir);
440  }  }
441    
442    
443  #else  /************* Test for regular file in Win32 **********/
444    
445    /* I don't know how to do this, or if it can be done; assume all paths are
446    regular if they are not directories. */
447    
448    int isregfile(char *filename)
449    {
450    return !isdirectory(filename);
451    }
452    
453    
454    /************* Test stdout for being a terminal in Win32 **********/
455    
456    /* I don't know how to do this; assume never */
457    
458    static BOOL
459    is_stdout_tty(void)
460    {
461    return FALSE;
462    }
463    
464    
465  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
466    
467  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
468    
469    #else
470    
471  typedef void directory_type;  typedef void directory_type;
472    
473  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
474  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
475  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
476  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
477    
478    
479    /************* Test for regular file when we can't do it **********/
480    
481    /* Assume all files are regular. */
482    
483    int isregfile(char *filename) { return 1; }
484    
485    
486    /************* Test stdout for being a terminal when we can't do it **********/
487    
488    static BOOL
489    is_stdout_tty(void)
490    {
491    return FALSE;
492    }
493    
494    
495  #endif  #endif
496    
497    
498    
499  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
500  /*************************************************  /*************************************************
501  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
502  *************************************************/  *************************************************/
# Line 159  return sys_errlist[n]; Line 519  return sys_errlist[n];
519    
520    
521  /*************************************************  /*************************************************
522  *              Grep an individual file           *  *             Find end of line                   *
523  *************************************************/  *************************************************/
524    
525  static int  /* The length of the endline sequence that is found is set via lenptr. This may
526  pcregrep(FILE *in, char *name)  be zero at the very end of the file if there is no line-ending sequence there.
527  {  
528  int rc = 1;  Arguments:
529  int linenumber = 0;    p         current position in line
530  int count = 0;    endptr    end of available data
531  int offsets[99];    lenptr    where to put the length of the eol sequence
 char buffer[BUFSIZ];  
532    
533  while (fgets(buffer, sizeof(buffer), in) != NULL)  Returns:    pointer just past the end of the line, including its newline byte(s)
534    */
535    
536    static char *
537    end_of_line(char *p, char *endptr, int *lenptr)
538    {
539    switch(endlinetype)
540    {    {
541    BOOL match = FALSE;    default:      /* Just in case */
542    int i;    case EL_LF:
543    int length = (int)strlen(buffer);    while (p < endptr && *p != '\n') p++;
544    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    if (p < endptr)
545    linenumber++;      {
546        *lenptr = 1;
547        return p + 1;
548        }
549      *lenptr = 0;
550      return endptr;
551    
552    for (i = 0; !match && i < pattern_count; i++)    case EL_CR:
553      while (p < endptr && *p != '\r') p++;
554      if (p < endptr)
555      {      {
556      match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,      *lenptr = 1;
557        offsets, 99) >= 0;      return p + 1;
     if (match && whole_lines && offsets[1] != length) match = FALSE;  
558      }      }
559      *lenptr = 0;
560      return endptr;
561    
562    if (match != invert)    case EL_CRLF:
563      for (;;)
564      {      {
565      if (count_only) count++;      while (p < endptr && *p != '\r') p++;
566        if (++p >= endptr)
567          {
568          *lenptr = 0;
569          return endptr;
570          }
571        if (*p == '\n')
572          {
573          *lenptr = 2;
574          return p + 1;
575          }
576        }
577      break;
578    
579      case EL_ANYCRLF:
580      while (p < endptr)
581        {
582        int extra = 0;
583        register int c = *((unsigned char *)p);
584    
585      else if (filenames_only)      if (utf8 && c >= 0xc0)
586        {        {
587        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        int gcii, gcss;
588        return 0;        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
589          gcss = 6*extra;
590          c = (c & utf8_table3[extra]) << gcss;
591          for (gcii = 1; gcii <= extra; gcii++)
592            {
593            gcss -= 6;
594            c |= (p[gcii] & 0x3f) << gcss;
595            }
596        }        }
597    
598      else if (silent) return 0;      p += 1 + extra;
599    
600      else      switch (c)
601        {        {
602        if (name != NULL) fprintf(stdout, "%s:", name);        case 0x0a:    /* LF */
603        if (number) fprintf(stdout, "%d:", linenumber);        *lenptr = 1;
604        fprintf(stdout, "%s\n", buffer);        return p;
605    
606          case 0x0d:    /* CR */
607          if (p < endptr && *p == 0x0a)
608            {
609            *lenptr = 2;
610            p++;
611            }
612          else *lenptr = 1;
613          return p;
614    
615          default:
616          break;
617        }        }
618        }   /* End of loop for ANYCRLF case */
619    
620      rc = 0;    *lenptr = 0;  /* Must have hit the end */
621      }    return endptr;
   }  
622    
623  if (count_only)    case EL_ANY:
624    {    while (p < endptr)
625    if (name != NULL) fprintf(stdout, "%s:", name);      {
626    fprintf(stdout, "%d\n", count);      int extra = 0;
627    }      register int c = *((unsigned char *)p);
628    
629  return rc;      if (utf8 && c >= 0xc0)
630  }        {
631          int gcii, gcss;
632          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
633          gcss = 6*extra;
634          c = (c & utf8_table3[extra]) << gcss;
635          for (gcii = 1; gcii <= extra; gcii++)
636            {
637            gcss -= 6;
638            c |= (p[gcii] & 0x3f) << gcss;
639            }
640          }
641    
642        p += 1 + extra;
643    
644        switch (c)
645          {
646          case 0x0a:    /* LF */
647          case 0x0b:    /* VT */
648          case 0x0c:    /* FF */
649          *lenptr = 1;
650          return p;
651    
652          case 0x0d:    /* CR */
653          if (p < endptr && *p == 0x0a)
654            {
655            *lenptr = 2;
656            p++;
657            }
658          else *lenptr = 1;
659          return p;
660    
661          case 0x85:    /* NEL */
662          *lenptr = utf8? 2 : 1;
663          return p;
664    
665          case 0x2028:  /* LS */
666          case 0x2029:  /* PS */
667          *lenptr = 3;
668          return p;
669    
670          default:
671          break;
672          }
673        }   /* End of loop for ANY case */
674    
675      *lenptr = 0;  /* Must have hit the end */
676      return endptr;
677      }     /* End of overall switch */
678    }
679    
680    
681    
682  /*************************************************  /*************************************************
683  *     Grep a file or recurse into a directory    *  *         Find start of previous line            *
684  *************************************************/  *************************************************/
685    
686  static int  /* This is called when looking back for before lines to print.
687  grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,  
688    BOOL only_one_at_top)  Arguments:
689  {    p         start of the subsequent line
690  int rc = 1;    startptr  start of available data
 int sep;  
 FILE *in;  
691    
692  /* If the file is a directory and we are recursing, scan each file within it.  Returns:    pointer to the start of the previous line
693  The scanning code is localized so it can be made system-specific. */  */
694    
695  if ((sep = isdirectory(filename)) != 0 && recurse)  static char *
696    previous_line(char *p, char *startptr)
697    {
698    switch(endlinetype)
699    {    {
700    char buffer[1024];    default:      /* Just in case */
701    char *nextfile;    case EL_LF:
702    directory_type *dir = opendirectory(filename);    p--;
703      while (p > startptr && p[-1] != '\n') p--;
704      return p;
705    
706      case EL_CR:
707      p--;
708      while (p > startptr && p[-1] != '\n') p--;
709      return p;
710    
711    if (dir == NULL)    case EL_CRLF:
712      for (;;)
713      {      {
714      fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,      p -= 2;
715        strerror(errno));      while (p > startptr && p[-1] != '\n') p--;
716      return 2;      if (p <= startptr + 1 || p[-2] == '\r') return p;
717      }      }
718      return p;   /* But control should never get here */
719    
720      case EL_ANY:
721      case EL_ANYCRLF:
722      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
723      if (utf8) while ((*p & 0xc0) == 0x80) p--;
724    
725    while ((nextfile = readdirectory(dir)) != NULL)    while (p > startptr)
726      {      {
727      int frc;      register int c;
728      sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);      char *pp = p - 1;
     frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);  
     if (frc == 0 && rc == 1) rc = 0;  
     }  
729    
730    closedirectory(dir);      if (utf8)
731    return rc;        {
732    }        int extra = 0;
733          while ((*pp & 0xc0) == 0x80) pp--;
734          c = *((unsigned char *)pp);
735          if (c >= 0xc0)
736            {
737            int gcii, gcss;
738            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
739            gcss = 6*extra;
740            c = (c & utf8_table3[extra]) << gcss;
741            for (gcii = 1; gcii <= extra; gcii++)
742              {
743              gcss -= 6;
744              c |= (pp[gcii] & 0x3f) << gcss;
745              }
746            }
747          }
748        else c = *((unsigned char *)pp);
749    
750  /* If the file is not a directory, or we are not recursing, scan it. If this is      if (endlinetype == EL_ANYCRLF) switch (c)
751  the first and only argument at top level, we don't show the file name.        {
752  Otherwise, control is via the show_filenames variable. */        case 0x0a:    /* LF */
753          case 0x0d:    /* CR */
754          return p;
755    
756  in = fopen(filename, "r");        default:
757  if (in == NULL)        break;
758    {        }
   fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));  
   return 2;  
   }  
759    
760  rc = pcregrep(in, (show_filenames && !only_one_at_top)? filename : NULL);      else switch (c)
761  fclose(in);        {
762  return rc;        case 0x0a:    /* LF */
763          case 0x0b:    /* VT */
764          case 0x0c:    /* FF */
765          case 0x0d:    /* CR */
766          case 0x85:    /* NEL */
767          case 0x2028:  /* LS */
768          case 0x2029:  /* PS */
769          return p;
770    
771          default:
772          break;
773          }
774    
775        p = pp;  /* Back one character */
776        }        /* End of loop for ANY case */
777    
778      return startptr;  /* Hit start of data */
779      }     /* End of overall switch */
780  }  }
781    
782    
783    
784    
785    
786  /*************************************************  /*************************************************
787  *                Usage function                  *  *       Print the previous "after" lines         *
788  *************************************************/  *************************************************/
789    
790  static int  /* This is called if we are about to lose said lines because of buffer filling,
791  usage(int rc)  and at the end of the file. The data in the line is written using fwrite() so
792    that a binary zero does not terminate it.
793    
794    Arguments:
795      lastmatchnumber   the number of the last matching line, plus one
796      lastmatchrestart  where we restarted after the last match
797      endptr            end of available data
798      printname         filename for printing
799    
800    Returns:            nothing
801    */
802    
803    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
804      char *endptr, char *printname)
805  {  {
806  fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n");  if (after_context > 0 && lastmatchnumber > 0)
807  fprintf(stderr, "Type `pcregrep --help' for more information.\n");    {
808  return rc;    int count = 0;
809      while (lastmatchrestart < endptr && count++ < after_context)
810        {
811        int ellength;
812        char *pp = lastmatchrestart;
813        if (printname != NULL) fprintf(stdout, "%s-", printname);
814        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
815        pp = end_of_line(pp, endptr, &ellength);
816        fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
817        lastmatchrestart = pp;
818        }
819      hyphenpending = TRUE;
820      }
821  }  }
822    
823    
824    
   
825  /*************************************************  /*************************************************
826  *                Help function                   *  *   Apply patterns to subject till one matches   *
827  *************************************************/  *************************************************/
828    
829  static void  /* This function is called to run through all patterns, looking for a match. It
830  help(void)  is used multiple times for the same subject when colouring is enabled, in order
831  {  to find all possible matches.
832  option_item *op;  
833    Arguments:
834  printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n");    matchptr    the start of the subject
835  printf("Search for PATTERN in each FILE or standard input.\n");    length      the length of the subject to match
836  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");    offsets     the offets vector to fill in
837      mrc         address of where to put the result of pcre_exec()
838  printf("Options:\n");  
839    Returns:      TRUE if there was a match
840                  FALSE if there was no match
841                  invert if there was a non-fatal error
842    */
843    
844  for (op = optionlist; op->one_char != 0; op++)  static BOOL
845    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
846    {
847    int i;
848    for (i = 0; i < pattern_count; i++)
849    {    {
850    int n;    *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0,
851    char s[4];      PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
852    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (*mrc >= 0) return TRUE;
853    printf("  %s --%s%n", s, op->long_name, &n);    if (*mrc == PCRE_ERROR_NOMATCH) continue;
854    n = 30 - n;    fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
855    if (n < 1) n = 1;    if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
856    printf("%.*s%s\n", n, "                    ", op->help_text);    fprintf(stderr, "this text:\n");
857      fwrite(matchptr, 1, length, stderr);  /* In case binary zero included */
858      fprintf(stderr, "\n");
859      if (error_count == 0 &&
860          (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
861        {
862        fprintf(stderr, "pcregrep: error %d means that a resource limit "
863          "was exceeded\n", *mrc);
864        fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
865        }
866      if (error_count++ > 20)
867        {
868        fprintf(stderr, "pcregrep: too many errors - abandoned\n");
869        exit(2);
870        }
871      return invert;    /* No more matching; don't show the line again */
872    }    }
873    
874  printf("\n  -f<filename>  or  --file=<filename>\n");  return FALSE;  /* No match, no errors */
 printf("    Read patterns from <filename> instead of using a command line option.\n");  
 printf("    Trailing white space is removed; blanks lines are ignored.\n");  
 printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
   
 printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  
 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  
875  }  }
876    
877    
878    
   
879  /*************************************************  /*************************************************
880  *                Handle an option                *  *            Grep an individual file             *
881  *************************************************/  *************************************************/
882    
883    /* This is called from grep_or_recurse() below. It uses a buffer that is three
884    times the value of MBUFTHIRD. The matching point is never allowed to stray into
885    the top third of the buffer, thus keeping more of the file available for
886    context printing or for multiline scanning. For large files, the pointer will
887    be in the middle third most of the time, so the bottom third is available for
888    "before" context printing.
889    
890    Arguments:
891      handle       the fopened FILE stream for a normal file
892                   the gzFile pointer when reading is via libz
893                   the BZFILE pointer when reading is via libbz2
894      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
895      printname    the file name if it is to be printed for each match
896                   or NULL if the file name is not to be printed
897                   it cannot be NULL if filenames[_nomatch]_only is set
898    
899    Returns:       0 if there was at least one match
900                   1 otherwise (no matches)
901                   2 if there is a read error on a .bz2 file
902    */
903    
904  static int  static int
905  handle_option(int letter, int options)  pcregrep(void *handle, int frtype, char *printname)
906  {  {
907  switch(letter)  int rc = 1;
908    {  int linenumber = 1;
909    case -1:  help(); exit(0);  int lastmatchnumber = 0;
910    case 'c': count_only = TRUE; break;  int count = 0;
911    case 'h': filenames = FALSE; break;  int filepos = 0;
912    case 'i': options |= PCRE_CASELESS; break;  int offsets[OFFSET_SIZE];
913    case 'l': filenames_only = TRUE;  char *lastmatchrestart = NULL;
914    case 'n': number = TRUE; break;  char buffer[3*MBUFTHIRD];
915    case 'r': recurse = TRUE; break;  char *ptr = buffer;
916    case 's': silent = TRUE; break;  char *endptr;
917    size_t bufflength;
918    BOOL endhyphenpending = FALSE;
919    FILE *in = NULL;                    /* Ensure initialized */
920    
921    #ifdef SUPPORT_LIBZ
922    gzFile ingz = NULL;
923    #endif
924    
925    #ifdef SUPPORT_LIBBZ2
926    BZFILE *inbz2 = NULL;
927    #endif
928    
929    
930    /* Do the first read into the start of the buffer and set up the pointer to end
931    of what we have. In the case of libz, a non-zipped .gz file will be read as a
932    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
933    fail. */
934    
935    #ifdef SUPPORT_LIBZ
936    if (frtype == FR_LIBZ)
937      {
938      ingz = (gzFile)handle;
939      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
940      }
941    else
942    #endif
943    
944    #ifdef SUPPORT_LIBBZ2
945    if (frtype == FR_LIBBZ2)
946      {
947      inbz2 = (BZFILE *)handle;
948      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
949      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
950      }                                    /* without the cast it is unsigned. */
951    else
952    #endif
953    
954      {
955      in = (FILE *)handle;
956      bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
957      }
958    
959    endptr = buffer + bufflength;
960    
961    /* Loop while the current pointer is not at the end of the file. For large
962    files, endptr will be at the end of the buffer when we are in the middle of the
963    file, but ptr will never get there, because as soon as it gets over 2/3 of the
964    way, the buffer is shifted left and re-filled. */
965    
966    while (ptr < endptr)
967      {
968      int endlinelength;
969      int mrc = 0;
970      BOOL match;
971      char *matchptr = ptr;
972      char *t = ptr;
973      size_t length, linelength;
974    
975      /* At this point, ptr is at the start of a line. We need to find the length
976      of the subject string to pass to pcre_exec(). In multiline mode, it is the
977      length remainder of the data in the buffer. Otherwise, it is the length of
978      the next line, excluding the terminating newline. After matching, we always
979      advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
980      option is used for compiling, so that any match is constrained to be in the
981      first line. */
982    
983      t = end_of_line(t, endptr, &endlinelength);
984      linelength = t - ptr - endlinelength;
985      length = multiline? (size_t)(endptr - ptr) : linelength;
986    
987      /* Extra processing for Jeffrey Friedl's debugging. */
988    
989    #ifdef JFRIEDL_DEBUG
990      if (jfriedl_XT || jfriedl_XR)
991      {
992          #include <sys/time.h>
993          #include <time.h>
994          struct timeval start_time, end_time;
995          struct timezone dummy;
996          int i;
997    
998          if (jfriedl_XT)
999          {
1000              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1001              const char *orig = ptr;
1002              ptr = malloc(newlen + 1);
1003              if (!ptr) {
1004                      printf("out of memory");
1005                      exit(2);
1006              }
1007              endptr = ptr;
1008              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1009              for (i = 0; i < jfriedl_XT; i++) {
1010                      strncpy(endptr, orig,  length);
1011                      endptr += length;
1012              }
1013              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1014              length = newlen;
1015          }
1016    
1017          if (gettimeofday(&start_time, &dummy) != 0)
1018                  perror("bad gettimeofday");
1019    
1020    
1021          for (i = 0; i < jfriedl_XR; i++)
1022              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1023                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1024    
1025          if (gettimeofday(&end_time, &dummy) != 0)
1026                  perror("bad gettimeofday");
1027    
1028          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1029                          -
1030                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1031    
1032          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1033          return 0;
1034      }
1035    #endif
1036    
1037      /* We come back here after a match when the -o option (only_matching) is set,
1038      in order to find any further matches in the same line. */
1039    
1040      ONLY_MATCHING_RESTART:
1041    
1042      /* Run through all the patterns until one matches or there is an error other
1043      than NOMATCH. This code is in a subroutine so that it can be re-used for
1044      finding subsequent matches when colouring matched lines. */
1045    
1046      match = match_patterns(matchptr, length, offsets, &mrc);
1047    
1048      /* If it's a match or a not-match (as required), do what's wanted. */
1049    
1050      if (match != invert)
1051        {
1052        BOOL hyphenprinted = FALSE;
1053    
1054        /* We've failed if we want a file that doesn't have any matches. */
1055    
1056        if (filenames == FN_NOMATCH_ONLY) return 1;
1057    
1058        /* Just count if just counting is wanted. */
1059    
1060        if (count_only) count++;
1061    
1062        /* If all we want is a file name, there is no need to scan any more lines
1063        in the file. */
1064    
1065        else if (filenames == FN_MATCH_ONLY)
1066          {
1067          fprintf(stdout, "%s\n", printname);
1068          return 0;
1069          }
1070    
1071        /* Likewise, if all we want is a yes/no answer. */
1072    
1073        else if (quiet) return 0;
1074    
1075        /* The --only-matching option prints just the substring that matched, and
1076        the --file-offsets and --line-offsets options output offsets for the
1077        matching substring (they both force --only-matching). None of these options
1078        prints any context. Afterwards, adjust the start and length, and then jump
1079        back to look for further matches in the same line. If we are in invert
1080        mode, however, nothing is printed - this could be still useful because the
1081        return code is set. */
1082    
1083        else if (only_matching)
1084          {
1085          if (!invert)
1086            {
1087            if (printname != NULL) fprintf(stdout, "%s:", printname);
1088            if (number) fprintf(stdout, "%d:", linenumber);
1089            if (line_offsets)
1090              fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1091                offsets[1] - offsets[0]);
1092            else if (file_offsets)
1093              fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1094                offsets[1] - offsets[0]);
1095            else
1096              {
1097              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1098              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1099              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1100              }
1101            fprintf(stdout, "\n");
1102            matchptr += offsets[1];
1103            length -= offsets[1];
1104            match = FALSE;
1105            goto ONLY_MATCHING_RESTART;
1106            }
1107          }
1108    
1109        /* This is the default case when none of the above options is set. We print
1110        the matching lines(s), possibly preceded and/or followed by other lines of
1111        context. */
1112    
1113        else
1114          {
1115          /* See if there is a requirement to print some "after" lines from a
1116          previous match. We never print any overlaps. */
1117    
1118          if (after_context > 0 && lastmatchnumber > 0)
1119            {
1120            int ellength;
1121            int linecount = 0;
1122            char *p = lastmatchrestart;
1123    
1124            while (p < ptr && linecount < after_context)
1125              {
1126              p = end_of_line(p, ptr, &ellength);
1127              linecount++;
1128              }
1129    
1130            /* It is important to advance lastmatchrestart during this printing so
1131            that it interacts correctly with any "before" printing below. Print
1132            each line's data using fwrite() in case there are binary zeroes. */
1133    
1134            while (lastmatchrestart < p)
1135              {
1136              char *pp = lastmatchrestart;
1137              if (printname != NULL) fprintf(stdout, "%s-", printname);
1138              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1139              pp = end_of_line(pp, endptr, &ellength);
1140              fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1141              lastmatchrestart = pp;
1142              }
1143            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1144            }
1145    
1146          /* If there were non-contiguous lines printed above, insert hyphens. */
1147    
1148          if (hyphenpending)
1149            {
1150            fprintf(stdout, "--\n");
1151            hyphenpending = FALSE;
1152            hyphenprinted = TRUE;
1153            }
1154    
1155          /* See if there is a requirement to print some "before" lines for this
1156          match. Again, don't print overlaps. */
1157    
1158          if (before_context > 0)
1159            {
1160            int linecount = 0;
1161            char *p = ptr;
1162    
1163            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1164                   linecount < before_context)
1165              {
1166              linecount++;
1167              p = previous_line(p, buffer);
1168              }
1169    
1170            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1171              fprintf(stdout, "--\n");
1172    
1173            while (p < ptr)
1174              {
1175              int ellength;
1176              char *pp = p;
1177              if (printname != NULL) fprintf(stdout, "%s-", printname);
1178              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1179              pp = end_of_line(pp, endptr, &ellength);
1180              fwrite(p, 1, pp - p, stdout);
1181              p = pp;
1182              }
1183            }
1184    
1185          /* Now print the matching line(s); ensure we set hyphenpending at the end
1186          of the file if any context lines are being output. */
1187    
1188          if (after_context > 0 || before_context > 0)
1189            endhyphenpending = TRUE;
1190    
1191          if (printname != NULL) fprintf(stdout, "%s:", printname);
1192          if (number) fprintf(stdout, "%d:", linenumber);
1193    
1194          /* In multiline mode, we want to print to the end of the line in which
1195          the end of the matched string is found, so we adjust linelength and the
1196          line number appropriately, but only when there actually was a match
1197          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1198          the match will always be before the first newline sequence. */
1199    
1200          if (multiline)
1201            {
1202            int ellength;
1203            char *endmatch = ptr;
1204            if (!invert)
1205              {
1206              endmatch += offsets[1];
1207              t = ptr;
1208              while (t < endmatch)
1209                {
1210                t = end_of_line(t, endptr, &ellength);
1211                if (t <= endmatch) linenumber++; else break;
1212                }
1213              }
1214            endmatch = end_of_line(endmatch, endptr, &ellength);
1215            linelength = endmatch - ptr - ellength;
1216            }
1217    
1218          /*** NOTE: Use only fwrite() to output the data line, so that binary
1219          zeroes are treated as just another data character. */
1220    
1221          /* This extra option, for Jeffrey Friedl's debugging requirements,
1222          replaces the matched string, or a specific captured string if it exists,
1223          with X. When this happens, colouring is ignored. */
1224    
1225    #ifdef JFRIEDL_DEBUG
1226          if (S_arg >= 0 && S_arg < mrc)
1227            {
1228            int first = S_arg * 2;
1229            int last  = first + 1;
1230            fwrite(ptr, 1, offsets[first], stdout);
1231            fprintf(stdout, "X");
1232            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1233            }
1234          else
1235    #endif
1236    
1237          /* We have to split the line(s) up if colouring, and search for further
1238          matches. */
1239    
1240          if (do_colour)
1241            {
1242            int last_offset = 0;
1243            fwrite(ptr, 1, offsets[0], stdout);
1244            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1245            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1246            fprintf(stdout, "%c[00m", 0x1b);
1247            for (;;)
1248              {
1249              last_offset += offsets[1];
1250              matchptr += offsets[1];
1251              length -= offsets[1];
1252              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1253              fwrite(matchptr, 1, offsets[0], stdout);
1254              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1255              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1256              fprintf(stdout, "%c[00m", 0x1b);
1257              }
1258            fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,
1259              stdout);
1260            }
1261    
1262          /* Not colouring; no need to search for further matches */
1263    
1264          else fwrite(ptr, 1, linelength + endlinelength, stdout);
1265          }
1266    
1267        /* End of doing what has to be done for a match */
1268    
1269        rc = 0;    /* Had some success */
1270    
1271        /* Remember where the last match happened for after_context. We remember
1272        where we are about to restart, and that line's number. */
1273    
1274        lastmatchrestart = ptr + linelength + endlinelength;
1275        lastmatchnumber = linenumber + 1;
1276        }
1277    
1278      /* For a match in multiline inverted mode (which of course did not cause
1279      anything to be printed), we have to move on to the end of the match before
1280      proceeding. */
1281    
1282      if (multiline && invert && match)
1283        {
1284        int ellength;
1285        char *endmatch = ptr + offsets[1];
1286        t = ptr;
1287        while (t < endmatch)
1288          {
1289          t = end_of_line(t, endptr, &ellength);
1290          if (t <= endmatch) linenumber++; else break;
1291          }
1292        endmatch = end_of_line(endmatch, endptr, &ellength);
1293        linelength = endmatch - ptr - ellength;
1294        }
1295    
1296      /* Advance to after the newline and increment the line number. The file
1297      offset to the current line is maintained in filepos. */
1298    
1299      ptr += linelength + endlinelength;
1300      filepos += linelength + endlinelength;
1301      linenumber++;
1302    
1303      /* If we haven't yet reached the end of the file (the buffer is full), and
1304      the current point is in the top 1/3 of the buffer, slide the buffer down by
1305      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1306      about to be lost, print them. */
1307    
1308      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1309        {
1310        if (after_context > 0 &&
1311            lastmatchnumber > 0 &&
1312            lastmatchrestart < buffer + MBUFTHIRD)
1313          {
1314          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1315          lastmatchnumber = 0;
1316          }
1317    
1318        /* Now do the shuffle */
1319    
1320        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1321        ptr -= MBUFTHIRD;
1322    
1323    #ifdef SUPPORT_LIBZ
1324        if (frtype == FR_LIBZ)
1325          bufflength = 2*MBUFTHIRD +
1326            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1327        else
1328    #endif
1329    
1330    #ifdef SUPPORT_LIBBZ2
1331        if (frtype == FR_LIBBZ2)
1332          bufflength = 2*MBUFTHIRD +
1333            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1334        else
1335    #endif
1336    
1337        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1338    
1339        endptr = buffer + bufflength;
1340    
1341        /* Adjust any last match point */
1342    
1343        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1344        }
1345      }     /* Loop through the whole file */
1346    
1347    /* End of file; print final "after" lines if wanted; do_after_lines sets
1348    hyphenpending if it prints something. */
1349    
1350    if (!only_matching && !count_only)
1351      {
1352      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1353      hyphenpending |= endhyphenpending;
1354      }
1355    
1356    /* Print the file name if we are looking for those without matches and there
1357    were none. If we found a match, we won't have got this far. */
1358    
1359    if (filenames == FN_NOMATCH_ONLY)
1360      {
1361      fprintf(stdout, "%s\n", printname);
1362      return 0;
1363      }
1364    
1365    /* Print the match count if wanted */
1366    
1367    if (count_only)
1368      {
1369      if (count > 0 || !omit_zero_count)
1370        {
1371        if (printname != NULL && filenames != FN_NONE)
1372          fprintf(stdout, "%s:", printname);
1373        fprintf(stdout, "%d\n", count);
1374        }
1375      }
1376    
1377    return rc;
1378    }
1379    
1380    
1381    
1382    /*************************************************
1383    *     Grep a file or recurse into a directory    *
1384    *************************************************/
1385    
1386    /* Given a path name, if it's a directory, scan all the files if we are
1387    recursing; if it's a file, grep it.
1388    
1389    Arguments:
1390      pathname          the path to investigate
1391      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1392      only_one_at_top   TRUE if the path is the only one at toplevel
1393    
1394    Returns:   0 if there was at least one match
1395               1 if there were no matches
1396               2 there was some kind of error
1397    
1398    However, file opening failures are suppressed if "silent" is set.
1399    */
1400    
1401    static int
1402    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1403    {
1404    int rc = 1;
1405    int sep;
1406    int frtype;
1407    int pathlen;
1408    void *handle;
1409    FILE *in = NULL;           /* Ensure initialized */
1410    
1411    #ifdef SUPPORT_LIBZ
1412    gzFile ingz = NULL;
1413    #endif
1414    
1415    #ifdef SUPPORT_LIBBZ2
1416    BZFILE *inbz2 = NULL;
1417    #endif
1418    
1419    /* If the file name is "-" we scan stdin */
1420    
1421    if (strcmp(pathname, "-") == 0)
1422      {
1423      return pcregrep(stdin, FR_PLAIN,
1424        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1425          stdin_name : NULL);
1426      }
1427    
1428    /* If the file is a directory, skip if skipping or if we are recursing, scan
1429    each file and directory within it, subject to any include or exclude patterns
1430    that were set. The scanning code is localized so it can be made
1431    system-specific. */
1432    
1433    if ((sep = isdirectory(pathname)) != 0)
1434      {
1435      if (dee_action == dee_SKIP) return 1;
1436      if (dee_action == dee_RECURSE)
1437        {
1438        char buffer[1024];
1439        char *nextfile;
1440        directory_type *dir = opendirectory(pathname);
1441    
1442        if (dir == NULL)
1443          {
1444          if (!silent)
1445            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1446              strerror(errno));
1447          return 2;
1448          }
1449    
1450        while ((nextfile = readdirectory(dir)) != NULL)
1451          {
1452          int frc, nflen;
1453          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1454          nflen = strlen(nextfile);
1455    
1456          if (isdirectory(buffer))
1457            {
1458            if (exclude_dir_compiled != NULL &&
1459                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1460              continue;
1461    
1462            if (include_dir_compiled != NULL &&
1463                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1464              continue;
1465            }
1466          else
1467            {
1468            if (exclude_compiled != NULL &&
1469                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1470              continue;
1471    
1472            if (include_compiled != NULL &&
1473                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1474              continue;
1475            }
1476    
1477          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1478          if (frc > 1) rc = frc;
1479           else if (frc == 0 && rc == 1) rc = 0;
1480          }
1481    
1482        closedirectory(dir);
1483        return rc;
1484        }
1485      }
1486    
1487    /* If the file is not a directory and not a regular file, skip it if that's
1488    been requested. */
1489    
1490    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1491    
1492    /* Control reaches here if we have a regular file, or if we have a directory
1493    and recursion or skipping was not requested, or if we have anything else and
1494    skipping was not requested. The scan proceeds. If this is the first and only
1495    argument at top level, we don't show the file name, unless we are only showing
1496    the file name, or the filename was forced (-H). */
1497    
1498    pathlen = strlen(pathname);
1499    
1500    /* Open using zlib if it is supported and the file name ends with .gz. */
1501    
1502    #ifdef SUPPORT_LIBZ
1503    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1504      {
1505      ingz = gzopen(pathname, "rb");
1506      if (ingz == NULL)
1507        {
1508        if (!silent)
1509          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1510            strerror(errno));
1511        return 2;
1512        }
1513      handle = (void *)ingz;
1514      frtype = FR_LIBZ;
1515      }
1516    else
1517    #endif
1518    
1519    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1520    
1521    #ifdef SUPPORT_LIBBZ2
1522    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1523      {
1524      inbz2 = BZ2_bzopen(pathname, "rb");
1525      handle = (void *)inbz2;
1526      frtype = FR_LIBBZ2;
1527      }
1528    else
1529    #endif
1530    
1531    /* Otherwise use plain fopen(). The label is so that we can come back here if
1532    an attempt to read a .bz2 file indicates that it really is a plain file. */
1533    
1534    #ifdef SUPPORT_LIBBZ2
1535    PLAIN_FILE:
1536    #endif
1537      {
1538      in = fopen(pathname, "rb");
1539      handle = (void *)in;
1540      frtype = FR_PLAIN;
1541      }
1542    
1543    /* All the opening methods return errno when they fail. */
1544    
1545    if (handle == NULL)
1546      {
1547      if (!silent)
1548        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1549          strerror(errno));
1550      return 2;
1551      }
1552    
1553    /* Now grep the file */
1554    
1555    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1556      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1557    
1558    /* Close in an appropriate manner. */
1559    
1560    #ifdef SUPPORT_LIBZ
1561    if (frtype == FR_LIBZ)
1562      gzclose(ingz);
1563    else
1564    #endif
1565    
1566    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1567    read failed. If the error indicates that the file isn't in fact bzipped, try
1568    again as a normal file. */
1569    
1570    #ifdef SUPPORT_LIBBZ2
1571    if (frtype == FR_LIBBZ2)
1572      {
1573      if (rc == 2)
1574        {
1575        int errnum;
1576        const char *err = BZ2_bzerror(inbz2, &errnum);
1577        if (errnum == BZ_DATA_ERROR_MAGIC)
1578          {
1579          BZ2_bzclose(inbz2);
1580          goto PLAIN_FILE;
1581          }
1582        else if (!silent)
1583          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1584            pathname, err);
1585        }
1586      BZ2_bzclose(inbz2);
1587      }
1588    else
1589    #endif
1590    
1591    /* Normal file close */
1592    
1593    fclose(in);
1594    
1595    /* Pass back the yield from pcregrep(). */
1596    
1597    return rc;
1598    }
1599    
1600    
1601    
1602    
1603    /*************************************************
1604    *                Usage function                  *
1605    *************************************************/
1606    
1607    static int
1608    usage(int rc)
1609    {
1610    option_item *op;
1611    fprintf(stderr, "Usage: pcregrep [-");
1612    for (op = optionlist; op->one_char != 0; op++)
1613      {
1614      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1615      }
1616    fprintf(stderr, "] [long options] [pattern] [files]\n");
1617    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1618      "options.\n");
1619    return rc;
1620    }
1621    
1622    
1623    
1624    
1625    /*************************************************
1626    *                Help function                   *
1627    *************************************************/
1628    
1629    static void
1630    help(void)
1631    {
1632    option_item *op;
1633    
1634    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1635    printf("Search for PATTERN in each FILE or standard input.\n");
1636    printf("PATTERN must be present if neither -e nor -f is used.\n");
1637    printf("\"-\" can be used as a file name to mean STDIN.\n");
1638    
1639    #ifdef SUPPORT_LIBZ
1640    printf("Files whose names end in .gz are read using zlib.\n");
1641    #endif
1642    
1643    #ifdef SUPPORT_LIBBZ2
1644    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1645    #endif
1646    
1647    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1648    printf("Other files and the standard input are read as plain files.\n\n");
1649    #else
1650    printf("All files are read as plain files, without any interpretation.\n\n");
1651    #endif
1652    
1653    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1654    printf("Options:\n");
1655    
1656    for (op = optionlist; op->one_char != 0; op++)
1657      {
1658      int n;
1659      char s[4];
1660      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1661      n = 30 - printf("  %s --%s", s, op->long_name);
1662      if (n < 1) n = 1;
1663      printf("%.*s%s\n", n, "                    ", op->help_text);
1664      }
1665    
1666    printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1667    printf("trailing white space is removed and blank lines are ignored.\n");
1668    printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1669    
1670    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1671    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1672    }
1673    
1674    
1675    
1676    
1677    /*************************************************
1678    *    Handle a single-letter, no data option      *
1679    *************************************************/
1680    
1681    static int
1682    handle_option(int letter, int options)
1683    {
1684    switch(letter)
1685      {
1686      case N_FOFFSETS: file_offsets = TRUE; break;
1687      case N_HELP: help(); exit(0);
1688      case N_LOFFSETS: line_offsets = number = TRUE; break;
1689      case 'c': count_only = TRUE; break;
1690      case 'F': process_options |= PO_FIXED_STRINGS; break;
1691      case 'H': filenames = FN_FORCE; break;
1692      case 'h': filenames = FN_NONE; break;
1693      case 'i': options |= PCRE_CASELESS; break;
1694      case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1695      case 'L': filenames = FN_NOMATCH_ONLY; break;
1696      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1697      case 'n': number = TRUE; break;
1698      case 'o': only_matching = TRUE; break;
1699      case 'q': quiet = TRUE; break;
1700      case 'r': dee_action = dee_RECURSE; break;
1701      case 's': silent = TRUE; break;
1702      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1703    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1704    case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1705      case 'x': process_options |= PO_LINE_MATCH; break;
1706    
1707      case 'V':
1708      fprintf(stderr, "pcregrep version %s\n", pcre_version());
1709      exit(0);
1710      break;
1711    
1712      default:
1713      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1714      exit(usage(2));
1715      }
1716    
1717    return options;
1718    }
1719    
1720    
1721    
1722    
1723    /*************************************************
1724    *          Construct printed ordinal             *
1725    *************************************************/
1726    
1727    case 'V':  /* This turns a number into "1st", "3rd", etc. */
   fprintf(stderr, "pcregrep version %s using ", VERSION);  
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
   exit(0);  
   break;  
1728    
/* Turn a number into its English ordinal: "1st", "2nd", "3rd", "4th", and so
on. Numbers whose last two digits are 11, 12, or 13 take "th" ("11th",
"112th") even though their final digit is 1, 2, or 3.

Argument:   n   the number
Returns:        pointer to a static buffer holding the text; the buffer is
                overwritten by the next call
*/

static char *
ordin(int n)
{
/* Three characters per byte over-estimates the decimal digits of an int; the
extra four cover a sign, the two-letter ending, and the terminating zero. */

static char buffer[3 * sizeof(int) + 4];
const char *ending = "th";
int lasttwo = n % 100;

/* The digit-based endings apply only outside 11..13. */

if (lasttwo < 11 || lasttwo > 13) switch (n%10)
  {
  case 1: ending = "st"; break;
  case 2: ending = "nd"; break;
  case 3: ending = "rd"; break;
  default: break;
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1745    
1746  return options;  
1747    
1748    /*************************************************
1749    *          Compile a single pattern              *
1750    *************************************************/
1751    
1752    /* When the -F option has been used, this is called for each substring.
1753    Otherwise it's called for each supplied pattern.
1754    
1755    Arguments:
1756      pattern        the pattern string
1757      options        the PCRE options
1758      filename       the file name, or NULL for a command-line pattern
1759      count          0 if this is the only command line pattern, or
1760                     number of the command line pattern, or
1761                     linenumber for a pattern from a file
1762    
1763    Returns:         TRUE on success, FALSE after an error
1764    */
1765    
1766    static BOOL
1767    compile_single_pattern(char *pattern, int options, char *filename, int count)
1768    {
1769    char buffer[MBUFTHIRD + 16];
1770    const char *error;
1771    int errptr;
1772    
1773    if (pattern_count >= MAX_PATTERN_COUNT)
1774      {
1775      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1776        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1777      return FALSE;
1778      }
1779    
1780    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1781      suffix[process_options]);
1782    pattern_list[pattern_count] =
1783      pcre_compile(buffer, options, &error, &errptr, pcretables);
1784    if (pattern_list[pattern_count] != NULL)
1785      {
1786      pattern_count++;
1787      return TRUE;
1788      }
1789    
1790    /* Handle compile errors */
1791    
1792    errptr -= (int)strlen(prefix[process_options]);
1793    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1794    
1795    if (filename == NULL)
1796      {
1797      if (count == 0)
1798        fprintf(stderr, "pcregrep: Error in command-line regex "
1799          "at offset %d: %s\n", errptr, error);
1800      else
1801        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1802          "at offset %d: %s\n", ordin(count), errptr, error);
1803      }
1804    else
1805      {
1806      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1807        "at offset %d: %s\n", count, filename, errptr, error);
1808      }
1809    
1810    return FALSE;
1811  }  }
1812    
1813    
1814    
1815    /*************************************************
1816    *           Compile one supplied pattern         *
1817    *************************************************/
1818    
1819    /* When the -F option has been used, each string may be a list of strings,
1820    separated by line breaks. They will be matched literally.
1821    
1822    Arguments:
1823      pattern        the pattern string
1824      options        the PCRE options
1825      filename       the file name, or NULL for a command-line pattern
1826      count          0 if this is the only command line pattern, or
1827                     number of the command line pattern, or
1828                     linenumber for a pattern from a file
1829    
1830    Returns:         TRUE on success, FALSE after an error
1831    */
1832    
1833    static BOOL
1834    compile_pattern(char *pattern, int options, char *filename, int count)
1835    {
1836    if ((process_options & PO_FIXED_STRINGS) != 0)
1837      {
1838      char *eop = pattern + strlen(pattern);
1839      char buffer[MBUFTHIRD];
1840      for(;;)
1841        {
1842        int ellength;
1843        char *p = end_of_line(pattern, eop, &ellength);
1844        if (ellength == 0)
1845          return compile_single_pattern(pattern, options, filename, count);
1846        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1847        pattern = p;
1848        if (!compile_single_pattern(buffer, options, filename, count))
1849          return FALSE;
1850        }
1851      }
1852    else return compile_single_pattern(pattern, options, filename, count);
1853    }
1854    
1855    
1856    
1857  /*************************************************  /*************************************************
1858  *                Main program                    *  *                Main program                    *
1859  *************************************************/  *************************************************/
1860    
1861    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1862    
1863  int  int
1864  main(int argc, char **argv)  main(int argc, char **argv)
1865  {  {
1866  int i, j;  int i, j;
1867  int rc = 1;  int rc = 1;
1868  int options = 0;  int pcre_options = 0;
1869    int cmd_pattern_count = 0;
1870    int hint_count = 0;
1871  int errptr;  int errptr;
 const char *error;  
1872  BOOL only_one_at_top;  BOOL only_one_at_top;
1873    char *patterns[MAX_PATTERN_COUNT];
1874    const char *locale_from = "--locale";
1875    const char *error;
1876    
1877    /* Set the default line ending value from the default in the PCRE library;
1878    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1879    Note that the return values from pcre_config(), though derived from the ASCII
1880    codes, are the same in EBCDIC environments, so we must use the actual values
1881    rather than escapes such as '\r'. */
1882    
1883    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1884    switch(i)
1885      {
1886      default:               newline = (char *)"lf"; break;
1887      case 13:               newline = (char *)"cr"; break;
1888      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
1889      case -1:               newline = (char *)"any"; break;
1890      case -2:               newline = (char *)"anycrlf"; break;
1891      }
1892    
1893  /* Process the options */  /* Process the options */
1894    
1895  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
1896    {    {
1897      option_item *op = NULL;
1898      char *option_data = (char *)"";    /* default to keep compiler happy */
1899      BOOL longop;
1900      BOOL longopwasequals = FALSE;
1901    
1902    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
1903    
1904    /* Long name options */    /* If we hit an argument that is just "-", it may be a reference to STDIN,
1905      but only if we have previously had -e or -f to define the patterns. */
1906    
1907      if (argv[i][1] == 0)
1908        {
1909        if (pattern_filename != NULL || pattern_count > 0) break;
1910          else exit(usage(2));
1911        }
1912    
1913      /* Handle a long name option, or -- to terminate the options */
1914    
1915    if (argv[i][1] == '-')    if (argv[i][1] == '-')
1916      {      {
1917      option_item *op;      char *arg = argv[i] + 2;
1918        char *argequals = strchr(arg, '=');
1919    
1920      if (strncmp(argv[i]+2, "file=", 5) == 0)      if (*arg == 0)    /* -- terminates options */
1921        {        {
1922        pattern_filename = argv[i] + 7;        i++;
1923        continue;        break;                /* out of the options-handling loop */
1924        }        }
1925    
1926        longop = TRUE;
1927    
1928        /* Some long options have data that follows after =, for example file=name.
1929        Some options have variations in the long name spelling: specifically, we
1930        allow "regexp" because GNU grep allows it, though I personally go along
1931        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1932        These options are entered in the table as "regex(p)". No option is in both
1933        these categories, fortunately. */
1934    
1935      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
1936        {        {
1937        if (strcmp(argv[i]+2, op->long_name) == 0)        char *opbra = strchr(op->long_name, '(');
1938          char *equals = strchr(op->long_name, '=');
1939          if (opbra == NULL)     /* Not a (p) case */
1940          {          {
1941          options = handle_option(op->one_char, options);          if (equals == NULL)  /* Not thing=data case */
1942          break;            {
1943              if (strcmp(arg, op->long_name) == 0) break;
1944              }
1945            else                 /* Special case xxx=data */
1946              {
1947              int oplen = equals - op->long_name;
1948              int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1949              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1950                {
1951                option_data = arg + arglen;
1952                if (*option_data == '=')
1953                  {
1954                  option_data++;
1955                  longopwasequals = TRUE;
1956                  }
1957                break;
1958                }
1959              }
1960            }
1961          else                   /* Special case xxxx(p) */
1962            {
1963            char buff1[24];
1964            char buff2[24];
1965            int baselen = opbra - op->long_name;
1966            sprintf(buff1, "%.*s", baselen, op->long_name);
1967            sprintf(buff2, "%s%.*s", buff1,
1968              (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1969            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1970              break;
1971          }          }
1972        }        }
1973    
1974      if (op->one_char == 0)      if (op->one_char == 0)
1975        {        {
1976        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
# Line 417  for (i = 1; i < argc; i++) Line 1978  for (i = 1; i < argc; i++)
1978        }        }
1979      }      }
1980    
1981    /* One-char options */  
1982      /* Jeffrey Friedl's debugging harness uses these additional options which
1983      are not in the right form for putting in the option table because they use
1984      only one hyphen, yet are more than one character long. By putting them
1985      separately here, they will not get displayed as part of the help() output,
1986      but I don't think Jeffrey will care about that. */
1987    
1988    #ifdef JFRIEDL_DEBUG
1989      else if (strcmp(argv[i], "-pre") == 0) {
1990              jfriedl_prefix = argv[++i];
1991              continue;
1992      } else if (strcmp(argv[i], "-post") == 0) {
1993              jfriedl_postfix = argv[++i];
1994              continue;
1995      } else if (strcmp(argv[i], "-XT") == 0) {
1996              sscanf(argv[++i], "%d", &jfriedl_XT);
1997              continue;
1998      } else if (strcmp(argv[i], "-XR") == 0) {
1999              sscanf(argv[++i], "%d", &jfriedl_XR);
2000              continue;
2001      }
2002    #endif
2003    
2004    
2005      /* One-char options; many that have no data may be in a single argument; we
2006      continue till we hit the last one or one that needs data. */
2007    
2008    else    else
2009      {      {
2010      char *s = argv[i] + 1;      char *s = argv[i] + 1;
2011        longop = FALSE;
2012      while (*s != 0)      while (*s != 0)
2013        {        {
2014        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
2015            { if (*s == op->one_char) break; }
2016          if (op->one_char == 0)
2017          {          {
2018          pattern_filename = s + 1;          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2019          if (pattern_filename[0] == 0)            *s, argv[i]);
2020            {          exit(usage(2));
2021            if (i >= argc - 1)          }
2022              {        if (op->type != OP_NODATA || s[1] == 0)
2023              fprintf(stderr, "pcregrep: File name missing after -f\n");          {
2024              exit(usage(2));          option_data = s+1;
             }  
           pattern_filename = argv[++i];  
           }  
2025          break;          break;
2026          }          }
2027        else options = handle_option(*s++, options);        pcre_options = handle_option(*s++, pcre_options);
2028        }        }
2029      }      }
   }  
2030    
2031  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));    /* At this point we should have op pointing to a matched option. If the type
2032  hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));    is NO_DATA, it means that there is no data, and the option might set
2033      something in the PCRE options. */
2034    
2035  if (pattern_list == NULL || hints_list == NULL)    if (op->type == OP_NODATA)
2036    {      {
2037    fprintf(stderr, "pcregrep: malloc failed\n");      pcre_options = handle_option(op->one_char, pcre_options);
2038    return 2;      continue;
2039    }      }
2040    
2041  /* Compile the regular expression(s). */    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2042      either has a value or defaults to something. It cannot have data in a
2043      separate item. At the moment, the only such options are "colo(u)r" and
2044      Jeffrey Friedl's special -S debugging option. */
2045    
2046  if (pattern_filename != NULL)    if (*option_data == 0 &&
2047    {        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
   FILE *f = fopen(pattern_filename, "r");  
   char buffer[BUFSIZ];  
   if (f == NULL)  
2048      {      {
2049      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      switch (op->one_char)
2050        strerror(errno));        {
2051      return 2;        case N_COLOUR:
2052          colour_option = (char *)"auto";
2053          break;
2054    #ifdef JFRIEDL_DEBUG
2055          case 'S':
2056          S_arg = 0;
2057          break;
2058    #endif
2059          }
2060        continue;
2061      }      }
2062    while (fgets(buffer, sizeof(buffer), f) != NULL)  
2063      /* Otherwise, find the data string for the option. */
2064    
2065      if (*option_data == 0)
2066      {      {
2067      char *s = buffer + (int)strlen(buffer);      if (i >= argc - 1 || longopwasequals)
2068      if (pattern_count >= MAX_PATTERN_COUNT)        {
2069          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2070          exit(usage(2));
2071          }
2072        option_data = argv[++i];
2073        }
2074    
2075      /* If the option type is OP_PATLIST, it's the -e option, which can be called
2076      multiple times to create a list of patterns. */
2077    
2078      if (op->type == OP_PATLIST)
2079        {
2080        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2081        {        {
2082        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2083          MAX_PATTERN_COUNT);          MAX_PATTERN_COUNT);
2084        return 2;        return 2;
2085        }        }
2086      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      patterns[cmd_pattern_count++] = option_data;
2087      if (s == buffer) continue;      }
2088      *s = 0;  
2089      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,    /* Otherwise, deal with single string or numeric data values. */
2090        &errptr, NULL);  
2091      if (pattern_list[pattern_count++] == NULL)    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2092        {
2093        *((char **)op->dataptr) = option_data;
2094        }
2095      else
2096        {
2097        char *endptr;
2098        int n = strtoul(option_data, &endptr, 10);
2099        if (*endptr != 0)
2100        {        {
2101        fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",        if (longop)
2102          pattern_count, errptr, error);          {
2103        return 2;          char *equals = strchr(op->long_name, '=');
2104            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2105              equals - op->long_name;
2106            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2107              option_data, nlen, op->long_name);
2108            }
2109          else
2110            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2111              option_data, op->one_char);
2112          exit(usage(2));
2113        }        }
2114        *((int *)op->dataptr) = n;
2115        }
2116      }
2117    
2118    /* Options have been decoded. If -C was used, its value is used as a default
2119    for -A and -B. */
2120    
2121    if (both_context > 0)
2122      {
2123      if (after_context == 0) after_context = both_context;
2124      if (before_context == 0) before_context = both_context;
2125      }
2126    
2127    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2128    However, the latter two set the only_matching flag. */
2129    
2130    if ((only_matching && (file_offsets || line_offsets)) ||
2131        (file_offsets && line_offsets))
2132      {
2133      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2134        "and/or --line-offsets\n");
2135      exit(usage(2));
2136      }
2137    
2138    if (file_offsets || line_offsets) only_matching = TRUE;
2139    
2140    /* If a locale has not been provided as an option, see if the LC_CTYPE or
2141    LC_ALL environment variable is set, and if so, use it. */
2142    
2143    if (locale == NULL)
2144      {
2145      locale = getenv("LC_ALL");
2146      locale_from = "LCC_ALL";
2147      }
2148    
2149    if (locale == NULL)
2150      {
2151      locale = getenv("LC_CTYPE");
2152      locale_from = "LC_CTYPE";
2153      }
2154    
2155    /* If a locale has been provided, set it, and generate the tables the PCRE
2156    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2157    
2158    if (locale != NULL)
2159      {
2160      if (setlocale(LC_CTYPE, locale) == NULL)
2161        {
2162        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2163          locale, locale_from);
2164        return 2;
2165        }
2166      pcretables = pcre_maketables();
2167      }
2168    
2169    /* Sort out colouring */
2170    
2171    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2172      {
2173      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2174      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2175      else
2176        {
2177        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2178          colour_option);
2179        return 2;
2180        }
2181      if (do_colour)
2182        {
2183        char *cs = getenv("PCREGREP_COLOUR");
2184        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2185        if (cs != NULL) colour_string = cs;
2186      }      }
   fclose(f);  
2187    }    }
2188    
2189  /* If no file name, a single regex must be given inline */  /* Interpret the newline type; the default settings are Unix-like. */
2190    
2191    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2192      {
2193      pcre_options |= PCRE_NEWLINE_CR;
2194      endlinetype = EL_CR;
2195      }
2196    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2197      {
2198      pcre_options |= PCRE_NEWLINE_LF;
2199      endlinetype = EL_LF;
2200      }
2201    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2202      {
2203      pcre_options |= PCRE_NEWLINE_CRLF;
2204      endlinetype = EL_CRLF;
2205      }
2206    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2207      {
2208      pcre_options |= PCRE_NEWLINE_ANY;
2209      endlinetype = EL_ANY;
2210      }
2211    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2212      {
2213      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2214      endlinetype = EL_ANYCRLF;
2215      }
2216  else  else
2217    {    {
2218    if (i >= argc) return usage(0);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2219    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);    return 2;
2220    if (pattern_list[0] == NULL)    }
2221    
2222    /* Interpret the text values for -d and -D */
2223    
2224    if (dee_option != NULL)
2225      {
2226      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2227      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2228      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2229      else
2230        {
2231        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2232        return 2;
2233        }
2234      }
2235    
2236    if (DEE_option != NULL)
2237      {
2238      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2239      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2240      else
2241      {      {
2242      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
       error);  
2243      return 2;      return 2;
2244      }      }
   pattern_count++;  
2245    }    }
2246    
2247  /* Study the regular expressions, as we will be running them may times */  /* Check the values for Jeffrey Friedl's debugging options. */
2248    
2249    #ifdef JFRIEDL_DEBUG
2250    if (S_arg > 9)
2251      {
2252      fprintf(stderr, "pcregrep: bad value for -S option\n");
2253      return 2;
2254      }
2255    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2256      {
2257      if (jfriedl_XT == 0) jfriedl_XT = 1;
2258      if (jfriedl_XR == 0) jfriedl_XR = 1;
2259      }
2260    #endif
2261    
2262    /* Get memory to store the pattern and hints lists. */
2263    
2264    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2265    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2266    
2267    if (pattern_list == NULL || hints_list == NULL)
2268      {
2269      fprintf(stderr, "pcregrep: malloc failed\n");
2270      goto EXIT2;
2271      }
2272    
2273    /* If no patterns were provided by -e, and there is no file provided by -f,
2274    the first argument is the one and only pattern, and it must exist. */
2275    
2276    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2277      {
2278      if (i >= argc) return usage(2);
2279      patterns[cmd_pattern_count++] = argv[i++];
2280      }
2281    
2282    /* Compile the patterns that were provided on the command line, either by
2283    multiple uses of -e or as a single unkeyed pattern. */
2284    
2285    for (j = 0; j < cmd_pattern_count; j++)
2286      {
2287      if (!compile_pattern(patterns[j], pcre_options, NULL,
2288           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2289        goto EXIT2;
2290      }
2291    
2292    /* Compile the regular expressions that are provided in a file. */
2293    
2294    if (pattern_filename != NULL)
2295      {
2296      int linenumber = 0;
2297      FILE *f;
2298      char *filename;
2299      char buffer[MBUFTHIRD];
2300    
2301      if (strcmp(pattern_filename, "-") == 0)
2302        {
2303        f = stdin;
2304        filename = stdin_name;
2305        }
2306      else
2307        {
2308        f = fopen(pattern_filename, "r");
2309        if (f == NULL)
2310          {
2311          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2312            strerror(errno));
2313          goto EXIT2;
2314          }
2315        filename = pattern_filename;
2316        }
2317    
2318      while (fgets(buffer, MBUFTHIRD, f) != NULL)
2319        {
2320        char *s = buffer + (int)strlen(buffer);
2321        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2322        *s = 0;
2323        linenumber++;
2324        if (buffer[0] == 0) continue;   /* Skip blank lines */
2325        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2326          goto EXIT2;
2327        }
2328    
2329      if (f != stdin) fclose(f);
2330      }
2331    
2332    /* Study the regular expressions, as we will be running them many times */
2333    
2334  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
2335    {    {
# Line 513  for (j = 0; j < pattern_count; j++) Line 2339  for (j = 0; j < pattern_count; j++)
2339      char s[16];      char s[16];
2340      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2341      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2342      return 2;      goto EXIT2;
2343        }
2344      hint_count++;
2345      }
2346    
2347    /* If there are include or exclude patterns, compile them. */
2348    
2349    if (exclude_pattern != NULL)
2350      {
2351      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2352        pcretables);
2353      if (exclude_compiled == NULL)
2354        {
2355        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2356          errptr, error);
2357        goto EXIT2;
2358        }
2359      }
2360    
2361    if (include_pattern != NULL)
2362      {
2363      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2364        pcretables);
2365      if (include_compiled == NULL)
2366        {
2367        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2368          errptr, error);
2369        goto EXIT2;
2370        }
2371      }
2372    
2373    if (exclude_dir_pattern != NULL)
2374      {
2375      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2376        pcretables);
2377      if (exclude_dir_compiled == NULL)
2378        {
2379        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2380          errptr, error);
2381        goto EXIT2;
2382        }
2383      }
2384    
2385    if (include_dir_pattern != NULL)
2386      {
2387      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2388        pcretables);
2389      if (include_dir_compiled == NULL)
2390        {
2391        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2392          errptr, error);
2393        goto EXIT2;
2394      }      }
2395    }    }
2396    
2397  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
2398    
2399  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc)
2400      {
2401      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2402      goto EXIT;
2403      }
2404    
2405  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2406  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
2407  the file name if the argument is not a directory. */  the file name if the argument is not a directory and filenames are not
2408    otherwise forced. */
2409    
2410  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
 if (filenames_only) filenames = TRUE;  
2411    
2412  for (; i < argc; i++)  for (; i < argc; i++)
2413    {    {
2414    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2415    if (frc == 0 && rc == 1) rc = 0;      only_one_at_top);
2416      if (frc > 1) rc = frc;
2417        else if (frc == 0 && rc == 1) rc = 0;
2418    }    }
2419    
2420    EXIT:
2421    if (pattern_list != NULL)
2422      {
2423      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2424      free(pattern_list);
2425      }
2426    if (hints_list != NULL)
2427      {
2428      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2429      free(hints_list);
2430      }
2431  return rc;  return rc;
2432    
2433    EXIT2:
2434    rc = 2;
2435    goto EXIT;
2436  }  }
2437    
2438  /* End */  /* End of pcregrep */

Legend:
Removed from v.53  
changed lines
  Added in v.421

  ViewVC Help
Powered by ViewVC 1.1.5