/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC  |  revision 296 by ph10, Tue Jan 1 20:09:30 2008 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix system it can recurse into directories. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2007 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55    #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 18  its pattern matching. On a Unix system i Line 70  its pattern matching. On a Unix system i
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "2.0 01-Aug-2001"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    
75    #if BUFSIZ > 8192
76    #define MBUFTHIRD BUFSIZ
77    #else
78    #define MBUFTHIRD 8192
79    #endif
80    
81    /* Values for the "filenames" variable, which specifies options for file name
82    output. The order is important; it is assumed that a file name is wanted for
83    all values greater than FN_DEFAULT. */
84    
85    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
86    
87    /* File reading styles */
88    
89    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
90    
91    /* Actions for the -d and -D options */
92    
93    enum { dee_READ, dee_SKIP, dee_RECURSE };
94    enum { DEE_READ, DEE_SKIP };
95    
96    /* Actions for special processing options (flag bits) */
97    
98    #define PO_WORD_MATCH     0x0001
99    #define PO_LINE_MATCH     0x0002
100    #define PO_FIXED_STRINGS  0x0004
101    
102    /* Line ending types */
103    
104    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
105    
106    
107    
108  /*************************************************  /*************************************************
109  *               Global variables                 *  *               Global variables                 *
110  *************************************************/  *************************************************/
111    
112    /* Jeffrey Friedl has some debugging requirements that are not part of the
113    regular code. */
114    
115    #ifdef JFRIEDL_DEBUG
116    static int S_arg = -1;
117    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
118    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
119    static const char *jfriedl_prefix = "";
120    static const char *jfriedl_postfix = "";
121    #endif
122    
123    static int  endlinetype;
124    
125    static char *colour_string = (char *)"1;31";
126    static char *colour_option = NULL;
127    static char *dee_option = NULL;
128    static char *DEE_option = NULL;
129    static char *newline = NULL;
130  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
131    static char *stdin_name = (char *)"(standard input)";
132    static char *locale = NULL;
133    
134    static const unsigned char *pcretables = NULL;
135    
136  static int  pattern_count = 0;  static int  pattern_count = 0;
137  static pcre **pattern_list;  static pcre **pattern_list = NULL;
138  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
139    
140    static char *include_pattern = NULL;
141    static char *exclude_pattern = NULL;
142    
143    static pcre *include_compiled = NULL;
144    static pcre *exclude_compiled = NULL;
145    
146    static int after_context = 0;
147    static int before_context = 0;
148    static int both_context = 0;
149    static int dee_action = dee_READ;
150    static int DEE_action = DEE_READ;
151    static int error_count = 0;
152    static int filenames = FN_DEFAULT;
153    static int process_options = 0;
154    
155  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
156  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
157  static BOOL filenames_only = FALSE;  static BOOL file_offsets = FALSE;
158    static BOOL hyphenpending = FALSE;
159  static BOOL invert = FALSE;  static BOOL invert = FALSE;
160    static BOOL line_offsets = FALSE;
161    static BOOL multiline = FALSE;
162  static BOOL number = FALSE;  static BOOL number = FALSE;
163  static BOOL recurse = FALSE;  static BOOL only_matching = FALSE;
164    static BOOL quiet = FALSE;
165  static BOOL silent = FALSE;  static BOOL silent = FALSE;
166  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
167    
168  /* Structure for options and list of them */  /* Structure for options and list of them */
169    
170    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
171           OP_PATLIST };
172    
173  typedef struct option_item {  typedef struct option_item {
174      int type;
175    int one_char;    int one_char;
176    char *long_name;    void *dataptr;
177    char *help_text;    const char *long_name;
178      const char *help_text;
179  } option_item;  } option_item;
180    
181    /* Options without a single-letter equivalent get a negative value. This can be
182    used to identify them. */
183    
184    #define N_COLOUR    (-1)
185    #define N_EXCLUDE   (-2)
186    #define N_HELP      (-3)
187    #define N_INCLUDE   (-4)
188    #define N_LABEL     (-5)
189    #define N_LOCALE    (-6)
190    #define N_NULL      (-7)
191    #define N_LOFFSETS  (-8)
192    #define N_FOFFSETS  (-9)
193    
194  static option_item optionlist[] = {  static option_item optionlist[] = {
195    { -1,  "help",         "display this help and exit" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
196    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
197    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
198    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
199    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
200    { 'n', "line-number",  "print line number with output lines" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
201    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
202    { 's', "no-messages",  "suppress error messages" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
203    { 'V', "version",      "print version information and exit" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
204    { 'v', "invert-match", "select non-matching lines" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
205    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
206    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
207    { 0,    NULL,           NULL }    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
208      { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
209      { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
210      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
211      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
212      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
213      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
214      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
215      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
216      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
217      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
218      { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
219      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
220      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
221      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
222      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
223      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
224      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
225    #ifdef JFRIEDL_DEBUG
226      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
227    #endif
228      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
229      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
230      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
231      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
232      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
233      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
234      { OP_NODATA,    0,        NULL,               NULL,            NULL }
235  };  };
236    
237    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
238    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
239    that the combination of -w and -x has the same effect as -x on its own, so we
240    can treat them as the same. */
241    
242    static const char *prefix[] = {
243      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
244    
245    static const char *suffix[] = {
246      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
247    
248    /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
249    
250    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
251    
252    const char utf8_table4[] = {
253      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
254      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
255      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
256      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
257    
258    
259    
260  /*************************************************  /*************************************************
261  *       Functions for directory scanning         *  *            OS-specific functions               *
262  *************************************************/  *************************************************/
263    
264  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
265  although at present the only ones are for Unix, and for "no directory recursion  although at present the only ones are for Unix, Win32, and for "no support". */
 support". */  
266    
267    
268  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
269    
270  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
271  #include <sys/types.h>  #include <sys/types.h>
272  #include <sys/stat.h>  #include <sys/stat.h>
273  #include <dirent.h>  #include <dirent.h>
274    
275  typedef DIR directory_type;  typedef DIR directory_type;
276    
277  int  static int
278  isdirectory(char *filename)  isdirectory(char *filename)
279  {  {
280  struct stat statbuf;  struct stat statbuf;
# Line 92  if (stat(filename, &statbuf) < 0) Line 283  if (stat(filename, &statbuf) < 0)
283  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
284  }  }
285    
286  directory_type *  static directory_type *
287  opendirectory(char *filename)  opendirectory(char *filename)
288  {  {
289  return opendir(filename);  return opendir(filename);
290  }  }
291    
292  char *  static char *
293  readdirectory(directory_type *dir)  readdirectory(directory_type *dir)
294  {  {
295  for (;;)  for (;;)
# Line 108  for (;;) Line 299  for (;;)
299    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
300      return dent->d_name;      return dent->d_name;
301    }    }
302    /* Control never reaches here */
303    }
304    
305    static void
306    closedirectory(directory_type *dir)
307    {
308    closedir(dir);
309    }
310    
311    
312    /************* Test for regular file in Unix **********/
313    
314    static int
315    isregfile(char *filename)
316    {
317    struct stat statbuf;
318    if (stat(filename, &statbuf) < 0)
319      return 1;        /* In the expectation that opening as a file will fail */
320    return (statbuf.st_mode & S_IFMT) == S_IFREG;
321    }
322    
323    
324    /************* Test stdout for being a terminal in Unix **********/
325    
326    static BOOL
327    is_stdout_tty(void)
328    {
329    return isatty(fileno(stdout));
330    }
331    
332    
333    /************* Directory scanning in Win32 ***********/
334    
335    /* I (Philip Hazel) have no means of testing this code. It was contributed by
336    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
337    when it did not exist. David Byron added a patch that moved the #include of
338    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
339    */
340    
341    #elif HAVE_WINDOWS_H
342    
343    #ifndef STRICT
344    # define STRICT
345    #endif
346    #ifndef WIN32_LEAN_AND_MEAN
347    # define WIN32_LEAN_AND_MEAN
348    #endif
349    
350    #include <windows.h>
351    
352    #ifndef INVALID_FILE_ATTRIBUTES
353    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
354    #endif
355    
356    typedef struct directory_type
357    {
358    HANDLE handle;
359    BOOL first;
360    WIN32_FIND_DATA data;
361    } directory_type;
362    
363    int
364    isdirectory(char *filename)
365    {
366    DWORD attr = GetFileAttributes(filename);
367    if (attr == INVALID_FILE_ATTRIBUTES)
368      return 0;
369    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
370    }
371    
372    directory_type *
373    opendirectory(char *filename)
374    {
375    size_t len;
376    char *pattern;
377    directory_type *dir;
378    DWORD err;
379    len = strlen(filename);
380    pattern = (char *) malloc(len + 3);
381    dir = (directory_type *) malloc(sizeof(*dir));
382    if ((pattern == NULL) || (dir == NULL))
383      {
384      fprintf(stderr, "pcregrep: malloc failed\n");
385      exit(2);
386      }
387    memcpy(pattern, filename, len);
388    memcpy(&(pattern[len]), "\\*", 3);
389    dir->handle = FindFirstFile(pattern, &(dir->data));
390    if (dir->handle != INVALID_HANDLE_VALUE)
391      {
392      free(pattern);
393      dir->first = TRUE;
394      return dir;
395      }
396    err = GetLastError();
397    free(pattern);
398    free(dir);
399    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
400    return NULL;
401    }
402    
403    char *
404    readdirectory(directory_type *dir)
405    {
406    for (;;)
407      {
408      if (!dir->first)
409        {
410        if (!FindNextFile(dir->handle, &(dir->data)))
411          return NULL;
412        }
413      else
414        {
415        dir->first = FALSE;
416        }
417      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
418        return dir->data.cFileName;
419      }
420    #ifndef _MSC_VER
421  return NULL;   /* Keep compiler happy; never executed */  return NULL;   /* Keep compiler happy; never executed */
422    #endif
423  }  }
424    
425  void  void
426  closedirectory(directory_type *dir)  closedirectory(directory_type *dir)
427  {  {
428  closedir(dir);  FindClose(dir->handle);
429    free(dir);
430  }  }
431    
432    
433  #else  /************* Test for regular file in Win32 **********/
434    
435    /* I don't know how to do this, or if it can be done; assume all paths are
436    regular if they are not directories. */
437    
438    int isregfile(char *filename)
439    {
440    return !isdirectory(filename);
441    }
442    
443    
444    /************* Test stdout for being a terminal in Win32 **********/
445    
446    /* I don't know how to do this; assume never */
447    
448    static BOOL
449    is_stdout_tty(void)
450    {
451    return FALSE;
452    }
453    
454    
455  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
456    
457  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
458    
459    #else
460    
461  typedef void directory_type;  typedef void directory_type;
462    
463  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
464  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
465  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
466  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
467    
468    
469    /************* Test for regular file when we can't do it **********/
470    
471    /* Assume all files are regular. */
472    
473    int isregfile(char *filename) { return 1; }
474    
475    
476    /************* Test stdout for being a terminal when we can't do it **********/
477    
478    static BOOL
479    is_stdout_tty(void)
480    {
481    return FALSE;
482    }
483    
484    
485  #endif  #endif
486    
487    
488    
489  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
490  /*************************************************  /*************************************************
491  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
492  *************************************************/  *************************************************/
# Line 159  return sys_errlist[n]; Line 509  return sys_errlist[n];
509    
510    
511  /*************************************************  /*************************************************
512  *              Grep an individual file           *  *             Find end of line                   *
513  *************************************************/  *************************************************/
514    
515  static int  /* The length of the endline sequence that is found is set via lenptr. This may
516  pcregrep(FILE *in, char *name)  be zero at the very end of the file if there is no line-ending sequence there.
517  {  
518  int rc = 1;  Arguments:
519  int linenumber = 0;    p         current position in line
520  int count = 0;    endptr    end of available data
521  int offsets[99];    lenptr    where to put the length of the eol sequence
522  char buffer[BUFSIZ];  
523    Returns:    pointer after the last byte of the line, including the newline byte(s)
524    */
525    
526  while (fgets(buffer, sizeof(buffer), in) != NULL)  static char *
527    end_of_line(char *p, char *endptr, int *lenptr)
528    {
529    switch(endlinetype)
530    {    {
531    BOOL match = FALSE;    default:      /* Just in case */
532    int i;    case EL_LF:
533    int length = (int)strlen(buffer);    while (p < endptr && *p != '\n') p++;
534    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    if (p < endptr)
535    linenumber++;      {
536        *lenptr = 1;
537        return p + 1;
538        }
539      *lenptr = 0;
540      return endptr;
541    
542    for (i = 0; !match && i < pattern_count; i++)    case EL_CR:
543      while (p < endptr && *p != '\r') p++;
544      if (p < endptr)
545      {      {
546      match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,      *lenptr = 1;
547        offsets, 99) >= 0;      return p + 1;
     if (match && whole_lines && offsets[1] != length) match = FALSE;  
548      }      }
549      *lenptr = 0;
550      return endptr;
551    
552    if (match != invert)    case EL_CRLF:
553      for (;;)
554      {      {
555      if (count_only) count++;      while (p < endptr && *p != '\r') p++;
556        if (++p >= endptr)
557          {
558          *lenptr = 0;
559          return endptr;
560          }
561        if (*p == '\n')
562          {
563          *lenptr = 2;
564          return p + 1;
565          }
566        }
567      break;
568    
569      case EL_ANYCRLF:
570      while (p < endptr)
571        {
572        int extra = 0;
573        register int c = *((unsigned char *)p);
574    
575      else if (filenames_only)      if (utf8 && c >= 0xc0)
576        {        {
577        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        int gcii, gcss;
578        return 0;        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
579          gcss = 6*extra;
580          c = (c & utf8_table3[extra]) << gcss;
581          for (gcii = 1; gcii <= extra; gcii++)
582            {
583            gcss -= 6;
584            c |= (p[gcii] & 0x3f) << gcss;
585            }
586        }        }
587    
588      else if (silent) return 0;      p += 1 + extra;
589    
590      else      switch (c)
591        {        {
592        if (name != NULL) fprintf(stdout, "%s:", name);        case 0x0a:    /* LF */
593        if (number) fprintf(stdout, "%d:", linenumber);        *lenptr = 1;
594        fprintf(stdout, "%s\n", buffer);        return p;
595    
596          case 0x0d:    /* CR */
597          if (p < endptr && *p == 0x0a)
598            {
599            *lenptr = 2;
600            p++;
601            }
602          else *lenptr = 1;
603          return p;
604    
605          default:
606          break;
607        }        }
608        }   /* End of loop for ANYCRLF case */
609    
610      rc = 0;    *lenptr = 0;  /* Must have hit the end */
611      }    return endptr;
   }  
612    
613  if (count_only)    case EL_ANY:
614    {    while (p < endptr)
615    if (name != NULL) fprintf(stdout, "%s:", name);      {
616    fprintf(stdout, "%d\n", count);      int extra = 0;
617    }      register int c = *((unsigned char *)p);
618    
619  return rc;      if (utf8 && c >= 0xc0)
620  }        {
621          int gcii, gcss;
622          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
623          gcss = 6*extra;
624          c = (c & utf8_table3[extra]) << gcss;
625          for (gcii = 1; gcii <= extra; gcii++)
626            {
627            gcss -= 6;
628            c |= (p[gcii] & 0x3f) << gcss;
629            }
630          }
631    
632        p += 1 + extra;
633    
634        switch (c)
635          {
636          case 0x0a:    /* LF */
637          case 0x0b:    /* VT */
638          case 0x0c:    /* FF */
639          *lenptr = 1;
640          return p;
641    
642          case 0x0d:    /* CR */
643          if (p < endptr && *p == 0x0a)
644            {
645            *lenptr = 2;
646            p++;
647            }
648          else *lenptr = 1;
649          return p;
650    
651          case 0x85:    /* NEL */
652          *lenptr = utf8? 2 : 1;
653          return p;
654    
655          case 0x2028:  /* LS */
656          case 0x2029:  /* PS */
657          *lenptr = 3;
658          return p;
659    
660          default:
661          break;
662          }
663        }   /* End of loop for ANY case */
664    
665      *lenptr = 0;  /* Must have hit the end */
666      return endptr;
667      }     /* End of overall switch */
668    }
669    
670    
671    
672  /*************************************************  /*************************************************
673  *     Grep a file or recurse into a directory    *  *         Find start of previous line            *
674  *************************************************/  *************************************************/
675    
676  static int  /* This is called when looking back for "before" context lines to print.
 grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,  
   BOOL only_one_at_top)  
 {  
 int rc = 1;  
 int sep;  
 FILE *in;  
677    
678  /* If the file is a directory and we are recursing, scan each file within it.  Arguments:
679  The scanning code is localized so it can be made system-specific. */    p         start of the subsequent line
680      startptr  start of available data
681    
682  if ((sep = isdirectory(filename)) != 0 && recurse)  Returns:    pointer to the start of the previous line
683    {  */
   char buffer[1024];  
   char *nextfile;  
   directory_type *dir = opendirectory(filename);  
684    
685    if (dir == NULL)  static char *
686      {  previous_line(char *p, char *startptr)
687      fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,  {
688        strerror(errno));  switch(endlinetype)
689      return 2;    {
690      }    default:      /* Just in case */
691      case EL_LF:
692      p--;
693      while (p > startptr && p[-1] != '\n') p--;
694      return p;
695    
696      case EL_CR:
697      p--;
698      while (p > startptr && p[-1] != '\n') p--;
699      return p;
700    
701    while ((nextfile = readdirectory(dir)) != NULL)    case EL_CRLF:
702      for (;;)
703      {      {
704      int frc;      p -= 2;
705      sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);      while (p > startptr && p[-1] != '\n') p--;
706      frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);      if (p <= startptr + 1 || p[-2] == '\r') return p;
     if (frc == 0 && rc == 1) rc = 0;  
707      }      }
708      return p;   /* But control should never get here */
709    
710    closedirectory(dir);    case EL_ANY:
711    return rc;    case EL_ANYCRLF:
712    }    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
713      if (utf8) while ((*p & 0xc0) == 0x80) p--;
714    
715  /* If the file is not a directory, or we are not recursing, scan it. If this is    while (p > startptr)
716  the first and only argument at top level, we don't show the file name.      {
717  Otherwise, control is via the show_filenames variable. */      register int c;
718        char *pp = p - 1;
719    
720  in = fopen(filename, "r");      if (utf8)
721  if (in == NULL)        {
722    {        int extra = 0;
723    fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));        while ((*pp & 0xc0) == 0x80) pp--;
724    return 2;        c = *((unsigned char *)pp);
725    }        if (c >= 0xc0)
726            {
727            int gcii, gcss;
728            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
729            gcss = 6*extra;
730            c = (c & utf8_table3[extra]) << gcss;
731            for (gcii = 1; gcii <= extra; gcii++)
732              {
733              gcss -= 6;
734              c |= (pp[gcii] & 0x3f) << gcss;
735              }
736            }
737          }
738        else c = *((unsigned char *)pp);
739    
740  rc = pcregrep(in, (show_filenames && !only_one_at_top)? filename : NULL);      if (endlinetype == EL_ANYCRLF) switch (c)
741  fclose(in);        {
742  return rc;        case 0x0a:    /* LF */
743  }        case 0x0d:    /* CR */
744          return p;
745    
746          default:
747          break;
748          }
749    
750        else switch (c)
751          {
752          case 0x0a:    /* LF */
753          case 0x0b:    /* VT */
754          case 0x0c:    /* FF */
755          case 0x0d:    /* CR */
756          case 0x85:    /* NEL */
757          case 0x2028:  /* LS */
758          case 0x2029:  /* PS */
759          return p;
760    
761          default:
762          break;
763          }
764    
765  /*************************************************      p = pp;  /* Back one character */
766  *                Usage function                  *      }        /* End of loop for ANY case */
 *************************************************/  
767    
768  static int    return startptr;  /* Hit start of data */
769  usage(int rc)    }     /* End of overall switch */
 {  
 fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n");  
 fprintf(stderr, "Type `pcregrep --help' for more information.\n");  
 return rc;  
770  }  }
771    
772    
773    
774    
775    
776  /*************************************************  /*************************************************
777  *                Help function                   *  *       Print the previous "after" lines         *
778  *************************************************/  *************************************************/
779    
780  static void  /* This is called if we are about to lose said lines because of buffer filling,
781  help(void)  and at the end of the file. The data in the line is written using fwrite() so
782  {  that a binary zero does not terminate it.
783  option_item *op;  
784    Arguments:
785  printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n");    lastmatchnumber   the number of the last matching line, plus one
786  printf("Search for PATTERN in each FILE or standard input.\n");    lastmatchrestart  where we restarted after the last match
787  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");    endptr            end of available data
788      printname         filename for printing
789    
790  printf("Options:\n");  Returns:            nothing
791    */
792    
793  for (op = optionlist; op->one_char != 0; op++)  static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
794      char *endptr, char *printname)
795    {
796    if (after_context > 0 && lastmatchnumber > 0)
797    {    {
798    int n;    int count = 0;
799    char s[4];    while (lastmatchrestart < endptr && count++ < after_context)
800    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");      {
801    printf("  %s --%s%n", s, op->long_name, &n);      int ellength;
802    n = 30 - n;      char *pp = lastmatchrestart;
803    if (n < 1) n = 1;      if (printname != NULL) fprintf(stdout, "%s-", printname);
804    printf("%.*s%s\n", n, "                    ", op->help_text);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
805        pp = end_of_line(pp, endptr, &ellength);
806        fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
807        lastmatchrestart = pp;
808        }
809      hyphenpending = TRUE;
810    }    }
   
 printf("\n  -f<filename>  or  --file=<filename>\n");  
 printf("    Read patterns from <filename> instead of using a command line option.\n");  
 printf("    Trailing white space is removed; blanks lines are ignored.\n");  
 printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
   
 printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  
 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  
811  }  }
812    
813    
814    
   
815  /*************************************************  /*************************************************
816  *                Handle an option                *  *            Grep an individual file             *
817  *************************************************/  *************************************************/
818    
819    /* This is called from grep_or_recurse() below. It uses a buffer that is three
820    times the value of MBUFTHIRD. The matching point is never allowed to stray into
821    the top third of the buffer, thus keeping more of the file available for
822    context printing or for multiline scanning. For large files, the pointer will
823    be in the middle third most of the time, so the bottom third is available for
824    "before" context printing.
825    
826    Arguments:
827      handle       the fopened FILE stream for a normal file
828                   the gzFile pointer when reading is via libz
829                   the BZFILE pointer when reading is via libbz2
830      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
831      printname    the file name if it is to be printed for each match
832                   or NULL if the file name is not to be printed
833                   it cannot be NULL if filenames[_nomatch]_only is set
834    
835    Returns:       0 if there was at least one match
836                   1 otherwise (no matches)
837                   2 if there is a read error on a .bz2 file
838    */
839    
840  static int  static int
841  handle_option(int letter, int options)  pcregrep(void *handle, int frtype, char *printname)
842  {  {
843  switch(letter)  int rc = 1;
844    {  int linenumber = 1;
845    case -1:  help(); exit(0);  int lastmatchnumber = 0;
846    case 'c': count_only = TRUE; break;  int count = 0;
847    case 'h': filenames = FALSE; break;  int filepos = 0;
848    case 'i': options |= PCRE_CASELESS; break;  int offsets[99];
849    case 'l': filenames_only = TRUE;  char *lastmatchrestart = NULL;
850    case 'n': number = TRUE; break;  char buffer[3*MBUFTHIRD];
851    case 'r': recurse = TRUE; break;  char *ptr = buffer;
852    case 's': silent = TRUE; break;  char *endptr;
853    size_t bufflength;
854    BOOL endhyphenpending = FALSE;
855    FILE *in = NULL;                    /* Ensure initialized */
856    
857    #ifdef SUPPORT_LIBZ
858    gzFile ingz = NULL;
859    #endif
860    
861    #ifdef SUPPORT_LIBBZ2
862    BZFILE *inbz2 = NULL;
863    #endif
864    
865    
866    /* Do the first read into the start of the buffer and set up the pointer to end
867    of what we have. In the case of libz, a non-zipped .gz file will be read as a
868    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
869    fail. */
870    
871    #ifdef SUPPORT_LIBZ
872    if (frtype == FR_LIBZ)
873      {
874      ingz = (gzFile)handle;
875      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
876      }
877    else
878    #endif
879    
880    #ifdef SUPPORT_LIBBZ2
881    if (frtype == FR_LIBBZ2)
882      {
883      inbz2 = (BZFILE *)handle;
884      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
885      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
886      }                                    /* without the cast it is unsigned. */
887    else
888    #endif
889    
890      {
891      in = (FILE *)handle;
892      bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
893      }
894    
895    endptr = buffer + bufflength;
896    
897    /* Loop while the current pointer is not at the end of the file. For large
898    files, endptr will be at the end of the buffer when we are in the middle of the
899    file, but ptr will never get there, because as soon as it gets over 2/3 of the
900    way, the buffer is shifted left and re-filled. */
901    
902    while (ptr < endptr)
903      {
904      int i, endlinelength;
905      int mrc = 0;
906      BOOL match = FALSE;
907      char *matchptr = ptr;
908      char *t = ptr;
909      size_t length, linelength;
910    
911      /* At this point, ptr is at the start of a line. We need to find the length
912      of the subject string to pass to pcre_exec(). In multiline mode, it is the
913      length remainder of the data in the buffer. Otherwise, it is the length of
914      the next line. After matching, we always advance by the length of the next
915      line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
916      that any match is constrained to be in the first line. */
917    
918      t = end_of_line(t, endptr, &endlinelength);
919      linelength = t - ptr - endlinelength;
920      length = multiline? (size_t)(endptr - ptr) : linelength;
921    
922      /* Extra processing for Jeffrey Friedl's debugging. */
923    
924    #ifdef JFRIEDL_DEBUG
925      if (jfriedl_XT || jfriedl_XR)
926      {
927          #include <sys/time.h>
928          #include <time.h>
929          struct timeval start_time, end_time;
930          struct timezone dummy;
931    
932          if (jfriedl_XT)
933          {
934              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
935              const char *orig = ptr;
936              ptr = malloc(newlen + 1);
937              if (!ptr) {
938                      printf("out of memory");
939                      exit(2);
940              }
941              endptr = ptr;
942              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
943              for (i = 0; i < jfriedl_XT; i++) {
944                      strncpy(endptr, orig,  length);
945                      endptr += length;
946              }
947              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
948              length = newlen;
949          }
950    
951          if (gettimeofday(&start_time, &dummy) != 0)
952                  perror("bad gettimeofday");
953    
954    
955          for (i = 0; i < jfriedl_XR; i++)
956              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
957    
958          if (gettimeofday(&end_time, &dummy) != 0)
959                  perror("bad gettimeofday");
960    
961          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
962                          -
963                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
964    
965          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
966          return 0;
967      }
968    #endif
969    
970      /* We come back here after a match when the -o option (only_matching) is set,
971      in order to find any further matches in the same line. */
972    
973      ONLY_MATCHING_RESTART:
974    
975      /* Run through all the patterns until one matches. Note that we don't include
976      the final newline in the subject string. */
977    
978      for (i = 0; i < pattern_count; i++)
979        {
980        mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
981          offsets, 99);
982        if (mrc >= 0) { match = TRUE; break; }
983        if (mrc != PCRE_ERROR_NOMATCH)
984          {
985          fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
986          if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
987          fprintf(stderr, "this line:\n");
988          fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */
989          fprintf(stderr, "\n");
990          if (error_count == 0 &&
991              (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
992            {
993            fprintf(stderr, "pcregrep: error %d means that a resource limit "
994              "was exceeded\n", mrc);
995            fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
996            }
997          if (error_count++ > 20)
998            {
999            fprintf(stderr, "pcregrep: too many errors - abandoned\n");
1000            exit(2);
1001            }
1002          match = invert;    /* No more matching; don't show the line again */
1003          break;
1004          }
1005        }
1006    
1007      /* If it's a match or a not-match (as required), do what's wanted. */
1008    
1009      if (match != invert)
1010        {
1011        BOOL hyphenprinted = FALSE;
1012    
1013        /* We've failed if we want a file that doesn't have any matches. */
1014    
1015        if (filenames == FN_NOMATCH_ONLY) return 1;
1016    
1017        /* Just count if just counting is wanted. */
1018    
1019        if (count_only) count++;
1020    
1021        /* If all we want is a file name, there is no need to scan any more lines
1022        in the file. */
1023    
1024        else if (filenames == FN_ONLY)
1025          {
1026          fprintf(stdout, "%s\n", printname);
1027          return 0;
1028          }
1029    
1030        /* Likewise, if all we want is a yes/no answer. */
1031    
1032        else if (quiet) return 0;
1033    
1034        /* The --only-matching option prints just the substring that matched, and
1035        the --file-offsets and --line-offsets options output offsets for the
1036        matching substring (they both force --only-matching). None of these options
1037        prints any context. Afterwards, adjust the start and length, and then jump
1038        back to look for further matches in the same line. If we are in invert
1039        mode, however, nothing is printed - this could be still useful because the
1040        return code is set. */
1041    
1042        else if (only_matching)
1043          {
1044          if (!invert)
1045            {
1046            if (printname != NULL) fprintf(stdout, "%s:", printname);
1047            if (number) fprintf(stdout, "%d:", linenumber);
1048            if (line_offsets)
1049              fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,
1050                offsets[1] - offsets[0]);
1051            else if (file_offsets)
1052              fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,
1053                offsets[1] - offsets[0]);
1054            else
1055              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1056            fprintf(stdout, "\n");
1057            matchptr += offsets[1];
1058            length -= offsets[1];
1059            match = FALSE;
1060            goto ONLY_MATCHING_RESTART;
1061            }
1062          }
1063    
1064        /* This is the default case when none of the above options is set. We print
1065        the matching lines(s), possibly preceded and/or followed by other lines of
1066        context. */
1067    
1068        else
1069          {
1070          /* See if there is a requirement to print some "after" lines from a
1071          previous match. We never print any overlaps. */
1072    
1073          if (after_context > 0 && lastmatchnumber > 0)
1074            {
1075            int ellength;
1076            int linecount = 0;
1077            char *p = lastmatchrestart;
1078    
1079            while (p < ptr && linecount < after_context)
1080              {
1081              p = end_of_line(p, ptr, &ellength);
1082              linecount++;
1083              }
1084    
1085            /* It is important to advance lastmatchrestart during this printing so
1086            that it interacts correctly with any "before" printing below. Print
1087            each line's data using fwrite() in case there are binary zeroes. */
1088    
1089            while (lastmatchrestart < p)
1090              {
1091              char *pp = lastmatchrestart;
1092              if (printname != NULL) fprintf(stdout, "%s-", printname);
1093              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1094              pp = end_of_line(pp, endptr, &ellength);
1095              fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1096              lastmatchrestart = pp;
1097              }
1098            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1099            }
1100    
1101          /* If there were non-contiguous lines printed above, insert hyphens. */
1102    
1103          if (hyphenpending)
1104            {
1105            fprintf(stdout, "--\n");
1106            hyphenpending = FALSE;
1107            hyphenprinted = TRUE;
1108            }
1109    
1110          /* See if there is a requirement to print some "before" lines for this
1111          match. Again, don't print overlaps. */
1112    
1113          if (before_context > 0)
1114            {
1115            int linecount = 0;
1116            char *p = ptr;
1117    
1118            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1119                   linecount < before_context)
1120              {
1121              linecount++;
1122              p = previous_line(p, buffer);
1123              }
1124    
1125            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1126              fprintf(stdout, "--\n");
1127    
1128            while (p < ptr)
1129              {
1130              int ellength;
1131              char *pp = p;
1132              if (printname != NULL) fprintf(stdout, "%s-", printname);
1133              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1134              pp = end_of_line(pp, endptr, &ellength);
1135              fwrite(p, 1, pp - p, stdout);
1136              p = pp;
1137              }
1138            }
1139    
1140          /* Now print the matching line(s); ensure we set hyphenpending at the end
1141          of the file if any context lines are being output. */
1142    
1143          if (after_context > 0 || before_context > 0)
1144            endhyphenpending = TRUE;
1145    
1146          if (printname != NULL) fprintf(stdout, "%s:", printname);
1147          if (number) fprintf(stdout, "%d:", linenumber);
1148    
1149          /* In multiline mode, we want to print to the end of the line in which
1150          the end of the matched string is found, so we adjust linelength and the
1151          line number appropriately, but only when there actually was a match
1152          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1153          the match will always be before the first newline sequence. */
1154    
1155          if (multiline)
1156            {
1157            int ellength;
1158            char *endmatch = ptr;
1159            if (!invert)
1160              {
1161              endmatch += offsets[1];
1162              t = ptr;
1163              while (t < endmatch)
1164                {
1165                t = end_of_line(t, endptr, &ellength);
1166                if (t <= endmatch) linenumber++; else break;
1167                }
1168              }
1169            endmatch = end_of_line(endmatch, endptr, &ellength);
1170            linelength = endmatch - ptr - ellength;
1171            }
1172    
1173          /*** NOTE: Use only fwrite() to output the data line, so that binary
1174          zeroes are treated as just another data character. */
1175    
1176          /* This extra option, for Jeffrey Friedl's debugging requirements,
1177          replaces the matched string, or a specific captured string if it exists,
1178          with X. When this happens, colouring is ignored. */
1179    
1180    #ifdef JFRIEDL_DEBUG
1181          if (S_arg >= 0 && S_arg < mrc)
1182            {
1183            int first = S_arg * 2;
1184            int last  = first + 1;
1185            fwrite(ptr, 1, offsets[first], stdout);
1186            fprintf(stdout, "X");
1187            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1188            }
1189          else
1190    #endif
1191    
1192          /* We have to split the line(s) up if colouring. */
1193    
1194          if (do_colour)
1195            {
1196            fwrite(ptr, 1, offsets[0], stdout);
1197            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1198            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1199            fprintf(stdout, "%c[00m", 0x1b);
1200            fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1201              stdout);
1202            }
1203          else fwrite(ptr, 1, linelength + endlinelength, stdout);
1204          }
1205    
1206        /* End of doing what has to be done for a match */
1207    
1208        rc = 0;    /* Had some success */
1209    
1210        /* Remember where the last match happened for after_context. We remember
1211        where we are about to restart, and that line's number. */
1212    
1213        lastmatchrestart = ptr + linelength + endlinelength;
1214        lastmatchnumber = linenumber + 1;
1215        }
1216    
1217      /* For a match in multiline inverted mode (which of course did not cause
1218      anything to be printed), we have to move on to the end of the match before
1219      proceeding. */
1220    
1221      if (multiline && invert && match)
1222        {
1223        int ellength;
1224        char *endmatch = ptr + offsets[1];
1225        t = ptr;
1226        while (t < endmatch)
1227          {
1228          t = end_of_line(t, endptr, &ellength);
1229          if (t <= endmatch) linenumber++; else break;
1230          }
1231        endmatch = end_of_line(endmatch, endptr, &ellength);
1232        linelength = endmatch - ptr - ellength;
1233        }
1234    
1235      /* Advance to after the newline and increment the line number. The file
1236      offset to the current line is maintained in filepos. */
1237    
1238      ptr += linelength + endlinelength;
1239      filepos += linelength + endlinelength;
1240      linenumber++;
1241    
1242      /* If we haven't yet reached the end of the file (the buffer is full), and
1243      the current point is in the top 1/3 of the buffer, slide the buffer down by
1244      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1245      about to be lost, print them. */
1246    
1247      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1248        {
1249        if (after_context > 0 &&
1250            lastmatchnumber > 0 &&
1251            lastmatchrestart < buffer + MBUFTHIRD)
1252          {
1253          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1254          lastmatchnumber = 0;
1255          }
1256    
1257        /* Now do the shuffle */
1258    
1259        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1260        ptr -= MBUFTHIRD;
1261    
1262    #ifdef SUPPORT_LIBZ
1263        if (frtype == FR_LIBZ)
1264          bufflength = 2*MBUFTHIRD +
1265            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1266        else
1267    #endif
1268    
1269    #ifdef SUPPORT_LIBBZ2
1270        if (frtype == FR_LIBBZ2)
1271          bufflength = 2*MBUFTHIRD +
1272            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1273        else
1274    #endif
1275    
1276        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1277    
1278        endptr = buffer + bufflength;
1279    
1280        /* Adjust any last match point */
1281    
1282        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1283        }
1284      }     /* Loop through the whole file */
1285    
1286    /* End of file; print final "after" lines if wanted; do_after_lines sets
1287    hyphenpending if it prints something. */
1288    
1289    if (!only_matching && !count_only)
1290      {
1291      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1292      hyphenpending |= endhyphenpending;
1293      }
1294    
1295    /* Print the file name if we are looking for those without matches and there
1296    were none. If we found a match, we won't have got this far. */
1297    
1298    if (filenames == FN_NOMATCH_ONLY)
1299      {
1300      fprintf(stdout, "%s\n", printname);
1301      return 0;
1302      }
1303    
1304    /* Print the match count if wanted */
1305    
1306    if (count_only)
1307      {
1308      if (printname != NULL) fprintf(stdout, "%s:", printname);
1309      fprintf(stdout, "%d\n", count);
1310      }
1311    
1312    return rc;
1313    }
1314    
1315    
1316    
1317    /*************************************************
1318    *     Grep a file or recurse into a directory    *
1319    *************************************************/
1320    
1321    /* Given a path name, if it's a directory, scan all the files if we are
1322    recursing; if it's a file, grep it.
1323    
1324    Arguments:
1325      pathname          the path to investigate
1326      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1327      only_one_at_top   TRUE if the path is the only one at toplevel
1328    
1329    Returns:   0 if there was at least one match
1330               1 if there were no matches
1331               2 there was some kind of error
1332    
1333    However, file opening failures are suppressed if "silent" is set.
1334    */
1335    
1336    static int
1337    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1338    {
1339    int rc = 1;
1340    int sep;
1341    int frtype;
1342    int pathlen;
1343    void *handle;
1344    FILE *in = NULL;           /* Ensure initialized */
1345    
1346    #ifdef SUPPORT_LIBZ
1347    gzFile ingz = NULL;
1348    #endif
1349    
1350    #ifdef SUPPORT_LIBBZ2
1351    BZFILE *inbz2 = NULL;
1352    #endif
1353    
1354    /* If the file name is "-" we scan stdin */
1355    
1356    if (strcmp(pathname, "-") == 0)
1357      {
1358      return pcregrep(stdin, FR_PLAIN,
1359        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1360          stdin_name : NULL);
1361      }
1362    
1363    /* If the file is a directory, skip if skipping or if we are recursing, scan
1364    each file within it, subject to any include or exclude patterns that were set.
1365    The scanning code is localized so it can be made system-specific. */
1366    
1367    if ((sep = isdirectory(pathname)) != 0)
1368      {
1369      if (dee_action == dee_SKIP) return 1;
1370      if (dee_action == dee_RECURSE)
1371        {
1372        char buffer[1024];
1373        char *nextfile;
1374        directory_type *dir = opendirectory(pathname);
1375    
1376        if (dir == NULL)
1377          {
1378          if (!silent)
1379            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1380              strerror(errno));
1381          return 2;
1382          }
1383    
1384        while ((nextfile = readdirectory(dir)) != NULL)
1385          {
1386          int frc, blen;
1387          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1388          blen = strlen(buffer);
1389    
1390          if (exclude_compiled != NULL &&
1391              pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1392            continue;
1393    
1394          if (include_compiled != NULL &&
1395              pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1396            continue;
1397    
1398          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1399          if (frc > 1) rc = frc;
1400           else if (frc == 0 && rc == 1) rc = 0;
1401          }
1402    
1403        closedirectory(dir);
1404        return rc;
1405        }
1406      }
1407    
1408    /* If the file is not a directory and not a regular file, skip it if that's
1409    been requested. */
1410    
1411    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1412    
1413    /* Control reaches here if we have a regular file, or if we have a directory
1414    and recursion or skipping was not requested, or if we have anything else and
1415    skipping was not requested. The scan proceeds. If this is the first and only
1416    argument at top level, we don't show the file name, unless we are only showing
1417    the file name, or the filename was forced (-H). */
1418    
1419    pathlen = strlen(pathname);
1420    
1421    /* Open using zlib if it is supported and the file name ends with .gz. */
1422    
1423    #ifdef SUPPORT_LIBZ
1424    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1425      {
1426      ingz = gzopen(pathname, "rb");
1427      if (ingz == NULL)
1428        {
1429        if (!silent)
1430          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1431            strerror(errno));
1432        return 2;
1433        }
1434      handle = (void *)ingz;
1435      frtype = FR_LIBZ;
1436      }
1437    else
1438    #endif
1439    
1440    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1441    
1442    #ifdef SUPPORT_LIBBZ2
1443    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1444      {
1445      inbz2 = BZ2_bzopen(pathname, "rb");
1446      handle = (void *)inbz2;
1447      frtype = FR_LIBBZ2;
1448      }
1449    else
1450    #endif
1451    
1452    /* Otherwise use plain fopen(). The label is so that we can come back here if
1453    an attempt to read a .bz2 file indicates that it really is a plain file. */
1454    
1455    #ifdef SUPPORT_LIBBZ2
1456    PLAIN_FILE:
1457    #endif
1458      {
1459      in = fopen(pathname, "r");
1460      handle = (void *)in;
1461      frtype = FR_PLAIN;
1462      }
1463    
1464    /* All the opening methods return errno when they fail. */
1465    
1466    if (handle == NULL)
1467      {
1468      if (!silent)
1469        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1470          strerror(errno));
1471      return 2;
1472      }
1473    
1474    /* Now grep the file */
1475    
1476    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1477      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1478    
1479    /* Close in an appropriate manner. */
1480    
1481    #ifdef SUPPORT_LIBZ
1482    if (frtype == FR_LIBZ)
1483      gzclose(ingz);
1484    else
1485    #endif
1486    
1487    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1488    read failed. If the error indicates that the file isn't in fact bzipped, try
1489    again as a normal file. */
1490    
1491    #ifdef SUPPORT_LIBBZ2
1492    if (frtype == FR_LIBBZ2)
1493      {
1494      if (rc == 2)
1495        {
1496        int errnum;
1497        const char *err = BZ2_bzerror(inbz2, &errnum);
1498        if (errnum == BZ_DATA_ERROR_MAGIC)
1499          {
1500          BZ2_bzclose(inbz2);
1501          goto PLAIN_FILE;
1502          }
1503        else if (!silent)
1504          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1505            pathname, err);
1506        }
1507      BZ2_bzclose(inbz2);
1508      }
1509    else
1510    #endif
1511    
1512    /* Normal file close */
1513    
1514    fclose(in);
1515    
1516    /* Pass back the yield from pcregrep(). */
1517    
1518    return rc;
1519    }
1520    
1521    
1522    
1523    
1524    /*************************************************
1525    *                Usage function                  *
1526    *************************************************/
1527    
1528    static int
1529    usage(int rc)
1530    {
1531    option_item *op;
1532    fprintf(stderr, "Usage: pcregrep [-");
1533    for (op = optionlist; op->one_char != 0; op++)
1534      {
1535      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1536      }
1537    fprintf(stderr, "] [long options] [pattern] [files]\n");
1538    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1539      "options.\n");
1540    return rc;
1541    }
1542    
1543    
1544    
1545    
1546    /*************************************************
1547    *                Help function                   *
1548    *************************************************/
1549    
1550    static void
1551    help(void)
1552    {
1553    option_item *op;
1554    
1555    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1556    printf("Search for PATTERN in each FILE or standard input.\n");
1557    printf("PATTERN must be present if neither -e nor -f is used.\n");
1558    printf("\"-\" can be used as a file name to mean STDIN.\n");
1559    
1560    #ifdef SUPPORT_LIBZ
1561    printf("Files whose names end in .gz are read using zlib.\n");
1562    #endif
1563    
1564    #ifdef SUPPORT_LIBBZ2
1565    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1566    #endif
1567    
1568    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1569    printf("Other files and the standard input are read as plain files.\n\n");
1570    #else
1571    printf("All files are read as plain files, without any interpretation.\n\n");
1572    #endif
1573    
1574    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1575    printf("Options:\n");
1576    
1577    for (op = optionlist; op->one_char != 0; op++)
1578      {
1579      int n;
1580      char s[4];
1581      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1582      n = 30 - printf("  %s --%s", s, op->long_name);
1583      if (n < 1) n = 1;
1584      printf("%.*s%s\n", n, "                    ", op->help_text);
1585      }
1586    
1587    printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1588    printf("trailing white space is removed and blank lines are ignored.\n");
1589    printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1590    
1591    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1592    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1593    }
1594    
1595    
1596    
1597    
1598    /*************************************************
1599    *    Handle a single-letter, no data option      *
1600    *************************************************/
1601    
1602    static int
1603    handle_option(int letter, int options)
1604    {
1605    switch(letter)
1606      {
1607      case N_FOFFSETS: file_offsets = TRUE; break;
1608      case N_HELP: help(); exit(0);
1609      case N_LOFFSETS: line_offsets = number = TRUE; break;
1610      case 'c': count_only = TRUE; break;
1611      case 'F': process_options |= PO_FIXED_STRINGS; break;
1612      case 'H': filenames = FN_FORCE; break;
1613      case 'h': filenames = FN_NONE; break;
1614      case 'i': options |= PCRE_CASELESS; break;
1615      case 'l': filenames = FN_ONLY; break;
1616      case 'L': filenames = FN_NOMATCH_ONLY; break;
1617      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1618      case 'n': number = TRUE; break;
1619      case 'o': only_matching = TRUE; break;
1620      case 'q': quiet = TRUE; break;
1621      case 'r': dee_action = dee_RECURSE; break;
1622      case 's': silent = TRUE; break;
1623      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1624    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1625    case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1626      case 'x': process_options |= PO_LINE_MATCH; break;
1627    
1628    case 'V':    case 'V':
1629    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
1630    exit(0);    exit(0);
1631    break;    break;
1632    
# Line 371  return options; Line 1642  return options;
1642    
1643    
1644  /*************************************************  /*************************************************
1645    *          Construct printed ordinal             *
1646    *************************************************/
1647    
/* This turns a number into "1st", "2nd", "3rd", "4th", etc. The suffix is
normally chosen by the final digit, but numbers whose last two digits are 11,
12, or 13 take "th" ("11th", "112th"), not "st", "nd", or "rd".

Argument:   n     the number
Returns:    pointer to a static buffer holding the text; it is overwritten by
            the next call, so use or copy the result before calling again
*/

static char *
ordin(int n)
{
/* Three characters per byte is more than enough for the decimal digits of an
int; the extra four are for a minus sign, the two-letter suffix, and the
terminating zero. */

static char buffer[sizeof(int) * 3 + 4];
const char *suffix = "th";
int last_two = n % 100;

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, suffix);
return buffer;
}
1666    
1667    
1668    
1669    /*************************************************
1670    *          Compile a single pattern              *
1671    *************************************************/
1672    
1673    /* When the -F option has been used, this is called for each substring.
1674    Otherwise it's called for each supplied pattern.
1675    
1676    Arguments:
1677      pattern        the pattern string
1678      options        the PCRE options
1679      filename       the file name, or NULL for a command-line pattern
1680      count          0 if this is the only command line pattern, or
1681                     number of the command line pattern, or
1682                     linenumber for a pattern from a file
1683    
1684    Returns:         TRUE on success, FALSE after an error
1685    */
1686    
1687    static BOOL
1688    compile_single_pattern(char *pattern, int options, char *filename, int count)
1689    {
1690    char buffer[MBUFTHIRD + 16];
1691    const char *error;
1692    int errptr;
1693    
1694    if (pattern_count >= MAX_PATTERN_COUNT)
1695      {
1696      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1697        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1698      return FALSE;
1699      }
1700    
1701    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1702      suffix[process_options]);
1703    pattern_list[pattern_count] =
1704      pcre_compile(buffer, options, &error, &errptr, pcretables);
1705    if (pattern_list[pattern_count] != NULL)
1706      {
1707      pattern_count++;
1708      return TRUE;
1709      }
1710    
1711    /* Handle compile errors */
1712    
1713    errptr -= (int)strlen(prefix[process_options]);
1714    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1715    
1716    if (filename == NULL)
1717      {
1718      if (count == 0)
1719        fprintf(stderr, "pcregrep: Error in command-line regex "
1720          "at offset %d: %s\n", errptr, error);
1721      else
1722        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1723          "at offset %d: %s\n", ordin(count), errptr, error);
1724      }
1725    else
1726      {
1727      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1728        "at offset %d: %s\n", count, filename, errptr, error);
1729      }
1730    
1731    return FALSE;
1732    }
1733    
1734    
1735    
1736    /*************************************************
1737    *           Compile one supplied pattern         *
1738    *************************************************/
1739    
1740    /* When the -F option has been used, each string may be a list of strings,
1741    separated by line breaks. They will be matched literally.
1742    
1743    Arguments:
1744      pattern        the pattern string
1745      options        the PCRE options
1746      filename       the file name, or NULL for a command-line pattern
1747      count          0 if this is the only command line pattern, or
1748                     number of the command line pattern, or
1749                     linenumber for a pattern from a file
1750    
1751    Returns:         TRUE on success, FALSE after an error
1752    */
1753    
1754    static BOOL
1755    compile_pattern(char *pattern, int options, char *filename, int count)
1756    {
1757    if ((process_options & PO_FIXED_STRINGS) != 0)
1758      {
1759      char *eop = pattern + strlen(pattern);
1760      char buffer[MBUFTHIRD];
1761      for(;;)
1762        {
1763        int ellength;
1764        char *p = end_of_line(pattern, eop, &ellength);
1765        if (ellength == 0)
1766          return compile_single_pattern(pattern, options, filename, count);
1767        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1768        pattern = p;
1769        if (!compile_single_pattern(buffer, options, filename, count))
1770          return FALSE;
1771        }
1772      }
1773    else return compile_single_pattern(pattern, options, filename, count);
1774    }
1775    
1776    
1777    
1778    /*************************************************
1779  *                Main program                    *  *                Main program                    *
1780  *************************************************/  *************************************************/
1781    
1782    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1783    
1784  int  int
1785  main(int argc, char **argv)  main(int argc, char **argv)
1786  {  {
1787  int i, j;  int i, j;
1788  int rc = 1;  int rc = 1;
1789  int options = 0;  int pcre_options = 0;
1790    int cmd_pattern_count = 0;
1791    int hint_count = 0;
1792  int errptr;  int errptr;
 const char *error;  
1793  BOOL only_one_at_top;  BOOL only_one_at_top;
1794    char *patterns[MAX_PATTERN_COUNT];
1795    const char *locale_from = "--locale";
1796    const char *error;
1797    
1798    /* Set the default line ending value from the default in the PCRE library;
1799    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1800    */
1801    
1802    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1803    switch(i)
1804      {
1805      default:                 newline = (char *)"lf"; break;
1806      case '\r':               newline = (char *)"cr"; break;
1807      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1808      case -1:                 newline = (char *)"any"; break;
1809      case -2:                 newline = (char *)"anycrlf"; break;
1810      }
1811    
1812  /* Process the options */  /* Process the options */
1813    
1814  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
1815    {    {
1816      option_item *op = NULL;
1817      char *option_data = (char *)"";    /* default to keep compiler happy */
1818      BOOL longop;
1819      BOOL longopwasequals = FALSE;
1820    
1821    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
1822    
1823    /* Long name options */    /* If we hit an argument that is just "-", it may be a reference to STDIN,
1824      but only if we have previously had -e or -f to define the patterns. */
1825    
1826      if (argv[i][1] == 0)
1827        {
1828        if (pattern_filename != NULL || pattern_count > 0) break;
1829          else exit(usage(2));
1830        }
1831    
1832      /* Handle a long name option, or -- to terminate the options */
1833    
1834    if (argv[i][1] == '-')    if (argv[i][1] == '-')
1835      {      {
1836      option_item *op;      char *arg = argv[i] + 2;
1837        char *argequals = strchr(arg, '=');
1838    
1839      if (strncmp(argv[i]+2, "file=", 5) == 0)      if (*arg == 0)    /* -- terminates options */
1840        {        {
1841        pattern_filename = argv[i] + 7;        i++;
1842        continue;        break;                /* out of the options-handling loop */
1843        }        }
1844    
1845        longop = TRUE;
1846    
1847        /* Some long options have data that follows after =, for example file=name.
1848        Some options have variations in the long name spelling: specifically, we
1849        allow "regexp" because GNU grep allows it, though I personally go along
1850        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1851        These options are entered in the table as "regex(p)". No option is in both
1852        these categories, fortunately. */
1853    
1854      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
1855        {        {
1856        if (strcmp(argv[i]+2, op->long_name) == 0)        char *opbra = strchr(op->long_name, '(');
1857          char *equals = strchr(op->long_name, '=');
1858          if (opbra == NULL)     /* Not a (p) case */
1859          {          {
1860          options = handle_option(op->one_char, options);          if (equals == NULL)  /* Not thing=data case */
1861          break;            {
1862              if (strcmp(arg, op->long_name) == 0) break;
1863              }
1864            else                 /* Special case xxx=data */
1865              {
1866              int oplen = equals - op->long_name;
1867              int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1868              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1869                {
1870                option_data = arg + arglen;
1871                if (*option_data == '=')
1872                  {
1873                  option_data++;
1874                  longopwasequals = TRUE;
1875                  }
1876                break;
1877                }
1878              }
1879            }
1880          else                   /* Special case xxxx(p) */
1881            {
1882            char buff1[24];
1883            char buff2[24];
1884            int baselen = opbra - op->long_name;
1885            sprintf(buff1, "%.*s", baselen, op->long_name);
1886            sprintf(buff2, "%s%.*s", buff1,
1887              (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1888            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1889              break;
1890          }          }
1891        }        }
1892    
1893      if (op->one_char == 0)      if (op->one_char == 0)
1894        {        {
1895        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
# Line 417  for (i = 1; i < argc; i++) Line 1897  for (i = 1; i < argc; i++)
1897        }        }
1898      }      }
1899    
1900    /* One-char options */  
1901      /* Jeffrey Friedl's debugging harness uses these additional options which
1902      are not in the right form for putting in the option table because they use
1903      only one hyphen, yet are more than one character long. By putting them
1904      separately here, they will not get displayed as part of the help() output,
1905      but I don't think Jeffrey will care about that. */
1906    
1907    #ifdef JFRIEDL_DEBUG
1908      else if (strcmp(argv[i], "-pre") == 0) {
1909              jfriedl_prefix = argv[++i];
1910              continue;
1911      } else if (strcmp(argv[i], "-post") == 0) {
1912              jfriedl_postfix = argv[++i];
1913              continue;
1914      } else if (strcmp(argv[i], "-XT") == 0) {
1915              sscanf(argv[++i], "%d", &jfriedl_XT);
1916              continue;
1917      } else if (strcmp(argv[i], "-XR") == 0) {
1918              sscanf(argv[++i], "%d", &jfriedl_XR);
1919              continue;
1920      }
1921    #endif
1922    
1923    
1924      /* One-char options; many that have no data may be in a single argument; we
1925      continue till we hit the last one or one that needs data. */
1926    
1927    else    else
1928      {      {
1929      char *s = argv[i] + 1;      char *s = argv[i] + 1;
1930        longop = FALSE;
1931      while (*s != 0)      while (*s != 0)
1932        {        {
1933        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
1934            { if (*s == op->one_char) break; }
1935          if (op->one_char == 0)
1936          {          {
1937          pattern_filename = s + 1;          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1938          if (pattern_filename[0] == 0)            *s, argv[i]);
1939            {          exit(usage(2));
1940            if (i >= argc - 1)          }
1941              {        if (op->type != OP_NODATA || s[1] == 0)
1942              fprintf(stderr, "pcregrep: File name missing after -f\n");          {
1943              exit(usage(2));          option_data = s+1;
             }  
           pattern_filename = argv[++i];  
           }  
1944          break;          break;
1945          }          }
1946        else options = handle_option(*s++, options);        pcre_options = handle_option(*s++, pcre_options);
1947        }        }
1948      }      }
   }  
1949    
1950  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));    /* At this point we should have op pointing to a matched option. If the type
1951  hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));    is NO_DATA, it means that there is no data, and the option might set
1952      something in the PCRE options. */
1953    
1954  if (pattern_list == NULL || hints_list == NULL)    if (op->type == OP_NODATA)
1955    {      {
1956    fprintf(stderr, "pcregrep: malloc failed\n");      pcre_options = handle_option(op->one_char, pcre_options);
1957    return 2;      continue;
1958    }      }
1959    
1960  /* Compile the regular expression(s). */    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1961      either has a value or defaults to something. It cannot have data in a
1962      separate item. At the moment, the only such options are "colo(u)r" and
1963      Jeffrey Friedl's special -S debugging option. */
1964    
1965  if (pattern_filename != NULL)    if (*option_data == 0 &&
1966    {        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
   FILE *f = fopen(pattern_filename, "r");  
   char buffer[BUFSIZ];  
   if (f == NULL)  
1967      {      {
1968      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      switch (op->one_char)
1969        strerror(errno));        {
1970      return 2;        case N_COLOUR:
1971          colour_option = (char *)"auto";
1972          break;
1973    #ifdef JFRIEDL_DEBUG
1974          case 'S':
1975          S_arg = 0;
1976          break;
1977    #endif
1978          }
1979        continue;
1980      }      }
1981    while (fgets(buffer, sizeof(buffer), f) != NULL)  
1982      /* Otherwise, find the data string for the option. */
1983    
1984      if (*option_data == 0)
1985      {      {
1986      char *s = buffer + (int)strlen(buffer);      if (i >= argc - 1 || longopwasequals)
1987      if (pattern_count >= MAX_PATTERN_COUNT)        {
1988          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1989          exit(usage(2));
1990          }
1991        option_data = argv[++i];
1992        }
1993    
1994      /* If the option type is OP_PATLIST, it's the -e option, which can be called
1995      multiple times to create a list of patterns. */
1996    
1997      if (op->type == OP_PATLIST)
1998        {
1999        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2000        {        {
2001        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2002          MAX_PATTERN_COUNT);          MAX_PATTERN_COUNT);
2003        return 2;        return 2;
2004        }        }
2005      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      patterns[cmd_pattern_count++] = option_data;
2006      if (s == buffer) continue;      }
2007      *s = 0;  
2008      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,    /* Otherwise, deal with single string or numeric data values. */
2009        &errptr, NULL);  
2010      if (pattern_list[pattern_count++] == NULL)    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2011        {
2012        *((char **)op->dataptr) = option_data;
2013        }
2014      else
2015        {
2016        char *endptr;
2017        int n = strtoul(option_data, &endptr, 10);
2018        if (*endptr != 0)
2019        {        {
2020        fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",        if (longop)
2021          pattern_count, errptr, error);          {
2022        return 2;          char *equals = strchr(op->long_name, '=');
2023            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2024              equals - op->long_name;
2025            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2026              option_data, nlen, op->long_name);
2027            }
2028          else
2029            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2030              option_data, op->one_char);
2031          exit(usage(2));
2032        }        }
2033        *((int *)op->dataptr) = n;
2034        }
2035      }
2036    
2037    /* Options have been decoded. If -C was used, its value is used as a default
2038    for -A and -B. */
2039    
2040    if (both_context > 0)
2041      {
2042      if (after_context == 0) after_context = both_context;
2043      if (before_context == 0) before_context = both_context;
2044      }
2045    
2046    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2047    However, the latter two set the only_matching flag. */
2048    
2049    if ((only_matching && (file_offsets || line_offsets)) ||
2050        (file_offsets && line_offsets))
2051      {
2052      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2053        "and/or --line-offsets\n");
2054      exit(usage(2));
2055      }
2056    
2057    if (file_offsets || line_offsets) only_matching = TRUE;
2058    
2059    /* If a locale has not been provided as an option, see if the LC_CTYPE or
2060    LC_ALL environment variable is set, and if so, use it. */
2061    
2062    if (locale == NULL)
2063      {
2064      locale = getenv("LC_ALL");
2065      locale_from = "LCC_ALL";
2066      }
2067    
2068    if (locale == NULL)
2069      {
2070      locale = getenv("LC_CTYPE");
2071      locale_from = "LC_CTYPE";
2072      }
2073    
2074    /* If a locale has been provided, set it, and generate the tables the PCRE
2075    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2076    
2077    if (locale != NULL)
2078      {
2079      if (setlocale(LC_CTYPE, locale) == NULL)
2080        {
2081        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2082          locale, locale_from);
2083        return 2;
2084      }      }
2085    fclose(f);    pcretables = pcre_maketables();
2086    }    }
2087    
2088  /* If no file name, a single regex must be given inline */  /* Sort out colouring */
2089    
2090    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2091      {
2092      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2093      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2094      else
2095        {
2096        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2097          colour_option);
2098        return 2;
2099        }
2100      if (do_colour)
2101        {
2102        char *cs = getenv("PCREGREP_COLOUR");
2103        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2104        if (cs != NULL) colour_string = cs;
2105        }
2106      }
2107    
2108    /* Interpret the newline type; the default settings are Unix-like. */
2109    
2110    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2111      {
2112      pcre_options |= PCRE_NEWLINE_CR;
2113      endlinetype = EL_CR;
2114      }
2115    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2116      {
2117      pcre_options |= PCRE_NEWLINE_LF;
2118      endlinetype = EL_LF;
2119      }
2120    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2121      {
2122      pcre_options |= PCRE_NEWLINE_CRLF;
2123      endlinetype = EL_CRLF;
2124      }
2125    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2126      {
2127      pcre_options |= PCRE_NEWLINE_ANY;
2128      endlinetype = EL_ANY;
2129      }
2130    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2131      {
2132      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2133      endlinetype = EL_ANYCRLF;
2134      }
2135  else  else
2136    {    {
2137    if (i >= argc) return usage(0);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2138    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);    return 2;
2139    if (pattern_list[0] == NULL)    }
2140    
2141    /* Interpret the text values for -d and -D */
2142    
2143    if (dee_option != NULL)
2144      {
2145      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2146      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2147      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2148      else
2149      {      {
2150      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
       error);  
2151      return 2;      return 2;
2152      }      }
   pattern_count++;  
2153    }    }
2154    
2155  /* Study the regular expressions, as we will be running them may times */  if (DEE_option != NULL)
2156      {
2157      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2158      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2159      else
2160        {
2161        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2162        return 2;
2163        }
2164      }
2165    
2166    /* Check the values for Jeffrey Friedl's debugging options. */
2167    
2168    #ifdef JFRIEDL_DEBUG
2169    if (S_arg > 9)
2170      {
2171      fprintf(stderr, "pcregrep: bad value for -S option\n");
2172      return 2;
2173      }
2174    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2175      {
2176      if (jfriedl_XT == 0) jfriedl_XT = 1;
2177      if (jfriedl_XR == 0) jfriedl_XR = 1;
2178      }
2179    #endif
2180    
2181    /* Get memory to store the pattern and hints lists. */
2182    
2183    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2184    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2185    
2186    if (pattern_list == NULL || hints_list == NULL)
2187      {
2188      fprintf(stderr, "pcregrep: malloc failed\n");
2189      goto EXIT2;
2190      }
2191    
2192    /* If no patterns were provided by -e, and there is no file provided by -f,
2193    the first argument is the one and only pattern, and it must exist. */
2194    
2195    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2196      {
2197      if (i >= argc) return usage(2);
2198      patterns[cmd_pattern_count++] = argv[i++];
2199      }
2200    
2201    /* Compile the patterns that were provided on the command line, either by
2202    multiple uses of -e or as a single unkeyed pattern. */
2203    
2204    for (j = 0; j < cmd_pattern_count; j++)
2205      {
2206      if (!compile_pattern(patterns[j], pcre_options, NULL,
2207           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2208        goto EXIT2;
2209      }
2210    
2211    /* Compile the regular expressions that are provided in a file. */
2212    
2213    if (pattern_filename != NULL)
2214      {
2215      int linenumber = 0;
2216      FILE *f;
2217      char *filename;
2218      char buffer[MBUFTHIRD];
2219    
2220      if (strcmp(pattern_filename, "-") == 0)
2221        {
2222        f = stdin;
2223        filename = stdin_name;
2224        }
2225      else
2226        {
2227        f = fopen(pattern_filename, "r");
2228        if (f == NULL)
2229          {
2230          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2231            strerror(errno));
2232          goto EXIT2;
2233          }
2234        filename = pattern_filename;
2235        }
2236    
2237      while (fgets(buffer, MBUFTHIRD, f) != NULL)
2238        {
2239        char *s = buffer + (int)strlen(buffer);
2240        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2241        *s = 0;
2242        linenumber++;
2243        if (buffer[0] == 0) continue;   /* Skip blank lines */
2244        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2245          goto EXIT2;
2246        }
2247    
2248      if (f != stdin) fclose(f);
2249      }
2250    
2251    /* Study the regular expressions, as we will be running them many times */
2252    
2253  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
2254    {    {
# Line 513  for (j = 0; j < pattern_count; j++) Line 2258  for (j = 0; j < pattern_count; j++)
2258      char s[16];      char s[16];
2259      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2260      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2261      return 2;      goto EXIT2;
2262        }
2263      hint_count++;
2264      }
2265    
2266    /* If there are include or exclude patterns, compile them. */
2267    
2268    if (exclude_pattern != NULL)
2269      {
2270      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2271        pcretables);
2272      if (exclude_compiled == NULL)
2273        {
2274        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2275          errptr, error);
2276        goto EXIT2;
2277        }
2278      }
2279    
2280    if (include_pattern != NULL)
2281      {
2282      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2283        pcretables);
2284      if (include_compiled == NULL)
2285        {
2286        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2287          errptr, error);
2288        goto EXIT2;
2289      }      }
2290    }    }
2291    
2292  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
2293    
2294  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc)
2295      {
2296      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2297      goto EXIT;
2298      }
2299    
2300  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2301  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
2302  the file name if the argument is not a directory. */  the file name if the argument is not a directory and filenames are not
2303    otherwise forced. */
2304    
2305  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
 if (filenames_only) filenames = TRUE;  
2306    
2307  for (; i < argc; i++)  for (; i < argc; i++)
2308    {    {
2309    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2310    if (frc == 0 && rc == 1) rc = 0;      only_one_at_top);
2311      if (frc > 1) rc = frc;
2312        else if (frc == 0 && rc == 1) rc = 0;
2313    }    }
2314    
2315    EXIT:
2316    if (pattern_list != NULL)
2317      {
2318      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2319      free(pattern_list);
2320      }
2321    if (hints_list != NULL)
2322      {
2323      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2324      free(hints_list);
2325      }
2326  return rc;  return rc;
2327    
2328    EXIT2:
2329    rc = 2;
2330    goto EXIT;
2331  }  }
2332    
2333  /* End */  /* End of pcregrep */

Legend:
Removed from v.53  
changed lines
  Added in v.296

  ViewVC Help
Powered by ViewVC 1.1.5