/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 49 by nigel, Sat Feb 24 21:39:33 2007 UTC revision 392 by ph10, Tue Mar 17 21:30:30 2009 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2009 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44    #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55    #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 17  its pattern matching. */ Line 70  its pattern matching. */
70    
71  typedef int BOOL;  typedef int BOOL;
72    
73    #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76    #if BUFSIZ > 8192
77    #define MBUFTHIRD BUFSIZ
78    #else
79    #define MBUFTHIRD 8192
80    #endif
81    
82    /* Values for the "filenames" variable, which specifies options for file name
83    output. The order is important; it is assumed that a file name is wanted for
84    all values greater than FN_DEFAULT. */
85    
86    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92    /* Actions for the -d and -D options */
93    
94    enum { dee_READ, dee_SKIP, dee_RECURSE };
95    enum { DEE_READ, DEE_SKIP };
96    
97    /* Actions for special processing options (flag bits) */
98    
99    #define PO_WORD_MATCH     0x0001
100    #define PO_LINE_MATCH     0x0002
101    #define PO_FIXED_STRINGS  0x0004
102    
103    /* Line ending types */
104    
105    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    
108    
109  /*************************************************  /*************************************************
110  *               Global variables                 *  *               Global variables                 *
111  *************************************************/  *************************************************/
112    
113  static pcre *pattern;  /* Jeffrey Friedl has some debugging requirements that are not part of the
114  static pcre_extra *hints;  regular code. */
115    
116    #ifdef JFRIEDL_DEBUG
117    static int S_arg = -1;
118    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
119    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
120    static const char *jfriedl_prefix = "";
121    static const char *jfriedl_postfix = "";
122    #endif
123    
124    static int  endlinetype;
125    
126    static char *colour_string = (char *)"1;31";
127    static char *colour_option = NULL;
128    static char *dee_option = NULL;
129    static char *DEE_option = NULL;
130    static char *newline = NULL;
131    static char *pattern_filename = NULL;
132    static char *stdin_name = (char *)"(standard input)";
133    static char *locale = NULL;
134    
135    static const unsigned char *pcretables = NULL;
136    
137    static int  pattern_count = 0;
138    static pcre **pattern_list = NULL;
139    static pcre_extra **hints_list = NULL;
140    
141    static char *include_pattern = NULL;
142    static char *exclude_pattern = NULL;
143    static char *include_dir_pattern = NULL;
144    static char *exclude_dir_pattern = NULL;
145    
146    static pcre *include_compiled = NULL;
147    static pcre *exclude_compiled = NULL;
148    static pcre *include_dir_compiled = NULL;
149    static pcre *exclude_dir_compiled = NULL;
150    
151    static int after_context = 0;
152    static int before_context = 0;
153    static int both_context = 0;
154    static int dee_action = dee_READ;
155    static int DEE_action = DEE_READ;
156    static int error_count = 0;
157    static int filenames = FN_DEFAULT;
158    static int process_options = 0;
159    
160  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
161  static BOOL filenames_only = FALSE;  static BOOL do_colour = FALSE;
162    static BOOL file_offsets = FALSE;
163    static BOOL hyphenpending = FALSE;
164  static BOOL invert = FALSE;  static BOOL invert = FALSE;
165    static BOOL line_offsets = FALSE;
166    static BOOL multiline = FALSE;
167  static BOOL number = FALSE;  static BOOL number = FALSE;
168    static BOOL only_matching = FALSE;
169    static BOOL quiet = FALSE;
170  static BOOL silent = FALSE;  static BOOL silent = FALSE;
171  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
172    
173    /* Structure for options and list of them */
174    
175    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
176           OP_PATLIST };
177    
178    typedef struct option_item {
179      int type;
180      int one_char;
181      void *dataptr;
182      const char *long_name;
183      const char *help_text;
184    } option_item;
185    
186    /* Options without a single-letter equivalent get a negative value. This can be
187    used to identify them. */
188    
189    #define N_COLOUR       (-1)
190    #define N_EXCLUDE      (-2)
191    #define N_EXCLUDE_DIR  (-3)
192    #define N_HELP         (-4)
193    #define N_INCLUDE      (-5)
194    #define N_INCLUDE_DIR  (-6)
195    #define N_LABEL        (-7)
196    #define N_LOCALE       (-8)
197    #define N_NULL         (-9)
198    #define N_LOFFSETS     (-10)
199    #define N_FOFFSETS     (-11)
200    
201    static option_item optionlist[] = {
202      { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
203      { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
204      { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
205      { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
206      { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
207      { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
208      { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
209      { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
210      { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
211      { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
212      { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
213      { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
214      { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
215      { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
216      { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
217      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
218      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
219      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
220      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
221      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
222      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
223      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
224      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
225      { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
226      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
227      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
228      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
229      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
230      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
231      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
232      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
233      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
234    #ifdef JFRIEDL_DEBUG
235      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
236    #endif
237      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
238      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
239      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
240      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
241      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
242      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
243      { OP_NODATA,    0,        NULL,               NULL,            NULL }
244    };
245    
246    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
247    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
248    that the combination of -w and -x has the same effect as -x on its own, so we
249    can treat them as the same. */
250    
251    static const char *prefix[] = {
252      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
253    
254    static const char *suffix[] = {
255      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
256    
257    /* UTF-8 tables - used only when the newline setting is "any". */
258    
259    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
260    
261    const char utf8_table4[] = {
262      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
263      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
264      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
265      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
266    
267    
268    
269    /*************************************************
270    *            OS-specific functions               *
271    *************************************************/
272    
273    /* These functions are defined so that they can be made system specific,
274    although at present the only ones are for Unix, Win32, and for "no support". */
275    
276    
277    /************* Directory scanning in Unix ***********/
278    
279    #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
280    #include <sys/types.h>
281    #include <sys/stat.h>
282    #include <dirent.h>
283    
284    typedef DIR directory_type;
285    
/* Test whether a path names a directory (Unix version).

Argument:   filename   the path to examine

Returns:    '/' if stat() succeeds and reports a directory
            0 otherwise, including when stat() itself fails
*/

static int
isdirectory(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 0;        /* In the expectation that opening as a file will fail */
/* S_ISDIR() is the base POSIX test; the S_IFMT/S_IFDIR masks are XSI-only. */
return S_ISDIR(statbuf.st_mode)? '/' : 0;
}
294    
295    static directory_type *
296    opendirectory(char *filename)
297    {
298    return opendir(filename);
299    }
300    
301    static char *
302    readdirectory(directory_type *dir)
303    {
304    for (;;)
305      {
306      struct dirent *dent = readdir(dir);
307      if (dent == NULL) return NULL;
308      if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
309        return dent->d_name;
310      }
311    /* Control never reaches here */
312    }
313    
314    static void
315    closedirectory(directory_type *dir)
316    {
317    closedir(dir);
318    }
319    
320    
321    /************* Test for regular file in Unix **********/
322    
/* Test whether a path names a regular file (Unix version).

Argument:   filename   the path to examine

Returns:    1 if stat() reports a regular file, or if stat() itself fails
            0 if stat() succeeds and reports anything else
*/

static int
isregfile(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 1;        /* In the expectation that opening as a file will fail */
/* S_ISREG() is the base POSIX test; the S_IFMT/S_IFREG masks are XSI-only.
The comparison normalizes the macro's "non-zero" result to exactly 1. */
return S_ISREG(statbuf.st_mode) != 0;
}
331    
332    
333    /************* Test stdout for being a terminal in Unix **********/
334    
335    static BOOL
336    is_stdout_tty(void)
337    {
338    return isatty(fileno(stdout));
339    }
340    
341    
342    /************* Directory scanning in Win32 ***********/
343    
344    /* I (Philip Hazel) have no means of testing this code. It was contributed by
345    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
346    when it did not exist. David Byron added a patch that moved the #include of
347    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
348    */
349    
350    #elif HAVE_WINDOWS_H
351    
352    #ifndef STRICT
353    # define STRICT
354    #endif
355    #ifndef WIN32_LEAN_AND_MEAN
356    # define WIN32_LEAN_AND_MEAN
357    #endif
358    
359    #include <windows.h>
360    
361    #ifndef INVALID_FILE_ATTRIBUTES
362    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
363    #endif
364    
365    typedef struct directory_type
366    {
367    HANDLE handle;
368    BOOL first;
369    WIN32_FIND_DATA data;
370    } directory_type;
371    
372    int
373    isdirectory(char *filename)
374    {
375    DWORD attr = GetFileAttributes(filename);
376    if (attr == INVALID_FILE_ATTRIBUTES)
377      return 0;
378    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
379    }
380    
381    directory_type *
382    opendirectory(char *filename)
383    {
384    size_t len;
385    char *pattern;
386    directory_type *dir;
387    DWORD err;
388    len = strlen(filename);
389    pattern = (char *) malloc(len + 3);
390    dir = (directory_type *) malloc(sizeof(*dir));
391    if ((pattern == NULL) || (dir == NULL))
392      {
393      fprintf(stderr, "pcregrep: malloc failed\n");
394      exit(2);
395      }
396    memcpy(pattern, filename, len);
397    memcpy(&(pattern[len]), "\\*", 3);
398    dir->handle = FindFirstFile(pattern, &(dir->data));
399    if (dir->handle != INVALID_HANDLE_VALUE)
400      {
401      free(pattern);
402      dir->first = TRUE;
403      return dir;
404      }
405    err = GetLastError();
406    free(pattern);
407    free(dir);
408    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
409    return NULL;
410    }
411    
412    char *
413    readdirectory(directory_type *dir)
414    {
415    for (;;)
416      {
417      if (!dir->first)
418        {
419        if (!FindNextFile(dir->handle, &(dir->data)))
420          return NULL;
421        }
422      else
423        {
424        dir->first = FALSE;
425        }
426      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
427        return dir->data.cFileName;
428      }
429    #ifndef _MSC_VER
430    return NULL;   /* Keep compiler happy; never executed */
431    #endif
432    }
433    
434    void
435    closedirectory(directory_type *dir)
436    {
437    FindClose(dir->handle);
438    free(dir);
439    }
440    
441    
442    /************* Test for regular file in Win32 **********/
443    
444    /* I don't know how to do this, or if it can be done; assume all paths are
445    regular if they are not directories. */
446    
/* Win32 version: any path that isdirectory() does not recognize as a
directory is treated as a regular file. */

int isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
451    
452    
453    /************* Test stdout for being a terminal in Win32 **********/
454    
455  #if ! HAVE_STRERROR  /* I don't know how to do this; assume never */
456    
457    static BOOL
458    is_stdout_tty(void)
459    {
460    return FALSE;
461    }
462    
463    
464    /************* Directory scanning when we can't do it ***********/
465    
466    /* The type is void, and apart from isdirectory(), the functions do nothing. */
467    
468    #else
469    
470    typedef void directory_type;
471    
472    int isdirectory(char *filename) { return 0; }
473    directory_type * opendirectory(char *filename) { return (directory_type*)0;}
474    char *readdirectory(directory_type *dir) { return (char*)0;}
475    void closedirectory(directory_type *dir) {}
476    
477    
478    /************* Test for regular when we can't do it **********/
479    
480    /* Assume all files are regular. */
481    
int
isregfile(char *filename)
{
return 1;    /* No way of testing, so every path counts as regular */
}
483    
484    
485    /************* Test stdout for being a terminal when we can't do it **********/
486    
487    static BOOL
488    is_stdout_tty(void)
489    {
490    return FALSE;
491    }
492    
493    
494    #endif
495    
496    
497    
498    #ifndef HAVE_STRERROR
499  /*************************************************  /*************************************************
500  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
501  *************************************************/  *************************************************/
# Line 58  return sys_errlist[n]; Line 518  return sys_errlist[n];
518    
519    
520  /*************************************************  /*************************************************
521  *              Grep an individual file           *  *             Find end of line                   *
522  *************************************************/  *************************************************/
523    
524  static int  /* The length of the endline sequence that is found is set via lenptr. This may
525  pcregrep(FILE *in, char *name)  be zero at the very end of the file if there is no line-ending sequence there.
 {  
 int rc = 1;  
 int linenumber = 0;  
 int count = 0;  
 int offsets[99];  
 char buffer[BUFSIZ];  
526    
527  while (fgets(buffer, sizeof(buffer), in) != NULL)  Arguments:
528    {    p         current position in line
529    BOOL match;    endptr    end of available data
530    int length = (int)strlen(buffer);    lenptr    where to put the length of the eol sequence
   if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;  
   linenumber++;  
531    
532    match = pcre_exec(pattern, hints, buffer, length, 0, 0, offsets, 99) >= 0;  Returns:    pointer to the last byte of the line
533    if (match && whole_lines && offsets[1] != length) match = FALSE;  */
534    
535    if (match != invert)  static char *
536    end_of_line(char *p, char *endptr, int *lenptr)
537    {
538    switch(endlinetype)
539      {
540      default:      /* Just in case */
541      case EL_LF:
542      while (p < endptr && *p != '\n') p++;
543      if (p < endptr)
544      {      {
545      if (count_only) count++;      *lenptr = 1;
546        return p + 1;
547        }
548      *lenptr = 0;
549      return endptr;
550    
551      case EL_CR:
552      while (p < endptr && *p != '\r') p++;
553      if (p < endptr)
554        {
555        *lenptr = 1;
556        return p + 1;
557        }
558      *lenptr = 0;
559      return endptr;
560    
561      else if (filenames_only)    case EL_CRLF:
562      for (;;)
563        {
564        while (p < endptr && *p != '\r') p++;
565        if (++p >= endptr)
566        {        {
567        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        *lenptr = 0;
568        return 0;        return endptr;
569        }        }
570        if (*p == '\n')
571          {
572          *lenptr = 2;
573          return p + 1;
574          }
575        }
576      break;
577    
578      else if (silent) return 0;    case EL_ANYCRLF:
579      while (p < endptr)
580        {
581        int extra = 0;
582        register int c = *((unsigned char *)p);
583    
584      else      if (utf8 && c >= 0xc0)
585        {        {
586        if (name != NULL) fprintf(stdout, "%s:", name);        int gcii, gcss;
587        if (number) fprintf(stdout, "%d:", linenumber);        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
588        fprintf(stdout, "%s\n", buffer);        gcss = 6*extra;
589          c = (c & utf8_table3[extra]) << gcss;
590          for (gcii = 1; gcii <= extra; gcii++)
591            {
592            gcss -= 6;
593            c |= (p[gcii] & 0x3f) << gcss;
594            }
595        }        }
596    
597      rc = 0;      p += 1 + extra;
     }  
   }  
598    
599  if (count_only)      switch (c)
600    {        {
601    if (name != NULL) fprintf(stdout, "%s:", name);        case 0x0a:    /* LF */
602    fprintf(stdout, "%d\n", count);        *lenptr = 1;
603    }        return p;
604    
605          case 0x0d:    /* CR */
606          if (p < endptr && *p == 0x0a)
607            {
608            *lenptr = 2;
609            p++;
610            }
611          else *lenptr = 1;
612          return p;
613    
614  return rc;        default:
615  }        break;
616          }
617        }   /* End of loop for ANYCRLF case */
618    
619      *lenptr = 0;  /* Must have hit the end */
620      return endptr;
621    
622      case EL_ANY:
623      while (p < endptr)
624        {
625        int extra = 0;
626        register int c = *((unsigned char *)p);
627    
628        if (utf8 && c >= 0xc0)
629          {
630          int gcii, gcss;
631          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
632          gcss = 6*extra;
633          c = (c & utf8_table3[extra]) << gcss;
634          for (gcii = 1; gcii <= extra; gcii++)
635            {
636            gcss -= 6;
637            c |= (p[gcii] & 0x3f) << gcss;
638            }
639          }
640    
641  /*************************************************      p += 1 + extra;
 *                Usage function                  *  
 *************************************************/  
642    
643  static int      switch (c)
644  usage(int rc)        {
645  {        case 0x0a:    /* LF */
646  fprintf(stderr, "Usage: pcregrep [-Vchilnsvx] pattern [file] ...\n");        case 0x0b:    /* VT */
647  return rc;        case 0x0c:    /* FF */
648  }        *lenptr = 1;
649          return p;
650    
651          case 0x0d:    /* CR */
652          if (p < endptr && *p == 0x0a)
653            {
654            *lenptr = 2;
655            p++;
656            }
657          else *lenptr = 1;
658          return p;
659    
660          case 0x85:    /* NEL */
661          *lenptr = utf8? 2 : 1;
662          return p;
663    
664          case 0x2028:  /* LS */
665          case 0x2029:  /* PS */
666          *lenptr = 3;
667          return p;
668    
669          default:
670          break;
671          }
672        }   /* End of loop for ANY case */
673    
674      *lenptr = 0;  /* Must have hit the end */
675      return endptr;
676      }     /* End of overall switch */
677    }
678    
679    
680    
681  /*************************************************  /*************************************************
682  *                Main program                    *  *         Find start of previous line            *
683  *************************************************/  *************************************************/
684    
685  int  /* This is called when looking back for before lines to print.
 main(int argc, char **argv)  
 {  
 int i;  
 int rc = 1;  
 int options = 0;  
 int errptr;  
 const char *error;  
 BOOL filenames = TRUE;  
686    
687  /* Process the options */  Arguments:
688      p         start of the subsequent line
689      startptr  start of available data
690    
691  for (i = 1; i < argc; i++)  Returns:    pointer to the start of the previous line
692    */
693    
694    static char *
695    previous_line(char *p, char *startptr)
696    {
697    switch(endlinetype)
698    {    {
699    char *s;    default:      /* Just in case */
700    if (argv[i][0] != '-') break;    case EL_LF:
701    s = argv[i] + 1;    p--;
702    while (*s != 0)    while (p > startptr && p[-1] != '\n') p--;
703      return p;
704    
705      case EL_CR:
706      p--;
707      while (p > startptr && p[-1] != '\n') p--;
708      return p;
709    
710      case EL_CRLF:
711      for (;;)
712        {
713        p -= 2;
714        while (p > startptr && p[-1] != '\n') p--;
715        if (p <= startptr + 1 || p[-2] == '\r') return p;
716        }
717      return p;   /* But control should never get here */
718    
719      case EL_ANY:
720      case EL_ANYCRLF:
721      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
722      if (utf8) while ((*p & 0xc0) == 0x80) p--;
723    
724      while (p > startptr)
725      {      {
726      switch (*s++)      register int c;
727        char *pp = p - 1;
728    
729        if (utf8)
730          {
731          int extra = 0;
732          while ((*pp & 0xc0) == 0x80) pp--;
733          c = *((unsigned char *)pp);
734          if (c >= 0xc0)
735            {
736            int gcii, gcss;
737            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
738            gcss = 6*extra;
739            c = (c & utf8_table3[extra]) << gcss;
740            for (gcii = 1; gcii <= extra; gcii++)
741              {
742              gcss -= 6;
743              c |= (pp[gcii] & 0x3f) << gcss;
744              }
745            }
746          }
747        else c = *((unsigned char *)pp);
748    
749        if (endlinetype == EL_ANYCRLF) switch (c)
750        {        {
751        case 'c': count_only = TRUE; break;        case 0x0a:    /* LF */
752        case 'h': filenames = FALSE; break;        case 0x0d:    /* CR */
753        case 'i': options |= PCRE_CASELESS; break;        return p;
       case 'l': filenames_only = TRUE;  
       case 'n': number = TRUE; break;  
       case 's': silent = TRUE; break;  
       case 'v': invert = TRUE; break;  
       case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;  
754    
755        case 'V':        default:
       fprintf(stderr, "PCRE version %s\n", pcre_version());  
756        break;        break;
757          }
758    
759        else switch (c)
760          {
761          case 0x0a:    /* LF */
762          case 0x0b:    /* VT */
763          case 0x0c:    /* FF */
764          case 0x0d:    /* CR */
765          case 0x85:    /* NEL */
766          case 0x2028:  /* LS */
767          case 0x2029:  /* PS */
768          return p;
769    
770        default:        default:
771        fprintf(stderr, "pcregrep: unknown option %c\n", s[-1]);        break;
       return usage(2);  
772        }        }
     }  
   }  
773    
774  /* There must be at least a regexp argument */      p = pp;  /* Back one character */
775        }        /* End of loop for ANY case */
776    
777  if (i >= argc) return usage(0);    return startptr;  /* Hit start of data */
778      }     /* End of overall switch */
779    }
780    
 /* Compile the regular expression. */  
781    
 pattern = pcre_compile(argv[i++], options, &error, &errptr, NULL);  
 if (pattern == NULL)  
   {  
   fprintf(stderr, "pcregrep: error in regex at offset %d: %s\n", errptr, error);  
   return 2;  
   }  
782    
 /* Study the regular expression, as we will be running it many times */  
783    
784  hints = pcre_study(pattern, 0, &error);  
785  if (error != NULL)  /*************************************************
786    *       Print the previous "after" lines         *
787    *************************************************/
788    
789    /* This is called if we are about to lose said lines because of buffer filling,
790    and at the end of the file. The data in the line is written using fwrite() so
791    that a binary zero does not terminate it.
792    
793    Arguments:
794      lastmatchnumber   the number of the last matching line, plus one
795      lastmatchrestart  where we restarted after the last match
796      endptr            end of available data
797      printname         filename for printing
798    
799    Returns:            nothing
800    */
801    
802    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
803      char *endptr, char *printname)
804    {
805    if (after_context > 0 && lastmatchnumber > 0)
806    {    {
807    fprintf(stderr, "pcregrep: error while studing regex: %s\n", error);    int count = 0;
808    return 2;    while (lastmatchrestart < endptr && count++ < after_context)
809        {
810        int ellength;
811        char *pp = lastmatchrestart;
812        if (printname != NULL) fprintf(stdout, "%s-", printname);
813        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
814        pp = end_of_line(pp, endptr, &ellength);
815        fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
816        lastmatchrestart = pp;
817        }
818      hyphenpending = TRUE;
819    }    }
820    }
821    
 /* If there are no further arguments, do the business on stdin and exit */  
822    
 if (i >= argc) return pcregrep(stdin, NULL);  
823    
824  /* Otherwise, work through the remaining arguments as files. If there is only  /*************************************************
825  one, don't give its name on the output. */  *   Apply patterns to subject till one matches   *
826    *************************************************/
827    
828  if (i == argc - 1) filenames = FALSE;  /* This function is called to run through all patterns, looking for a match. It
829  if (filenames_only) filenames = TRUE;  is used multiple times for the same subject when colouring is enabled, in order
830    to find all possible matches.
831    
832    Arguments:
833      matchptr    the start of the subject
834      length      the length of the subject to match
835      offsets     the offets vector to fill in
836      mrc         address of where to put the result of pcre_exec()
837    
838    Returns:      TRUE if there was a match
839                  FALSE if there was no match
840                  invert if there was a non-fatal error
841    */
842    
843  for (; i < argc; i++)  static BOOL
844    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
845    {
846    int i;
847    for (i = 0; i < pattern_count; i++)
848    {    {
849    FILE *in = fopen(argv[i], "r");    *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0,
850    if (in == NULL)      PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
851      if (*mrc >= 0) return TRUE;
852      if (*mrc == PCRE_ERROR_NOMATCH) continue;
853      fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
854      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
855      fprintf(stderr, "this text:\n");
856      fwrite(matchptr, 1, length, stderr);  /* In case binary zero included */
857      fprintf(stderr, "\n");
858      if (error_count == 0 &&
859          (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
860      {      {
861      fprintf(stderr, "%s: failed to open: %s\n", argv[i], strerror(errno));      fprintf(stderr, "pcregrep: error %d means that a resource limit "
862      rc = 2;        "was exceeded\n", *mrc);
863        fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
864      }      }
865    else    if (error_count++ > 20)
866      {      {
867      int frc = pcregrep(in, filenames? argv[i] : NULL);      fprintf(stderr, "pcregrep: too many errors - abandoned\n");
868      if (frc == 0 && rc == 1) rc = 0;      exit(2);
     fclose(in);  
869      }      }
870      return invert;    /* No more matching; don't show the line again */
871    }    }
872    
873  return rc;  return FALSE;  /* No match, no errors */
874    }
875    
876    
877    
878    /*************************************************
879    *            Grep an individual file             *
880    *************************************************/
881    
882    /* This is called from grep_or_recurse() below. It uses a buffer that is three
883    times the value of MBUFTHIRD. The matching point is never allowed to stray into
884    the top third of the buffer, thus keeping more of the file available for
885    context printing or for multiline scanning. For large files, the pointer will
886    be in the middle third most of the time, so the bottom third is available for
887    "before" context printing.
888    
889    Arguments:
890      handle       the fopened FILE stream for a normal file
891                   the gzFile pointer when reading is via libz
892                   the BZFILE pointer when reading is via libbz2
893      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
894      printname    the file name if it is to be printed for each match
895                   or NULL if the file name is not to be printed
896                   it cannot be NULL if filenames[_nomatch]_only is set
897    
898    Returns:       0 if there was at least one match
899                   1 otherwise (no matches)
900                   2 if there is a read error on a .bz2 file
901    */
902    
903    static int
904    pcregrep(void *handle, int frtype, char *printname)
905    {
906    int rc = 1;
907    int linenumber = 1;
908    int lastmatchnumber = 0;
909    int count = 0;
910    int filepos = 0;
911    int offsets[OFFSET_SIZE];
912    char *lastmatchrestart = NULL;
913    char buffer[3*MBUFTHIRD];
914    char *ptr = buffer;
915    char *endptr;
916    size_t bufflength;
917    BOOL endhyphenpending = FALSE;
918    FILE *in = NULL;                    /* Ensure initialized */
919    
920    #ifdef SUPPORT_LIBZ
921    gzFile ingz = NULL;
922    #endif
923    
924    #ifdef SUPPORT_LIBBZ2
925    BZFILE *inbz2 = NULL;
926    #endif
927    
928    
929    /* Do the first read into the start of the buffer and set up the pointer to end
930    of what we have. In the case of libz, a non-zipped .gz file will be read as a
931    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
932    fail. */
933    
934    #ifdef SUPPORT_LIBZ
935    if (frtype == FR_LIBZ)
936      {
937      ingz = (gzFile)handle;
938      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
939      }
940    else
941    #endif
942    
943    #ifdef SUPPORT_LIBBZ2
944    if (frtype == FR_LIBBZ2)
945      {
946      inbz2 = (BZFILE *)handle;
947      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
948      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
949      }                                    /* without the cast it is unsigned. */
950    else
951    #endif
952    
953      {
954      in = (FILE *)handle;
955      bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
956      }
957    
958    endptr = buffer + bufflength;
959    
960    /* Loop while the current pointer is not at the end of the file. For large
961    files, endptr will be at the end of the buffer when we are in the middle of the
962    file, but ptr will never get there, because as soon as it gets over 2/3 of the
963    way, the buffer is shifted left and re-filled. */
964    
965    while (ptr < endptr)
966      {
967      int endlinelength;
968      int mrc = 0;
969      BOOL match;
970      char *matchptr = ptr;
971      char *t = ptr;
972      size_t length, linelength;
973    
974      /* At this point, ptr is at the start of a line. We need to find the length
975      of the subject string to pass to pcre_exec(). In multiline mode, it is the
976      length remainder of the data in the buffer. Otherwise, it is the length of
977      the next line, excluding the terminating newline. After matching, we always
978      advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
979      option is used for compiling, so that any match is constrained to be in the
980      first line. */
981    
982      t = end_of_line(t, endptr, &endlinelength);
983      linelength = t - ptr - endlinelength;
984      length = multiline? (size_t)(endptr - ptr) : linelength;
985    
986      /* Extra processing for Jeffrey Friedl's debugging. */
987    
988    #ifdef JFRIEDL_DEBUG
989      if (jfriedl_XT || jfriedl_XR)
990      {
991          #include <sys/time.h>
992          #include <time.h>
993          struct timeval start_time, end_time;
994          struct timezone dummy;
995          int i;
996    
997          if (jfriedl_XT)
998          {
999              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1000              const char *orig = ptr;
1001              ptr = malloc(newlen + 1);
1002              if (!ptr) {
1003                      printf("out of memory");
1004                      exit(2);
1005              }
1006              endptr = ptr;
1007              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1008              for (i = 0; i < jfriedl_XT; i++) {
1009                      strncpy(endptr, orig,  length);
1010                      endptr += length;
1011              }
1012              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1013              length = newlen;
1014          }
1015    
1016          if (gettimeofday(&start_time, &dummy) != 0)
1017                  perror("bad gettimeofday");
1018    
1019    
1020          for (i = 0; i < jfriedl_XR; i++)
1021              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1022                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1023    
1024          if (gettimeofday(&end_time, &dummy) != 0)
1025                  perror("bad gettimeofday");
1026    
1027          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1028                          -
1029                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1030    
1031          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1032          return 0;
1033      }
1034    #endif
1035    
1036      /* We come back here after a match when the -o option (only_matching) is set,
1037      in order to find any further matches in the same line. */
1038    
1039      ONLY_MATCHING_RESTART:
1040    
1041      /* Run through all the patterns until one matches or there is an error other
1042      than NOMATCH. This code is in a subroutine so that it can be re-used for
1043      finding subsequent matches when colouring matched lines. */
1044    
1045      match = match_patterns(matchptr, length, offsets, &mrc);
1046    
1047      /* If it's a match or a not-match (as required), do what's wanted. */
1048    
1049      if (match != invert)
1050        {
1051        BOOL hyphenprinted = FALSE;
1052    
1053        /* We've failed if we want a file that doesn't have any matches. */
1054    
1055        if (filenames == FN_NOMATCH_ONLY) return 1;
1056    
1057        /* Just count if just counting is wanted. */
1058    
1059        if (count_only) count++;
1060    
1061        /* If all we want is a file name, there is no need to scan any more lines
1062        in the file. */
1063    
1064        else if (filenames == FN_ONLY)
1065          {
1066          fprintf(stdout, "%s\n", printname);
1067          return 0;
1068          }
1069    
1070        /* Likewise, if all we want is a yes/no answer. */
1071    
1072        else if (quiet) return 0;
1073    
1074        /* The --only-matching option prints just the substring that matched, and
1075        the --file-offsets and --line-offsets options output offsets for the
1076        matching substring (they both force --only-matching). None of these options
1077        prints any context. Afterwards, adjust the start and length, and then jump
1078        back to look for further matches in the same line. If we are in invert
1079        mode, however, nothing is printed - this could be still useful because the
1080        return code is set. */
1081    
1082        else if (only_matching)
1083          {
1084          if (!invert)
1085            {
1086            if (printname != NULL) fprintf(stdout, "%s:", printname);
1087            if (number) fprintf(stdout, "%d:", linenumber);
1088            if (line_offsets)
1089              fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1090                offsets[1] - offsets[0]);
1091            else if (file_offsets)
1092              fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1093                offsets[1] - offsets[0]);
1094            else
1095              {
1096              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1097              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1098              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1099              }
1100            fprintf(stdout, "\n");
1101            matchptr += offsets[1];
1102            length -= offsets[1];
1103            match = FALSE;
1104            goto ONLY_MATCHING_RESTART;
1105            }
1106          }
1107    
1108        /* This is the default case when none of the above options is set. We print
1109        the matching lines(s), possibly preceded and/or followed by other lines of
1110        context. */
1111    
1112        else
1113          {
1114          /* See if there is a requirement to print some "after" lines from a
1115          previous match. We never print any overlaps. */
1116    
1117          if (after_context > 0 && lastmatchnumber > 0)
1118            {
1119            int ellength;
1120            int linecount = 0;
1121            char *p = lastmatchrestart;
1122    
1123            while (p < ptr && linecount < after_context)
1124              {
1125              p = end_of_line(p, ptr, &ellength);
1126              linecount++;
1127              }
1128    
1129            /* It is important to advance lastmatchrestart during this printing so
1130            that it interacts correctly with any "before" printing below. Print
1131            each line's data using fwrite() in case there are binary zeroes. */
1132    
1133            while (lastmatchrestart < p)
1134              {
1135              char *pp = lastmatchrestart;
1136              if (printname != NULL) fprintf(stdout, "%s-", printname);
1137              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1138              pp = end_of_line(pp, endptr, &ellength);
1139              fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1140              lastmatchrestart = pp;
1141              }
1142            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1143            }
1144    
1145          /* If there were non-contiguous lines printed above, insert hyphens. */
1146    
1147          if (hyphenpending)
1148            {
1149            fprintf(stdout, "--\n");
1150            hyphenpending = FALSE;
1151            hyphenprinted = TRUE;
1152            }
1153    
1154          /* See if there is a requirement to print some "before" lines for this
1155          match. Again, don't print overlaps. */
1156    
1157          if (before_context > 0)
1158            {
1159            int linecount = 0;
1160            char *p = ptr;
1161    
1162            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1163                   linecount < before_context)
1164              {
1165              linecount++;
1166              p = previous_line(p, buffer);
1167              }
1168    
1169            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1170              fprintf(stdout, "--\n");
1171    
1172            while (p < ptr)
1173              {
1174              int ellength;
1175              char *pp = p;
1176              if (printname != NULL) fprintf(stdout, "%s-", printname);
1177              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1178              pp = end_of_line(pp, endptr, &ellength);
1179              fwrite(p, 1, pp - p, stdout);
1180              p = pp;
1181              }
1182            }
1183    
1184          /* Now print the matching line(s); ensure we set hyphenpending at the end
1185          of the file if any context lines are being output. */
1186    
1187          if (after_context > 0 || before_context > 0)
1188            endhyphenpending = TRUE;
1189    
1190          if (printname != NULL) fprintf(stdout, "%s:", printname);
1191          if (number) fprintf(stdout, "%d:", linenumber);
1192    
1193          /* In multiline mode, we want to print to the end of the line in which
1194          the end of the matched string is found, so we adjust linelength and the
1195          line number appropriately, but only when there actually was a match
1196          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1197          the match will always be before the first newline sequence. */
1198    
1199          if (multiline)
1200            {
1201            int ellength;
1202            char *endmatch = ptr;
1203            if (!invert)
1204              {
1205              endmatch += offsets[1];
1206              t = ptr;
1207              while (t < endmatch)
1208                {
1209                t = end_of_line(t, endptr, &ellength);
1210                if (t <= endmatch) linenumber++; else break;
1211                }
1212              }
1213            endmatch = end_of_line(endmatch, endptr, &ellength);
1214            linelength = endmatch - ptr - ellength;
1215            }
1216    
1217          /*** NOTE: Use only fwrite() to output the data line, so that binary
1218          zeroes are treated as just another data character. */
1219    
1220          /* This extra option, for Jeffrey Friedl's debugging requirements,
1221          replaces the matched string, or a specific captured string if it exists,
1222          with X. When this happens, colouring is ignored. */
1223    
1224    #ifdef JFRIEDL_DEBUG
1225          if (S_arg >= 0 && S_arg < mrc)
1226            {
1227            int first = S_arg * 2;
1228            int last  = first + 1;
1229            fwrite(ptr, 1, offsets[first], stdout);
1230            fprintf(stdout, "X");
1231            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1232            }
1233          else
1234    #endif
1235    
1236          /* We have to split the line(s) up if colouring, and search for further
1237          matches. */
1238    
1239          if (do_colour)
1240            {
1241            int last_offset = 0;
1242            fwrite(ptr, 1, offsets[0], stdout);
1243            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1244            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1245            fprintf(stdout, "%c[00m", 0x1b);
1246            for (;;)
1247              {
1248              last_offset += offsets[1];
1249              matchptr += offsets[1];
1250              length -= offsets[1];
1251              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1252              fwrite(matchptr, 1, offsets[0], stdout);
1253              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1254              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1255              fprintf(stdout, "%c[00m", 0x1b);
1256              }
1257            fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,
1258              stdout);
1259            }
1260    
1261          /* Not colouring; no need to search for further matches */
1262    
1263          else fwrite(ptr, 1, linelength + endlinelength, stdout);
1264          }
1265    
1266        /* End of doing what has to be done for a match */
1267    
1268        rc = 0;    /* Had some success */
1269    
1270        /* Remember where the last match happened for after_context. We remember
1271        where we are about to restart, and that line's number. */
1272    
1273        lastmatchrestart = ptr + linelength + endlinelength;
1274        lastmatchnumber = linenumber + 1;
1275        }
1276    
1277      /* For a match in multiline inverted mode (which of course did not cause
1278      anything to be printed), we have to move on to the end of the match before
1279      proceeding. */
1280    
1281      if (multiline && invert && match)
1282        {
1283        int ellength;
1284        char *endmatch = ptr + offsets[1];
1285        t = ptr;
1286        while (t < endmatch)
1287          {
1288          t = end_of_line(t, endptr, &ellength);
1289          if (t <= endmatch) linenumber++; else break;
1290          }
1291        endmatch = end_of_line(endmatch, endptr, &ellength);
1292        linelength = endmatch - ptr - ellength;
1293        }
1294    
1295      /* Advance to after the newline and increment the line number. The file
1296      offset to the current line is maintained in filepos. */
1297    
1298      ptr += linelength + endlinelength;
1299      filepos += linelength + endlinelength;
1300      linenumber++;
1301    
1302      /* If we haven't yet reached the end of the file (the buffer is full), and
1303      the current point is in the top 1/3 of the buffer, slide the buffer down by
1304      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1305      about to be lost, print them. */
1306    
1307      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1308        {
1309        if (after_context > 0 &&
1310            lastmatchnumber > 0 &&
1311            lastmatchrestart < buffer + MBUFTHIRD)
1312          {
1313          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1314          lastmatchnumber = 0;
1315          }
1316    
1317        /* Now do the shuffle */
1318    
1319        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1320        ptr -= MBUFTHIRD;
1321    
1322    #ifdef SUPPORT_LIBZ
1323        if (frtype == FR_LIBZ)
1324          bufflength = 2*MBUFTHIRD +
1325            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1326        else
1327    #endif
1328    
1329    #ifdef SUPPORT_LIBBZ2
1330        if (frtype == FR_LIBBZ2)
1331          bufflength = 2*MBUFTHIRD +
1332            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1333        else
1334    #endif
1335    
1336        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1337    
1338        endptr = buffer + bufflength;
1339    
1340        /* Adjust any last match point */
1341    
1342        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1343        }
1344      }     /* Loop through the whole file */
1345    
1346    /* End of file; print final "after" lines if wanted; do_after_lines sets
1347    hyphenpending if it prints something. */
1348    
1349    if (!only_matching && !count_only)
1350      {
1351      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1352      hyphenpending |= endhyphenpending;
1353      }
1354    
1355    /* Print the file name if we are looking for those without matches and there
1356    were none. If we found a match, we won't have got this far. */
1357    
1358    if (filenames == FN_NOMATCH_ONLY)
1359      {
1360      fprintf(stdout, "%s\n", printname);
1361      return 0;
1362      }
1363    
1364    /* Print the match count if wanted */
1365    
1366    if (count_only)
1367      {
1368      if (printname != NULL) fprintf(stdout, "%s:", printname);
1369      fprintf(stdout, "%d\n", count);
1370      }
1371    
1372    return rc;
1373    }
1374    
1375    
1376    
1377    /*************************************************
1378    *     Grep a file or recurse into a directory    *
1379    *************************************************/
1380    
1381    /* Given a path name, if it's a directory, scan all the files if we are
1382    recursing; if it's a file, grep it.
1383    
1384    Arguments:
1385      pathname          the path to investigate
1386      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1387      only_one_at_top   TRUE if the path is the only one at toplevel
1388    
1389    Returns:   0 if there was at least one match
1390               1 if there were no matches
1391               2 there was some kind of error
1392    
1393    However, file opening failures are suppressed if "silent" is set.
1394    */
1395    
1396    static int
1397    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1398    {
1399    int rc = 1;
1400    int sep;
1401    int frtype;
1402    int pathlen;
1403    void *handle;
1404    FILE *in = NULL;           /* Ensure initialized */
1405    
1406    #ifdef SUPPORT_LIBZ
1407    gzFile ingz = NULL;
1408    #endif
1409    
1410    #ifdef SUPPORT_LIBBZ2
1411    BZFILE *inbz2 = NULL;
1412    #endif
1413    
1414    /* If the file name is "-" we scan stdin */
1415    
1416    if (strcmp(pathname, "-") == 0)
1417      {
1418      return pcregrep(stdin, FR_PLAIN,
1419        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1420          stdin_name : NULL);
1421      }
1422    
1423    /* If the file is a directory, skip if skipping or if we are recursing, scan
1424    each file and directory within it, subject to any include or exclude patterns
1425    that were set. The scanning code is localized so it can be made
1426    system-specific. */
1427    
1428    if ((sep = isdirectory(pathname)) != 0)
1429      {
1430      if (dee_action == dee_SKIP) return 1;
1431      if (dee_action == dee_RECURSE)
1432        {
1433        char buffer[1024];
1434        char *nextfile;
1435        directory_type *dir = opendirectory(pathname);
1436    
1437        if (dir == NULL)
1438          {
1439          if (!silent)
1440            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1441              strerror(errno));
1442          return 2;
1443          }
1444    
1445        while ((nextfile = readdirectory(dir)) != NULL)
1446          {
1447          int frc, nflen;
1448          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1449          nflen = strlen(nextfile);
1450    
1451          if (isdirectory(buffer))
1452            {
1453            if (exclude_dir_compiled != NULL &&
1454                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1455              continue;
1456    
1457            if (include_dir_compiled != NULL &&
1458                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1459              continue;
1460            }
1461          else
1462            {
1463            if (exclude_compiled != NULL &&
1464                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1465              continue;
1466    
1467            if (include_compiled != NULL &&
1468                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1469              continue;
1470            }
1471    
1472          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1473          if (frc > 1) rc = frc;
1474           else if (frc == 0 && rc == 1) rc = 0;
1475          }
1476    
1477        closedirectory(dir);
1478        return rc;
1479        }
1480      }
1481    
1482    /* If the file is not a directory and not a regular file, skip it if that's
1483    been requested. */
1484    
1485    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1486    
1487    /* Control reaches here if we have a regular file, or if we have a directory
1488    and recursion or skipping was not requested, or if we have anything else and
1489    skipping was not requested. The scan proceeds. If this is the first and only
1490    argument at top level, we don't show the file name, unless we are only showing
1491    the file name, or the filename was forced (-H). */
1492    
1493    pathlen = strlen(pathname);
1494    
1495    /* Open using zlib if it is supported and the file name ends with .gz. */
1496    
1497    #ifdef SUPPORT_LIBZ
1498    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1499      {
1500      ingz = gzopen(pathname, "rb");
1501      if (ingz == NULL)
1502        {
1503        if (!silent)
1504          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1505            strerror(errno));
1506        return 2;
1507        }
1508      handle = (void *)ingz;
1509      frtype = FR_LIBZ;
1510      }
1511    else
1512    #endif
1513    
1514    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1515    
1516    #ifdef SUPPORT_LIBBZ2
1517    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1518      {
1519      inbz2 = BZ2_bzopen(pathname, "rb");
1520      handle = (void *)inbz2;
1521      frtype = FR_LIBBZ2;
1522      }
1523    else
1524    #endif
1525    
1526    /* Otherwise use plain fopen(). The label is so that we can come back here if
1527    an attempt to read a .bz2 file indicates that it really is a plain file. */
1528    
1529    #ifdef SUPPORT_LIBBZ2
1530    PLAIN_FILE:
1531    #endif
1532      {
1533      in = fopen(pathname, "r");
1534      handle = (void *)in;
1535      frtype = FR_PLAIN;
1536      }
1537    
1538    /* All the opening methods return errno when they fail. */
1539    
1540    if (handle == NULL)
1541      {
1542      if (!silent)
1543        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1544          strerror(errno));
1545      return 2;
1546      }
1547    
1548    /* Now grep the file */
1549    
1550    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1551      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1552    
1553    /* Close in an appropriate manner. */
1554    
1555    #ifdef SUPPORT_LIBZ
1556    if (frtype == FR_LIBZ)
1557      gzclose(ingz);
1558    else
1559    #endif
1560    
1561    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1562    read failed. If the error indicates that the file isn't in fact bzipped, try
1563    again as a normal file. */
1564    
1565    #ifdef SUPPORT_LIBBZ2
1566    if (frtype == FR_LIBBZ2)
1567      {
1568      if (rc == 2)
1569        {
1570        int errnum;
1571        const char *err = BZ2_bzerror(inbz2, &errnum);
1572        if (errnum == BZ_DATA_ERROR_MAGIC)
1573          {
1574          BZ2_bzclose(inbz2);
1575          goto PLAIN_FILE;
1576          }
1577        else if (!silent)
1578          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1579            pathname, err);
1580        }
1581      BZ2_bzclose(inbz2);
1582      }
1583    else
1584    #endif
1585    
1586    /* Normal file close */
1587    
1588    fclose(in);
1589    
1590    /* Pass back the yield from pcregrep(). */
1591    
1592    return rc;
1593    }
1594    
1595    
1596    
1597    
1598    /*************************************************
1599    *                Usage function                  *
1600    *************************************************/
1601    
1602    static int
1603    usage(int rc)
1604    {
1605    option_item *op;
1606    fprintf(stderr, "Usage: pcregrep [-");
1607    for (op = optionlist; op->one_char != 0; op++)
1608      {
1609      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1610      }
1611    fprintf(stderr, "] [long options] [pattern] [files]\n");
1612    fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1613      "options.\n");
1614    return rc;
1615    }
1616    
1617    
1618    
1619    
1620    /*************************************************
1621    *                Help function                   *
1622    *************************************************/
1623    
1624    static void
1625    help(void)
1626    {
1627    option_item *op;
1628    
1629    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1630    printf("Search for PATTERN in each FILE or standard input.\n");
1631    printf("PATTERN must be present if neither -e nor -f is used.\n");
1632    printf("\"-\" can be used as a file name to mean STDIN.\n");
1633    
1634    #ifdef SUPPORT_LIBZ
1635    printf("Files whose names end in .gz are read using zlib.\n");
1636    #endif
1637    
1638    #ifdef SUPPORT_LIBBZ2
1639    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1640    #endif
1641    
1642    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1643    printf("Other files and the standard input are read as plain files.\n\n");
1644    #else
1645    printf("All files are read as plain files, without any interpretation.\n\n");
1646    #endif
1647    
1648    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1649    printf("Options:\n");
1650    
1651    for (op = optionlist; op->one_char != 0; op++)
1652      {
1653      int n;
1654      char s[4];
1655      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1656      n = 30 - printf("  %s --%s", s, op->long_name);
1657      if (n < 1) n = 1;
1658      printf("%.*s%s\n", n, "                    ", op->help_text);
1659      }
1660    
1661    printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1662    printf("trailing white space is removed and blank lines are ignored.\n");
1663    printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1664    
1665    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1666    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1667    }
1668    
1669    
1670    
1671    
1672    /*************************************************
1673    *    Handle a single-letter, no data option      *
1674    *************************************************/
1675    
1676    static int
1677    handle_option(int letter, int options)
1678    {
1679    switch(letter)
1680      {
1681      case N_FOFFSETS: file_offsets = TRUE; break;
1682      case N_HELP: help(); exit(0);
1683      case N_LOFFSETS: line_offsets = number = TRUE; break;
1684      case 'c': count_only = TRUE; break;
1685      case 'F': process_options |= PO_FIXED_STRINGS; break;
1686      case 'H': filenames = FN_FORCE; break;
1687      case 'h': filenames = FN_NONE; break;
1688      case 'i': options |= PCRE_CASELESS; break;
1689      case 'l': filenames = FN_ONLY; break;
1690      case 'L': filenames = FN_NOMATCH_ONLY; break;
1691      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1692      case 'n': number = TRUE; break;
1693      case 'o': only_matching = TRUE; break;
1694      case 'q': quiet = TRUE; break;
1695      case 'r': dee_action = dee_RECURSE; break;
1696      case 's': silent = TRUE; break;
1697      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1698      case 'v': invert = TRUE; break;
1699      case 'w': process_options |= PO_WORD_MATCH; break;
1700      case 'x': process_options |= PO_LINE_MATCH; break;
1701    
1702      case 'V':
1703      fprintf(stderr, "pcregrep version %s\n", pcre_version());
1704      exit(0);
1705      break;
1706    
1707      default:
1708      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1709      exit(usage(2));
1710      }
1711    
1712    return options;
1713    }
1714    
1715    
1716    
1717    
1718    /*************************************************
1719    *          Construct printed ordinal             *
1720    *************************************************/
1721    
/* This turns a number into "1st", "2nd", "3rd", "4th", etc. Numbers whose
last two digits are 11, 12, or 13 take "th" ("11th", "112th"), as do all
numbers whose last digit is not 1, 2, or 3. Negative numbers always get "th".

Argument:   the number
Returns:    pointer to a static buffer holding the text; the contents are
              overwritten by the next call
*/

static char *
ordin(int n)
{
/* Three characters for each byte is more than enough for the decimal digits
of an int; the extra four are for a minus sign, the two-letter ending, and the
terminating zero. */

static char buffer[sizeof(int) * 3 + 4];
const char *ending = "th";
int last_two = n % 100;

if (last_two < 11 || last_two > 13)
  {
  switch (last_two % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1740    
1741    
1742    
1743    /*************************************************
1744    *          Compile a single pattern              *
1745    *************************************************/
1746    
1747    /* When the -F option has been used, this is called for each substring.
1748    Otherwise it's called for each supplied pattern.
1749    
1750    Arguments:
1751      pattern        the pattern string
1752      options        the PCRE options
1753      filename       the file name, or NULL for a command-line pattern
1754      count          0 if this is the only command line pattern, or
1755                     number of the command line pattern, or
1756                     linenumber for a pattern from a file
1757    
1758    Returns:         TRUE on success, FALSE after an error
1759    */
1760    
1761    static BOOL
1762    compile_single_pattern(char *pattern, int options, char *filename, int count)
1763    {
1764    char buffer[MBUFTHIRD + 16];
1765    const char *error;
1766    int errptr;
1767    
1768    if (pattern_count >= MAX_PATTERN_COUNT)
1769      {
1770      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1771        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1772      return FALSE;
1773      }
1774    
1775    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1776      suffix[process_options]);
1777    pattern_list[pattern_count] =
1778      pcre_compile(buffer, options, &error, &errptr, pcretables);
1779    if (pattern_list[pattern_count] != NULL)
1780      {
1781      pattern_count++;
1782      return TRUE;
1783      }
1784    
1785    /* Handle compile errors */
1786    
1787    errptr -= (int)strlen(prefix[process_options]);
1788    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1789    
1790    if (filename == NULL)
1791      {
1792      if (count == 0)
1793        fprintf(stderr, "pcregrep: Error in command-line regex "
1794          "at offset %d: %s\n", errptr, error);
1795      else
1796        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1797          "at offset %d: %s\n", ordin(count), errptr, error);
1798      }
1799    else
1800      {
1801      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1802        "at offset %d: %s\n", count, filename, errptr, error);
1803      }
1804    
1805    return FALSE;
1806    }
1807    
1808    
1809    
1810    /*************************************************
1811    *           Compile one supplied pattern         *
1812    *************************************************/
1813    
1814    /* When the -F option has been used, each string may be a list of strings,
1815    separated by line breaks. They will be matched literally.
1816    
1817    Arguments:
1818      pattern        the pattern string
1819      options        the PCRE options
1820      filename       the file name, or NULL for a command-line pattern
1821      count          0 if this is the only command line pattern, or
1822                     number of the command line pattern, or
1823                     linenumber for a pattern from a file
1824    
1825    Returns:         TRUE on success, FALSE after an error
1826    */
1827    
1828    static BOOL
1829    compile_pattern(char *pattern, int options, char *filename, int count)
1830    {
1831    if ((process_options & PO_FIXED_STRINGS) != 0)
1832      {
1833      char *eop = pattern + strlen(pattern);
1834      char buffer[MBUFTHIRD];
1835      for(;;)
1836        {
1837        int ellength;
1838        char *p = end_of_line(pattern, eop, &ellength);
1839        if (ellength == 0)
1840          return compile_single_pattern(pattern, options, filename, count);
1841        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1842        pattern = p;
1843        if (!compile_single_pattern(buffer, options, filename, count))
1844          return FALSE;
1845        }
1846      }
1847    else return compile_single_pattern(pattern, options, filename, count);
1848    }
1849    
1850    
1851    
1852    /*************************************************
1853    *                Main program                    *
1854    *************************************************/
1855    
1856    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1857    
1858    int
1859    main(int argc, char **argv)
1860    {
1861    int i, j;
1862    int rc = 1;
1863    int pcre_options = 0;
1864    int cmd_pattern_count = 0;
1865    int hint_count = 0;
1866    int errptr;
1867    BOOL only_one_at_top;
1868    char *patterns[MAX_PATTERN_COUNT];
1869    const char *locale_from = "--locale";
1870    const char *error;
1871    
1872    /* Set the default line ending value from the default in the PCRE library;
1873    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1874    Note that the return values from pcre_config(), though derived from the ASCII
1875    codes, are the same in EBCDIC environments, so we must use the actual values
1876    rather than escapes such as as '\r'. */
1877    
1878    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1879    switch(i)
1880      {
1881      default:               newline = (char *)"lf"; break;
1882      case 13:               newline = (char *)"cr"; break;
1883      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
1884      case -1:               newline = (char *)"any"; break;
1885      case -2:               newline = (char *)"anycrlf"; break;
1886      }
1887    
1888    /* Process the options */
1889    
1890    for (i = 1; i < argc; i++)
1891      {
1892      option_item *op = NULL;
1893      char *option_data = (char *)"";    /* default to keep compiler happy */
1894      BOOL longop;
1895      BOOL longopwasequals = FALSE;
1896    
1897      if (argv[i][0] != '-') break;
1898    
1899      /* If we hit an argument that is just "-", it may be a reference to STDIN,
1900      but only if we have previously had -e or -f to define the patterns. */
1901    
1902      if (argv[i][1] == 0)
1903        {
1904        if (pattern_filename != NULL || pattern_count > 0) break;
1905          else exit(usage(2));
1906        }
1907    
1908      /* Handle a long name option, or -- to terminate the options */
1909    
1910      if (argv[i][1] == '-')
1911        {
1912        char *arg = argv[i] + 2;
1913        char *argequals = strchr(arg, '=');
1914    
1915        if (*arg == 0)    /* -- terminates options */
1916          {
1917          i++;
1918          break;                /* out of the options-handling loop */
1919          }
1920    
1921        longop = TRUE;
1922    
1923        /* Some long options have data that follows after =, for example file=name.
1924        Some options have variations in the long name spelling: specifically, we
1925        allow "regexp" because GNU grep allows it, though I personally go along
1926        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1927        These options are entered in the table as "regex(p)". No option is in both
1928        these categories, fortunately. */
1929    
1930        for (op = optionlist; op->one_char != 0; op++)
1931          {
1932          char *opbra = strchr(op->long_name, '(');
1933          char *equals = strchr(op->long_name, '=');
1934          if (opbra == NULL)     /* Not a (p) case */
1935            {
1936            if (equals == NULL)  /* Not thing=data case */
1937              {
1938              if (strcmp(arg, op->long_name) == 0) break;
1939              }
1940            else                 /* Special case xxx=data */
1941              {
1942              int oplen = equals - op->long_name;
1943              int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1944              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1945                {
1946                option_data = arg + arglen;
1947                if (*option_data == '=')
1948                  {
1949                  option_data++;
1950                  longopwasequals = TRUE;
1951                  }
1952                break;
1953                }
1954              }
1955            }
1956          else                   /* Special case xxxx(p) */
1957            {
1958            char buff1[24];
1959            char buff2[24];
1960            int baselen = opbra - op->long_name;
1961            sprintf(buff1, "%.*s", baselen, op->long_name);
1962            sprintf(buff2, "%s%.*s", buff1,
1963              (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1964            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1965              break;
1966            }
1967          }
1968    
1969        if (op->one_char == 0)
1970          {
1971          fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1972          exit(usage(2));
1973          }
1974        }
1975    
1976    
1977      /* Jeffrey Friedl's debugging harness uses these additional options which
1978      are not in the right form for putting in the option table because they use
1979      only one hyphen, yet are more than one character long. By putting them
1980      separately here, they will not get displayed as part of the help() output,
1981      but I don't think Jeffrey will care about that. */
1982    
1983    #ifdef JFRIEDL_DEBUG
1984      else if (strcmp(argv[i], "-pre") == 0) {
1985              jfriedl_prefix = argv[++i];
1986              continue;
1987      } else if (strcmp(argv[i], "-post") == 0) {
1988              jfriedl_postfix = argv[++i];
1989              continue;
1990      } else if (strcmp(argv[i], "-XT") == 0) {
1991              sscanf(argv[++i], "%d", &jfriedl_XT);
1992              continue;
1993      } else if (strcmp(argv[i], "-XR") == 0) {
1994              sscanf(argv[++i], "%d", &jfriedl_XR);
1995              continue;
1996      }
1997    #endif
1998    
1999    
2000      /* One-char options; many that have no data may be in a single argument; we
2001      continue till we hit the last one or one that needs data. */
2002    
2003      else
2004        {
2005        char *s = argv[i] + 1;
2006        longop = FALSE;
2007        while (*s != 0)
2008          {
2009          for (op = optionlist; op->one_char != 0; op++)
2010            { if (*s == op->one_char) break; }
2011          if (op->one_char == 0)
2012            {
2013            fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2014              *s, argv[i]);
2015            exit(usage(2));
2016            }
2017          if (op->type != OP_NODATA || s[1] == 0)
2018            {
2019            option_data = s+1;
2020            break;
2021            }
2022          pcre_options = handle_option(*s++, pcre_options);
2023          }
2024        }
2025    
2026      /* At this point we should have op pointing to a matched option. If the type
2027      is NO_DATA, it means that there is no data, and the option might set
2028      something in the PCRE options. */
2029    
2030      if (op->type == OP_NODATA)
2031        {
2032        pcre_options = handle_option(op->one_char, pcre_options);
2033        continue;
2034        }
2035    
2036      /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2037      either has a value or defaults to something. It cannot have data in a
2038      separate item. At the moment, the only such options are "colo(u)r" and
2039      Jeffrey Friedl's special -S debugging option. */
2040    
2041      if (*option_data == 0 &&
2042          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2043        {
2044        switch (op->one_char)
2045          {
2046          case N_COLOUR:
2047          colour_option = (char *)"auto";
2048          break;
2049    #ifdef JFRIEDL_DEBUG
2050          case 'S':
2051          S_arg = 0;
2052          break;
2053    #endif
2054          }
2055        continue;
2056        }
2057    
2058      /* Otherwise, find the data string for the option. */
2059    
2060      if (*option_data == 0)
2061        {
2062        if (i >= argc - 1 || longopwasequals)
2063          {
2064          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2065          exit(usage(2));
2066          }
2067        option_data = argv[++i];
2068        }
2069    
2070      /* If the option type is OP_PATLIST, it's the -e option, which can be called
2071      multiple times to create a list of patterns. */
2072    
2073      if (op->type == OP_PATLIST)
2074        {
2075        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2076          {
2077          fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2078            MAX_PATTERN_COUNT);
2079          return 2;
2080          }
2081        patterns[cmd_pattern_count++] = option_data;
2082        }
2083    
2084      /* Otherwise, deal with single string or numeric data values. */
2085    
2086      else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2087        {
2088        *((char **)op->dataptr) = option_data;
2089        }
2090      else
2091        {
2092        char *endptr;
2093        int n = strtoul(option_data, &endptr, 10);
2094        if (*endptr != 0)
2095          {
2096          if (longop)
2097            {
2098            char *equals = strchr(op->long_name, '=');
2099            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2100              equals - op->long_name;
2101            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2102              option_data, nlen, op->long_name);
2103            }
2104          else
2105            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2106              option_data, op->one_char);
2107          exit(usage(2));
2108          }
2109        *((int *)op->dataptr) = n;
2110        }
2111      }
2112    
2113    /* Options have been decoded. If -C was used, its value is used as a default
2114    for -A and -B. */
2115    
2116    if (both_context > 0)
2117      {
2118      if (after_context == 0) after_context = both_context;
2119      if (before_context == 0) before_context = both_context;
2120      }
2121    
2122    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2123    However, the latter two set the only_matching flag. */
2124    
2125    if ((only_matching && (file_offsets || line_offsets)) ||
2126        (file_offsets && line_offsets))
2127      {
2128      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2129        "and/or --line-offsets\n");
2130      exit(usage(2));
2131      }
2132    
2133    if (file_offsets || line_offsets) only_matching = TRUE;
2134    
2135    /* If a locale has not been provided as an option, see if the LC_CTYPE or
2136    LC_ALL environment variable is set, and if so, use it. */
2137    
2138    if (locale == NULL)
2139      {
2140      locale = getenv("LC_ALL");
2141      locale_from = "LCC_ALL";
2142      }
2143    
2144    if (locale == NULL)
2145      {
2146      locale = getenv("LC_CTYPE");
2147      locale_from = "LC_CTYPE";
2148      }
2149    
2150    /* If a locale has been provided, set it, and generate the tables the PCRE
2151    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2152    
2153    if (locale != NULL)
2154      {
2155      if (setlocale(LC_CTYPE, locale) == NULL)
2156        {
2157        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2158          locale, locale_from);
2159        return 2;
2160        }
2161      pcretables = pcre_maketables();
2162      }
2163    
2164    /* Sort out colouring */
2165    
2166    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2167      {
2168      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2169      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2170      else
2171        {
2172        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2173          colour_option);
2174        return 2;
2175        }
2176      if (do_colour)
2177        {
2178        char *cs = getenv("PCREGREP_COLOUR");
2179        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2180        if (cs != NULL) colour_string = cs;
2181        }
2182      }
2183    
2184    /* Interpret the newline type; the default settings are Unix-like. */
2185    
2186    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2187      {
2188      pcre_options |= PCRE_NEWLINE_CR;
2189      endlinetype = EL_CR;
2190      }
2191    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2192      {
2193      pcre_options |= PCRE_NEWLINE_LF;
2194      endlinetype = EL_LF;
2195      }
2196    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2197      {
2198      pcre_options |= PCRE_NEWLINE_CRLF;
2199      endlinetype = EL_CRLF;
2200      }
2201    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2202      {
2203      pcre_options |= PCRE_NEWLINE_ANY;
2204      endlinetype = EL_ANY;
2205      }
2206    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2207      {
2208      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2209      endlinetype = EL_ANYCRLF;
2210      }
2211    else
2212      {
2213      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2214      return 2;
2215      }
2216    
2217    /* Interpret the text values for -d and -D */
2218    
2219    if (dee_option != NULL)
2220      {
2221      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2222      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2223      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2224      else
2225        {
2226        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2227        return 2;
2228        }
2229      }
2230    
2231    if (DEE_option != NULL)
2232      {
2233      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2234      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2235      else
2236        {
2237        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2238        return 2;
2239        }
2240      }
2241    
2242    /* Check the values for Jeffrey Friedl's debugging options. */
2243    
2244    #ifdef JFRIEDL_DEBUG
2245    if (S_arg > 9)
2246      {
2247      fprintf(stderr, "pcregrep: bad value for -S option\n");
2248      return 2;
2249      }
2250    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2251      {
2252      if (jfriedl_XT == 0) jfriedl_XT = 1;
2253      if (jfriedl_XR == 0) jfriedl_XR = 1;
2254      }
2255    #endif
2256    
2257    /* Get memory to store the pattern and hints lists. */
2258    
2259    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2260    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2261    
2262    if (pattern_list == NULL || hints_list == NULL)
2263      {
2264      fprintf(stderr, "pcregrep: malloc failed\n");
2265      goto EXIT2;
2266      }
2267    
2268    /* If no patterns were provided by -e, and there is no file provided by -f,
2269    the first argument is the one and only pattern, and it must exist. */
2270    
2271    if (cmd_pattern_count == 0 && pattern_filename == NULL)
2272      {
2273      if (i >= argc) return usage(2);
2274      patterns[cmd_pattern_count++] = argv[i++];
2275      }
2276    
2277    /* Compile the patterns that were provided on the command line, either by
2278    multiple uses of -e or as a single unkeyed pattern. */
2279    
2280    for (j = 0; j < cmd_pattern_count; j++)
2281      {
2282      if (!compile_pattern(patterns[j], pcre_options, NULL,
2283           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2284        goto EXIT2;
2285      }
2286    
2287    /* Compile the regular expressions that are provided in a file. */
2288    
2289    if (pattern_filename != NULL)
2290      {
2291      int linenumber = 0;
2292      FILE *f;
2293      char *filename;
2294      char buffer[MBUFTHIRD];
2295    
2296      if (strcmp(pattern_filename, "-") == 0)
2297        {
2298        f = stdin;
2299        filename = stdin_name;
2300        }
2301      else
2302        {
2303        f = fopen(pattern_filename, "r");
2304        if (f == NULL)
2305          {
2306          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2307            strerror(errno));
2308          goto EXIT2;
2309          }
2310        filename = pattern_filename;
2311        }
2312    
2313      while (fgets(buffer, MBUFTHIRD, f) != NULL)
2314        {
2315        char *s = buffer + (int)strlen(buffer);
2316        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2317        *s = 0;
2318        linenumber++;
2319        if (buffer[0] == 0) continue;   /* Skip blank lines */
2320        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2321          goto EXIT2;
2322        }
2323    
2324      if (f != stdin) fclose(f);
2325      }
2326    
2327    /* Study the regular expressions, as we will be running them many times */
2328    
2329    for (j = 0; j < pattern_count; j++)
2330      {
2331      hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2332      if (error != NULL)
2333        {
2334        char s[16];
2335        if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2336        fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2337        goto EXIT2;
2338        }
2339      hint_count++;
2340      }
2341    
2342    /* If there are include or exclude patterns, compile them. */
2343    
2344    if (exclude_pattern != NULL)
2345      {
2346      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2347        pcretables);
2348      if (exclude_compiled == NULL)
2349        {
2350        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2351          errptr, error);
2352        goto EXIT2;
2353        }
2354      }
2355    
2356    if (include_pattern != NULL)
2357      {
2358      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2359        pcretables);
2360      if (include_compiled == NULL)
2361        {
2362        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2363          errptr, error);
2364        goto EXIT2;
2365        }
2366      }
2367    
2368    if (exclude_dir_pattern != NULL)
2369      {
2370      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2371        pcretables);
2372      if (exclude_dir_compiled == NULL)
2373        {
2374        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2375          errptr, error);
2376        goto EXIT2;
2377        }
2378      }
2379    
2380    if (include_dir_pattern != NULL)
2381      {
2382      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2383        pcretables);
2384      if (include_dir_compiled == NULL)
2385        {
2386        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2387          errptr, error);
2388        goto EXIT2;
2389        }
2390      }
2391    
2392    /* If there are no further arguments, do the business on stdin and exit. */
2393    
2394    if (i >= argc)
2395      {
2396      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2397      goto EXIT;
2398      }
2399    
2400    /* Otherwise, work through the remaining arguments as files or directories.
2401    Pass in the fact that there is only one argument at top level - this suppresses
2402    the file name if the argument is not a directory and filenames are not
2403    otherwise forced. */
2404    
2405    only_one_at_top = i == argc - 1;   /* Catch initial value of i */
2406    
2407    for (; i < argc; i++)
2408      {
2409      int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2410        only_one_at_top);
2411      if (frc > 1) rc = frc;
2412        else if (frc == 0 && rc == 1) rc = 0;
2413      }
2414    
2415    EXIT:
2416    if (pattern_list != NULL)
2417      {
2418      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2419      free(pattern_list);
2420      }
2421    if (hints_list != NULL)
2422      {
2423      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2424      free(hints_list);
2425      }
2426    return rc;
2427    
2428    EXIT2:
2429    rc = 2;
2430    goto EXIT;
2431  }  }
2432    
2433  /* End */  /* End of pcregrep */

Legend:
Removed from v.49  
changed lines
  Added in v.392

  ViewVC Help
Powered by ViewVC 1.1.5