/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 530 by ph10, Tue Jun 1 13:42:06 2010 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <locale.h>  #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
 #include "config.h"  
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 56  POSSIBILITY OF SUCH DAMAGE. Line 70  POSSIBILITY OF SUCH DAMAGE.
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "4.2 09-Jan-2006"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define MBUFTHIRD BUFSIZ
# Line 65  typedef int BOOL; Line 79  typedef int BOOL;
79  #define MBUFTHIRD 8192  #define MBUFTHIRD 8192
80  #endif  #endif
81    
   
82  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
83  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
84  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
85    
86  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92  /* Actions for the -d and -D options */  /* Actions for the -d and -D options */
93    
# Line 83  enum { DEE_READ, DEE_SKIP }; Line 100  enum { DEE_READ, DEE_SKIP };
100  #define PO_LINE_MATCH     0x0002  #define PO_LINE_MATCH     0x0002
101  #define PO_FIXED_STRINGS  0x0004  #define PO_FIXED_STRINGS  0x0004
102    
103    /* Line ending types */
104    
105    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
113    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
114    
115    
116    
117  /*************************************************  /*************************************************
# Line 94  regular code. */ Line 123  regular code. */
123    
124  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
125  static int S_arg = -1;  static int S_arg = -1;
126    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128    static const char *jfriedl_prefix = "";
129    static const char *jfriedl_postfix = "";
130  #endif  #endif
131    
132    static int  endlinetype;
133    
134  static char *colour_string = (char *)"1;31";  static char *colour_string = (char *)"1;31";
135  static char *colour_option = NULL;  static char *colour_option = NULL;
136  static char *dee_option = NULL;  static char *dee_option = NULL;
137  static char *DEE_option = NULL;  static char *DEE_option = NULL;
138    static char *newline = NULL;
139  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
140  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
141  static char *locale = NULL;  static char *locale = NULL;
# Line 107  static char *locale = NULL; Line 143  static char *locale = NULL;
143  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
144    
145  static int  pattern_count = 0;  static int  pattern_count = 0;
146  static pcre **pattern_list;  static pcre **pattern_list = NULL;
147  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
148    
149  static char *include_pattern = NULL;  static char *include_pattern = NULL;
150  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
151    static char *include_dir_pattern = NULL;
152    static char *exclude_dir_pattern = NULL;
153    
154  static pcre *include_compiled = NULL;  static pcre *include_compiled = NULL;
155  static pcre *exclude_compiled = NULL;  static pcre *exclude_compiled = NULL;
156    static pcre *include_dir_compiled = NULL;
157    static pcre *exclude_dir_compiled = NULL;
158    
159  static int after_context = 0;  static int after_context = 0;
160  static int before_context = 0;  static int before_context = 0;
# Line 127  static int process_options = 0; Line 167  static int process_options = 0;
167    
168  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
169  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
170    static BOOL file_offsets = FALSE;
171  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
172  static BOOL invert = FALSE;  static BOOL invert = FALSE;
173    static BOOL line_buffered = FALSE;
174    static BOOL line_offsets = FALSE;
175  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
176  static BOOL number = FALSE;  static BOOL number = FALSE;
177    static BOOL omit_zero_count = FALSE;
178  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
179  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
180  static BOOL silent = FALSE;  static BOOL silent = FALSE;
181    static BOOL utf8 = FALSE;
182    
183  /* Structure for options and list of them */  /* Structure for options and list of them */
184    
# Line 151  typedef struct option_item { Line 196  typedef struct option_item {
196  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
197  used to identify them. */  used to identify them. */
198    
199  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
200  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
201  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
202  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
203  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
204  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
205  #define N_NULL      (-7)  #define N_LABEL        (-7)
206    #define N_LOCALE       (-8)
207    #define N_NULL         (-9)
208    #define N_LOFFSETS     (-10)
209    #define N_FOFFSETS     (-11)
210    #define N_LBUFFER      (-12)
211    
212  static option_item optionlist[] = {  static option_item optionlist[] = {
213    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
# Line 170  static option_item optionlist[] = { Line 220  static option_item optionlist[] = {
220    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
221    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
222    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
223    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_PATLIST,   'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
224    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_NODATA,    'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
225    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
226      { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
227    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
228    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
229    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
230    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
231    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
232    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
233      { OP_NODATA,    N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
234      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
235    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
236    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
237      { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
238    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
239    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
240    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
241    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
242    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
243    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
244      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
245      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
246  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
247    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
248  #endif  #endif
# Line 210  static const char *prefix[] = { Line 266  static const char *prefix[] = {
266  static const char *suffix[] = {  static const char *suffix[] = {
267    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
268    
269    /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
270    
271    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
272    
273    const char utf8_table4[] = {
274      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
275      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
276      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
277      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
278    
279    
280    
281  /*************************************************  /*************************************************
# Line 222  although at present the only ones are fo Line 288  although at present the only ones are fo
288    
289  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
290    
291  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
292  #include <sys/types.h>  #include <sys/types.h>
293  #include <sys/stat.h>  #include <sys/stat.h>
294  #include <dirent.h>  #include <dirent.h>
# Line 254  for (;;) Line 320  for (;;)
320    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
321      return dent->d_name;      return dent->d_name;
322    }    }
323  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
324  }  }
325    
326  static void  static void
# Line 276  return (statbuf.st_mode & S_IFMT) == S_I Line 342  return (statbuf.st_mode & S_IFMT) == S_I
342  }  }
343    
344    
345  /************* Test stdout for being a terminal in Unix **********/  /************* Test for a terminal in Unix **********/
346    
347  static BOOL  static BOOL
348  is_stdout_tty(void)  is_stdout_tty(void)
# Line 284  is_stdout_tty(void) Line 350  is_stdout_tty(void)
350  return isatty(fileno(stdout));  return isatty(fileno(stdout));
351  }  }
352    
353    static BOOL
354    is_file_tty(FILE *f)
355    {
356    return isatty(fileno(f));
357    }
358    
359    
360  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
361    
362  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
363  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
364  when it did not exist. */  when it did not exist. David Byron added a patch that moved the #include of
365    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
366    */
367    
368  #elif HAVE_WIN32API  #elif HAVE_WINDOWS_H
369    
370  #ifndef STRICT  #ifndef STRICT
371  # define STRICT  # define STRICT
# Line 300  when it did not exist. */ Line 373  when it did not exist. */
373  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
374  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
375  #endif  #endif
376    
377    #include <windows.h>
378    
379  #ifndef INVALID_FILE_ATTRIBUTES  #ifndef INVALID_FILE_ATTRIBUTES
380  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
381  #endif  #endif
382    
 #include <windows.h>  
   
383  typedef struct directory_type  typedef struct directory_type
384  {  {
385  HANDLE handle;  HANDLE handle;
# Line 390  regular if they are not directories. */ Line 464  regular if they are not directories. */
464    
465  int isregfile(char *filename)  int isregfile(char *filename)
466  {  {
467  return !isdirectory(filename)  return !isdirectory(filename);
468  }  }
469    
470    
471  /************* Test stdout for being a terminal in Win32 **********/  /************* Test for a terminal in Win32 **********/
472    
473  /* I don't know how to do this; assume never */  /* I don't know how to do this; assume never */
474    
475  static BOOL  static BOOL
476  is_stdout_tty(void)  is_stdout_tty(void)
477  {  {
478  FALSE;  return FALSE;
479    }
480    
481    static BOOL
482    is_file_tty(FILE *f)
483    {
484    return FALSE;
485  }  }
486    
487    
# Line 414  FALSE; Line 494  FALSE;
494  typedef void directory_type;  typedef void directory_type;
495    
496  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
497  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
498  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
499  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
500    
501    
# Line 426  void closedirectory(directory_type *dir) Line 506  void closedirectory(directory_type *dir)
506  int isregfile(char *filename) { return 1; }  int isregfile(char *filename) { return 1; }
507    
508    
509  /************* Test stdout for being a terminal when we can't do it **********/  /************* Test for a terminal when we can't do it **********/
510    
511  static BOOL  static BOOL
512  is_stdout_tty(void)  is_stdout_tty(void)
# Line 434  is_stdout_tty(void) Line 514  is_stdout_tty(void)
514  return FALSE;  return FALSE;
515  }  }
516    
/* Fallback for systems where no terminal test is available: the stream f is
not examined and FALSE is always returned. */

static BOOL
is_file_tty(FILE *f)
{
return FALSE;
}
522    
523  #endif  #endif
524    
525    
526    
527  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
528  /*************************************************  /*************************************************
529  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
530  *************************************************/  *************************************************/
# Line 462  return sys_errlist[n]; Line 547  return sys_errlist[n];
547    
548    
549  /*************************************************  /*************************************************
550    *            Read one line of input              *
551    *************************************************/
552    
553    /* Normally, input is read using fread() into a large buffer, so many lines may
554    be read at once. However, doing this for tty input means that no output appears
555    until a lot of input has been typed. Instead, tty input is handled line by
556    line. We cannot use fgets() for this: it does not stop at a binary zero, so
557    when binary zeros are embedded in the data there is no way of telling how
558    many characters it has read.
559    
560    Arguments:
561      buffer     the buffer to read into
562      length     the maximum number of characters to read
563      f          the file
564    
565    Returns:     the number of characters read, zero at end of file
566    */
567    
/* Read a single line from f into buffer, one character at a time with fgetc(),
so that embedded binary zeros are counted correctly. Reading stops once a
newline has been stored, once length characters have been stored, or when
fgetc() returns EOF. No terminating zero is added to the buffer.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters stored; zero at end of file, or when
             length is not positive
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;

/* The space remaining is tested before a character is fetched, so that a
length of zero (or less) neither writes to the buffer nor consumes input. */

while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return yield;
}
580    
581    
582    
583    /*************************************************
584    *             Find end of line                   *
585    *************************************************/
586    
587    /* The length of the endline sequence that is found is set via lenptr. This may
588    be zero at the very end of the file if there is no line-ending sequence there.
589    
590    Arguments:
591      p         current position in line
592      endptr    end of available data
593      lenptr    where to put the length of the eol sequence
594    
595    Returns:    pointer to the byte after the line, including its eol sequence
596    */
597    
598    static char *
599    end_of_line(char *p, char *endptr, int *lenptr)
600    {
601    switch(endlinetype)
602      {
603      default:      /* Just in case */
604      case EL_LF:
605      while (p < endptr && *p != '\n') p++;
606      if (p < endptr)
607        {
608        *lenptr = 1;
609        return p + 1;
610        }
611      *lenptr = 0;
612      return endptr;
613    
614      case EL_CR:
615      while (p < endptr && *p != '\r') p++;
616      if (p < endptr)
617        {
618        *lenptr = 1;
619        return p + 1;
620        }
621      *lenptr = 0;
622      return endptr;
623    
624      case EL_CRLF:
625      for (;;)
626        {
627        while (p < endptr && *p != '\r') p++;
628        if (++p >= endptr)
629          {
630          *lenptr = 0;
631          return endptr;
632          }
633        if (*p == '\n')
634          {
635          *lenptr = 2;
636          return p + 1;
637          }
638        }
639      break;
640    
641      case EL_ANYCRLF:
642      while (p < endptr)
643        {
644        int extra = 0;
645        register int c = *((unsigned char *)p);
646    
647        if (utf8 && c >= 0xc0)
648          {
649          int gcii, gcss;
650          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
651          gcss = 6*extra;
652          c = (c & utf8_table3[extra]) << gcss;
653          for (gcii = 1; gcii <= extra; gcii++)
654            {
655            gcss -= 6;
656            c |= (p[gcii] & 0x3f) << gcss;
657            }
658          }
659    
660        p += 1 + extra;
661    
662        switch (c)
663          {
664          case 0x0a:    /* LF */
665          *lenptr = 1;
666          return p;
667    
668          case 0x0d:    /* CR */
669          if (p < endptr && *p == 0x0a)
670            {
671            *lenptr = 2;
672            p++;
673            }
674          else *lenptr = 1;
675          return p;
676    
677          default:
678          break;
679          }
680        }   /* End of loop for ANYCRLF case */
681    
682      *lenptr = 0;  /* Must have hit the end */
683      return endptr;
684    
685      case EL_ANY:
686      while (p < endptr)
687        {
688        int extra = 0;
689        register int c = *((unsigned char *)p);
690    
691        if (utf8 && c >= 0xc0)
692          {
693          int gcii, gcss;
694          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
695          gcss = 6*extra;
696          c = (c & utf8_table3[extra]) << gcss;
697          for (gcii = 1; gcii <= extra; gcii++)
698            {
699            gcss -= 6;
700            c |= (p[gcii] & 0x3f) << gcss;
701            }
702          }
703    
704        p += 1 + extra;
705    
706        switch (c)
707          {
708          case 0x0a:    /* LF */
709          case 0x0b:    /* VT */
710          case 0x0c:    /* FF */
711          *lenptr = 1;
712          return p;
713    
714          case 0x0d:    /* CR */
715          if (p < endptr && *p == 0x0a)
716            {
717            *lenptr = 2;
718            p++;
719            }
720          else *lenptr = 1;
721          return p;
722    
723          case 0x85:    /* NEL */
724          *lenptr = utf8? 2 : 1;
725          return p;
726    
727          case 0x2028:  /* LS */
728          case 0x2029:  /* PS */
729          *lenptr = 3;
730          return p;
731    
732          default:
733          break;
734          }
735        }   /* End of loop for ANY case */
736    
737      *lenptr = 0;  /* Must have hit the end */
738      return endptr;
739      }     /* End of overall switch */
740    }
741    
742    
743    
744    /*************************************************
745    *         Find start of previous line            *
746    *************************************************/
747    
748    /* This is called when looking back for before lines to print.
749    
750    Arguments:
751      p         start of the subsequent line
752      startptr  start of available data
753    
754    Returns:    pointer to the start of the previous line
755    */
756    
757    static char *
758    previous_line(char *p, char *startptr)
759    {
760    switch(endlinetype)
761      {
762      default:      /* Just in case */
763      case EL_LF:
764      p--;
765      while (p > startptr && p[-1] != '\n') p--;
766      return p;
767    
768      case EL_CR:
769      p--;
770      while (p > startptr && p[-1] != '\n') p--;
771      return p;
772    
773      case EL_CRLF:
774      for (;;)
775        {
776        p -= 2;
777        while (p > startptr && p[-1] != '\n') p--;
778        if (p <= startptr + 1 || p[-2] == '\r') return p;
779        }
780      return p;   /* But control should never get here */
781    
782      case EL_ANY:
783      case EL_ANYCRLF:
784      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
785      if (utf8) while ((*p & 0xc0) == 0x80) p--;
786    
787      while (p > startptr)
788        {
789        register int c;
790        char *pp = p - 1;
791    
792        if (utf8)
793          {
794          int extra = 0;
795          while ((*pp & 0xc0) == 0x80) pp--;
796          c = *((unsigned char *)pp);
797          if (c >= 0xc0)
798            {
799            int gcii, gcss;
800            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
801            gcss = 6*extra;
802            c = (c & utf8_table3[extra]) << gcss;
803            for (gcii = 1; gcii <= extra; gcii++)
804              {
805              gcss -= 6;
806              c |= (pp[gcii] & 0x3f) << gcss;
807              }
808            }
809          }
810        else c = *((unsigned char *)pp);
811    
812        if (endlinetype == EL_ANYCRLF) switch (c)
813          {
814          case 0x0a:    /* LF */
815          case 0x0d:    /* CR */
816          return p;
817    
818          default:
819          break;
820          }
821    
822        else switch (c)
823          {
824          case 0x0a:    /* LF */
825          case 0x0b:    /* VT */
826          case 0x0c:    /* FF */
827          case 0x0d:    /* CR */
828          case 0x85:    /* NEL */
829          case 0x2028:  /* LS */
830          case 0x2029:  /* PS */
831          return p;
832    
833          default:
834          break;
835          }
836    
837        p = pp;  /* Back one character */
838        }        /* End of loop for ANY case */
839    
840      return startptr;  /* Hit start of data */
841      }     /* End of overall switch */
842    }
843    
844    
845    
846    
847    
848    /*************************************************
849  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
850  *************************************************/  *************************************************/
851    
# Line 486  if (after_context > 0 && lastmatchnumber Line 870  if (after_context > 0 && lastmatchnumber
870    int count = 0;    int count = 0;
871    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
872      {      {
873        int ellength;
874      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
875      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
876      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
877      while (*pp != '\n') pp++;      pp = end_of_line(pp, endptr, &ellength);
878      fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
879      lastmatchrestart = pp + 1;      lastmatchrestart = pp;
880      }      }
881    hyphenpending = TRUE;    hyphenpending = TRUE;
882    }    }
# Line 500  if (after_context > 0 && lastmatchnumber Line 885  if (after_context > 0 && lastmatchnumber
885    
886    
887  /*************************************************  /*************************************************
888    *   Apply patterns to subject till one matches   *
889    *************************************************/
890    
891    /* This function is called to run through all patterns, looking for a match. It
892    is used multiple times for the same subject when colouring is enabled, in order
893    to find all possible matches.
894    
895    Arguments:
896      matchptr    the start of the subject
897      length      the length of the subject to match
898      offsets     the offsets vector to fill in
899      mrc         address of where to put the result of pcre_exec()
900    
901    Returns:      TRUE if there was a match
902                  FALSE if there was no match
903                  invert if there was a non-fatal error
904    */
905    
906    static BOOL
907    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
908    {
909    int i;
910    for (i = 0; i < pattern_count; i++)
911      {
912      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
913        PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
914      if (*mrc >= 0) return TRUE;
915      if (*mrc == PCRE_ERROR_NOMATCH) continue;
916      fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
917      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
918      fprintf(stderr, "this text:\n");
919      FWRITE(matchptr, 1, length, stderr);   /* In case binary zero included */
920      fprintf(stderr, "\n");
921      if (error_count == 0 &&
922          (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
923        {
924        fprintf(stderr, "pcregrep: error %d means that a resource limit "
925          "was exceeded\n", *mrc);
926        fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
927        }
928      if (error_count++ > 20)
929        {
930        fprintf(stderr, "pcregrep: too many errors - abandoned\n");
931        exit(2);
932        }
933      return invert;    /* No more matching; don't show the line again */
934      }
935    
936    return FALSE;  /* No match, no errors */
937    }
938    
939    
940    
941    /*************************************************
942  *            Grep an individual file             *  *            Grep an individual file             *
943  *************************************************/  *************************************************/
944    
# Line 511  be in the middle third most of the time, Line 950  be in the middle third most of the time,
950  "before" context printing.  "before" context printing.
951    
952  Arguments:  Arguments:
953    in           the fopened FILE stream    handle       the fopened FILE stream for a normal file
954                   the gzFile pointer when reading is via libz
955                   the BZFILE pointer when reading is via libbz2
956      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
957    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
958                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
959                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
960    
961  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
962                 1 otherwise (no matches)                 1 otherwise (no matches)
963                   2 if there is a read error on a .bz2 file
964  */  */
965    
966  static int  static int
967  pcregrep(FILE *in, char *printname)  pcregrep(void *handle, int frtype, char *printname)
968  {  {
969  int rc = 1;  int rc = 1;
970  int linenumber = 1;  int linenumber = 1;
971  int lastmatchnumber = 0;  int lastmatchnumber = 0;
972  int count = 0;  int count = 0;
973  int offsets[99];  int filepos = 0;
974    int offsets[OFFSET_SIZE];
975  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
976  char buffer[3*MBUFTHIRD];  char buffer[3*MBUFTHIRD];
977  char *ptr = buffer;  char *ptr = buffer;
978  char *endptr;  char *endptr;
979  size_t bufflength;  size_t bufflength;
980  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
981    BOOL input_line_buffered = line_buffered;
982    FILE *in = NULL;                    /* Ensure initialized */
983    
984  /* Do the first read into the start of the buffer and set up the pointer to  #ifdef SUPPORT_LIBZ
985  end of what we have. */  gzFile ingz = NULL;
986    #endif
987    
988    #ifdef SUPPORT_LIBBZ2
989    BZFILE *inbz2 = NULL;
990    #endif
991    
992    
993    /* Do the first read into the start of the buffer and set up the pointer to end
994    of what we have. In the case of libz, a non-zipped .gz file will be read as a
995    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
996    fail. */
997    
998    #ifdef SUPPORT_LIBZ
999    if (frtype == FR_LIBZ)
1000      {
1001      ingz = (gzFile)handle;
1002      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1003      }
1004    else
1005    #endif
1006    
1007  bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  #ifdef SUPPORT_LIBBZ2
1008    if (frtype == FR_LIBBZ2)
1009      {
1010      inbz2 = (BZFILE *)handle;
1011      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1012      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1013      }                                    /* without the cast it is unsigned. */
1014    else
1015    #endif
1016    
1017      {
1018      in = (FILE *)handle;
1019      if (is_file_tty(in)) input_line_buffered = TRUE;
1020      bufflength = input_line_buffered?
1021        read_one_line(buffer, 3*MBUFTHIRD, in) :
1022        fread(buffer, 1, 3*MBUFTHIRD, in);
1023      }
1024    
1025  endptr = buffer + bufflength;  endptr = buffer + bufflength;
1026    
1027  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
# Line 548  way, the buffer is shifted left and re-f Line 1031  way, the buffer is shifted left and re-f
1031    
1032  while (ptr < endptr)  while (ptr < endptr)
1033    {    {
1034    int i;    int endlinelength;
1035    int mrc = 0;    int mrc = 0;
1036    BOOL match = FALSE;    BOOL match;
1037      char *matchptr = ptr;
1038    char *t = ptr;    char *t = ptr;
1039    size_t length, linelength;    size_t length, linelength;
1040    
1041    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
1042    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
1043    length of the remainder of the data in the buffer. Otherwise, it is the length of    length of the remainder of the data in the buffer. Otherwise, it is the length of
1044    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
1045    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1046    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
1047      first line. */
1048    linelength = 0;  
1049    while (t < endptr && *t++ != '\n') linelength++;    t = end_of_line(t, endptr, &endlinelength);
1050    length = multiline? endptr - ptr : linelength;    linelength = t - ptr - endlinelength;
1051      length = multiline? (size_t)(endptr - ptr) : linelength;
1052    /* Run through all the patterns until one matches. Note that we don't include  
1053    the final newline in the subject string. */    /* Extra processing for Jeffrey Friedl's debugging. */
1054    
1055    for (i = 0; i < pattern_count; i++)  #ifdef JFRIEDL_DEBUG
1056      {    if (jfriedl_XT || jfriedl_XR)
1057      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,    {
1058        offsets, 99);        #include <sys/time.h>
1059      if (mrc >= 0) { match = TRUE; break; }        #include <time.h>
1060      if (mrc != PCRE_ERROR_NOMATCH)        struct timeval start_time, end_time;
1061        {        struct timezone dummy;
1062        fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);        int i;
1063        if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
1064        fprintf(stderr, "this line:\n");        if (jfriedl_XT)
1065        fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */        {
1066        fprintf(stderr, "\n");            unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1067        if (error_count == 0 &&            const char *orig = ptr;
1068            (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))            ptr = malloc(newlen + 1);
1069          {            if (!ptr) {
1070          fprintf(stderr, "pcregrep: error %d means that a resource limit "                    printf("out of memory");
1071            "was exceeded\n", mrc);                    exit(2);
1072          fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");            }
1073          }            endptr = ptr;
1074        if (error_count++ > 20)            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1075          {            for (i = 0; i < jfriedl_XT; i++) {
1076          fprintf(stderr, "pcregrep: too many errors - abandoned\n");                    strncpy(endptr, orig,  length);
1077          exit(2);                    endptr += length;
1078          }            }
1079        match = invert;    /* No more matching; don't show the line again */            strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1080        break;            length = newlen;
1081        }        }
1082      }  
1083          if (gettimeofday(&start_time, &dummy) != 0)
1084                  perror("bad gettimeofday");
1085    
1086    
1087          for (i = 0; i < jfriedl_XR; i++)
1088              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1089                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1090    
1091          if (gettimeofday(&end_time, &dummy) != 0)
1092                  perror("bad gettimeofday");
1093    
1094          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1095                          -
1096                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1097    
1098          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1099          return 0;
1100      }
1101    #endif
1102    
1103      /* We come back here after a match when the -o option (only_matching) is set,
1104      in order to find any further matches in the same line. */
1105    
1106      ONLY_MATCHING_RESTART:
1107    
1108      /* Run through all the patterns until one matches or there is an error other
1109      than NOMATCH. This code is in a subroutine so that it can be re-used for
1110      finding subsequent matches when colouring matched lines. */
1111    
1112      match = match_patterns(matchptr, length, offsets, &mrc);
1113    
1114    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1115    
# Line 614  while (ptr < endptr) Line 1128  while (ptr < endptr)
1128      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1129      in the file. */      in the file. */
1130    
1131      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1132        {        {
1133        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1134        return 0;        return 0;
# Line 625  while (ptr < endptr) Line 1139  while (ptr < endptr)
1139      else if (quiet) return 0;      else if (quiet) return 0;
1140    
1141      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, and
1142      does not pring any context. */      the --file-offsets and --line-offsets options output offsets for the
1143        matching substring (they both force --only-matching). None of these options
1144        prints any context. Afterwards, adjust the start and length, and then jump
1145        back to look for further matches in the same line. If we are in invert
1146        mode, however, nothing is printed - this could be still useful because the
1147        return code is set. */
1148    
1149      else if (only_matching)      else if (only_matching)
1150        {        {
1151        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (!invert)
1152        if (number) fprintf(stdout, "%d:", linenumber);          {
1153        fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1154        fprintf(stdout, "\n");          if (number) fprintf(stdout, "%d:", linenumber);
1155            if (line_offsets)
1156              fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1157                offsets[1] - offsets[0]);
1158            else if (file_offsets)
1159              fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1160                offsets[1] - offsets[0]);
1161            else
1162              {
1163              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1164              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1165              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1166              }
1167            fprintf(stdout, "\n");
1168            matchptr += offsets[1];
1169            length -= offsets[1];
1170            match = FALSE;
1171            goto ONLY_MATCHING_RESTART;
1172            }
1173        }        }
1174    
1175      /* This is the default case when none of the above options is set. We print      /* This is the default case when none of the above options is set. We print
# Line 646  while (ptr < endptr) Line 1183  while (ptr < endptr)
1183    
1184        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
1185          {          {
1186            int ellength;
1187          int linecount = 0;          int linecount = 0;
1188          char *p = lastmatchrestart;          char *p = lastmatchrestart;
1189    
1190          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
1191            {            {
1192            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
1193            linecount++;            linecount++;
1194            }            }
1195    
# Line 665  while (ptr < endptr) Line 1202  while (ptr < endptr)
1202            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
1203            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1204            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1205            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1206            fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1207            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
1208            }            }
1209          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
1210          }          }
# Line 693  while (ptr < endptr) Line 1230  while (ptr < endptr)
1230                 linecount < before_context)                 linecount < before_context)
1231            {            {
1232            linecount++;            linecount++;
1233            p--;            p = previous_line(p, buffer);
           while (p > buffer && p[-1] != '\n') p--;  
1234            }            }
1235    
1236          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 702  while (ptr < endptr) Line 1238  while (ptr < endptr)
1238    
1239          while (p < ptr)          while (p < ptr)
1240            {            {
1241              int ellength;
1242            char *pp = p;            char *pp = p;
1243            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1244            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1245            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1246            fwrite(p, 1, pp - p + 1, stdout);   /* In case binary zero */            FWRITE(p, 1, pp - p, stdout);
1247            p = pp + 1;            p = pp;
1248            }            }
1249          }          }
1250    
# Line 722  while (ptr < endptr) Line 1259  while (ptr < endptr)
1259    
1260        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1261        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1262        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1263        start of the match will always be before the first \n character. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1264          the match will always be before the first newline sequence. */
1265    
1266        if (multiline)        if (multiline)
1267          {          {
1268          char *endmatch = ptr + offsets[1];          int ellength;
1269          t = ptr;          char *endmatch = ptr;
1270          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          if (!invert)
1271          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1272          linelength = endmatch - ptr;            endmatch += offsets[1];
1273              t = ptr;
1274              while (t < endmatch)
1275                {
1276                t = end_of_line(t, endptr, &ellength);
1277                if (t <= endmatch) linenumber++; else break;
1278                }
1279              }
1280            endmatch = end_of_line(endmatch, endptr, &ellength);
1281            linelength = endmatch - ptr - ellength;
1282          }          }
1283    
1284        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 746  while (ptr < endptr) Line 1293  while (ptr < endptr)
1293          {          {
1294          int first = S_arg * 2;          int first = S_arg * 2;
1295          int last  = first + 1;          int last  = first + 1;
1296          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1297          fprintf(stdout, "X");          fprintf(stdout, "X");
1298          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1299          }          }
1300        else        else
1301  #endif  #endif
1302    
1303        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1304          matches. */
1305    
1306        if (do_colour)        if (do_colour)
1307          {          {
1308          fwrite(ptr, 1, offsets[0], stdout);          int last_offset = 0;
1309            FWRITE(ptr, 1, offsets[0], stdout);
1310          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1311          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1312          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1313          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          for (;;)
1314              {
1315              last_offset += offsets[1];
1316              matchptr += offsets[1];
1317              length -= offsets[1];
1318              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1319              FWRITE(matchptr, 1, offsets[0], stdout);
1320              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1321              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1322              fprintf(stdout, "%c[00m", 0x1b);
1323              }
1324            FWRITE(ptr + last_offset, 1,
1325              (linelength + endlinelength) - last_offset, stdout);
1326          }          }
       else fwrite(ptr, 1, linelength, stdout);  
1327    
1328        fprintf(stdout, "\n");        /* Not colouring; no need to search for further matches */
1329    
1330          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1331        }        }
1332    
1333      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match. If --line-buffered was
1334        given, flush the output. */
1335    
1336        if (line_buffered) fflush(stdout);
1337      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1338    
1339      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1340      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1341    
1342      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1343      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1344      }      }
1345    
1346    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1347      anything to be printed), we have to move on to the end of the match before
1348      proceeding. */
1349    
1350      if (multiline && invert && match)
1351        {
1352        int ellength;
1353        char *endmatch = ptr + offsets[1];
1354        t = ptr;
1355        while (t < endmatch)
1356          {
1357          t = end_of_line(t, endptr, &ellength);
1358          if (t <= endmatch) linenumber++; else break;
1359          }
1360        endmatch = end_of_line(endmatch, endptr, &ellength);
1361        linelength = endmatch - ptr - ellength;
1362        }
1363    
1364      /* Advance to after the newline and increment the line number. The file
1365      offset to the current line is maintained in filepos. */
1366    
1367    ptr += linelength + 1;    ptr += linelength + endlinelength;
1368      filepos += (int)(linelength + endlinelength);
1369    linenumber++;    linenumber++;
1370    
1371      /* If input is line buffered, and the buffer is not yet full, read another
1372      line and add it into the buffer. */
1373    
1374      if (input_line_buffered && bufflength < sizeof(buffer))
1375        {
1376        int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1377        bufflength += add;
1378        endptr += add;
1379        }
1380    
1381    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1382    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
# Line 803  while (ptr < endptr) Line 1397  while (ptr < endptr)
1397    
1398      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1399      ptr -= MBUFTHIRD;      ptr -= MBUFTHIRD;
1400      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);  
1401    #ifdef SUPPORT_LIBZ
1402        if (frtype == FR_LIBZ)
1403          bufflength = 2*MBUFTHIRD +
1404            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1405        else
1406    #endif
1407    
1408    #ifdef SUPPORT_LIBBZ2
1409        if (frtype == FR_LIBBZ2)
1410          bufflength = 2*MBUFTHIRD +
1411            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1412        else
1413    #endif
1414    
1415        bufflength = 2*MBUFTHIRD +
1416          (input_line_buffered?
1417           read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1418           fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1419      endptr = buffer + bufflength;      endptr = buffer + bufflength;
1420    
1421      /* Adjust any last match point */      /* Adjust any last match point */
# Line 834  if (filenames == FN_NOMATCH_ONLY) Line 1446  if (filenames == FN_NOMATCH_ONLY)
1446    
1447  if (count_only)  if (count_only)
1448    {    {
1449    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1450    fprintf(stdout, "%d\n", count);      {
1451        if (printname != NULL && filenames != FN_NONE)
1452          fprintf(stdout, "%s:", printname);
1453        fprintf(stdout, "%d\n", count);
1454        }
1455    }    }
1456    
1457  return rc;  return rc;
# Line 867  grep_or_recurse(char *pathname, BOOL dir Line 1483  grep_or_recurse(char *pathname, BOOL dir
1483  {  {
1484  int rc = 1;  int rc = 1;
1485  int sep;  int sep;
1486  FILE *in;  int frtype;
1487    int pathlen;
1488    void *handle;
1489    FILE *in = NULL;           /* Ensure initialized */
1490    
1491    #ifdef SUPPORT_LIBZ
1492    gzFile ingz = NULL;
1493    #endif
1494    
1495    #ifdef SUPPORT_LIBBZ2
1496    BZFILE *inbz2 = NULL;
1497    #endif
1498    
1499  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1500    
1501  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1502    {    {
1503    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN,
1504      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1505        stdin_name : NULL);        stdin_name : NULL);
1506    }    }
1507    
   
1508  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* If the file is a directory, skip if skipping or if we are recursing, scan
1509  each file within it, subject to any include or exclude patterns that were set.  each file and directory within it, subject to any include or exclude patterns
1510  The scanning code is localized so it can be made system-specific. */  that were set. The scanning code is localized so it can be made
1511    system-specific. */
1512    
1513  if ((sep = isdirectory(pathname)) != 0)  if ((sep = isdirectory(pathname)) != 0)
1514    {    {
# Line 902  if ((sep = isdirectory(pathname)) != 0) Line 1529  if ((sep = isdirectory(pathname)) != 0)
1529    
1530      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
1531        {        {
1532        int frc, blen;        int frc, nflen;
1533        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1534        blen = strlen(buffer);        nflen = (int)(strlen(nextfile));
1535    
1536        if (exclude_compiled != NULL &&        if (isdirectory(buffer))
1537            pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)          {
1538          continue;          if (exclude_dir_compiled != NULL &&
1539                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1540        if (include_compiled != NULL &&            continue;
1541            pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  
1542          continue;          if (include_dir_compiled != NULL &&
1543                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1544              continue;
1545            }
1546          else
1547            {
1548            if (exclude_compiled != NULL &&
1549                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1550              continue;
1551    
1552            if (include_compiled != NULL &&
1553                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1554              continue;
1555            }
1556    
1557        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1558        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
# Line 935  skipping was not requested. The scan pro Line 1575  skipping was not requested. The scan pro
1575  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1576  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1577    
1578  in = fopen(pathname, "r");  pathlen = (int)(strlen(pathname));
1579  if (in == NULL)  
1580    /* Open using zlib if it is supported and the file name ends with .gz. */
1581    
1582    #ifdef SUPPORT_LIBZ
1583    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1584      {
1585      ingz = gzopen(pathname, "rb");
1586      if (ingz == NULL)
1587        {
1588        if (!silent)
1589          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1590            strerror(errno));
1591        return 2;
1592        }
1593      handle = (void *)ingz;
1594      frtype = FR_LIBZ;
1595      }
1596    else
1597    #endif
1598    
1599    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1600    
1601    #ifdef SUPPORT_LIBBZ2
1602    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1603      {
1604      inbz2 = BZ2_bzopen(pathname, "rb");
1605      handle = (void *)inbz2;
1606      frtype = FR_LIBBZ2;
1607      }
1608    else
1609    #endif
1610    
1611    /* Otherwise use plain fopen(). The label is so that we can come back here if
1612    an attempt to read a .bz2 file indicates that it really is a plain file. */
1613    
1614    #ifdef SUPPORT_LIBBZ2
1615    PLAIN_FILE:
1616    #endif
1617      {
1618      in = fopen(pathname, "rb");
1619      handle = (void *)in;
1620      frtype = FR_PLAIN;
1621      }
1622    
1623    /* All the opening methods set errno when they fail. */
1624    
1625    if (handle == NULL)
1626    {    {
1627    if (!silent)    if (!silent)
1628      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 944  if (in == NULL) Line 1630  if (in == NULL)
1630    return 2;    return 2;
1631    }    }
1632    
1633  rc = pcregrep(in, (filenames > FN_DEFAULT ||  /* Now grep the file */
1634    
1635    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1636    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1637    
1638    /* Close in an appropriate manner. */
1639    
1640    #ifdef SUPPORT_LIBZ
1641    if (frtype == FR_LIBZ)
1642      gzclose(ingz);
1643    else
1644    #endif
1645    
1646    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1647    read failed. If the error indicates that the file isn't in fact bzipped, try
1648    again as a normal file. */
1649    
1650    #ifdef SUPPORT_LIBBZ2
1651    if (frtype == FR_LIBBZ2)
1652      {
1653      if (rc == 2)
1654        {
1655        int errnum;
1656        const char *err = BZ2_bzerror(inbz2, &errnum);
1657        if (errnum == BZ_DATA_ERROR_MAGIC)
1658          {
1659          BZ2_bzclose(inbz2);
1660          goto PLAIN_FILE;
1661          }
1662        else if (!silent)
1663          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1664            pathname, err);
1665        }
1666      BZ2_bzclose(inbz2);
1667      }
1668    else
1669    #endif
1670    
1671    /* Normal file close */
1672    
1673  fclose(in);  fclose(in);
1674    
1675    /* Pass back the yield from pcregrep(). */
1676    
1677  return rc;  return rc;
1678  }  }
1679    
# Line 968  for (op = optionlist; op->one_char != 0; Line 1694  for (op = optionlist; op->one_char != 0;
1694    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1695    }    }
1696  fprintf(stderr, "] [long options] [pattern] [files]\n");  fprintf(stderr, "] [long options] [pattern] [files]\n");
1697  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1698      "options.\n");
1699  return rc;  return rc;
1700  }  }
1701    
# Line 987  option_item *op; Line 1714  option_item *op;
1714  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1715  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1716  printf("PATTERN must be present if neither -e nor -f is used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1717  printf("\"-\" can be used as a file name to mean STDIN.\n\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
1718  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
1719    #ifdef SUPPORT_LIBZ
1720    printf("Files whose names end in .gz are read using zlib.\n");
1721    #endif
1722    
1723    #ifdef SUPPORT_LIBBZ2
1724    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1725    #endif
1726    
1727    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1728    printf("Other files and the standard input are read as plain files.\n\n");
1729    #else
1730    printf("All files are read as plain files, without any interpretation.\n\n");
1731    #endif
1732    
1733    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1734  printf("Options:\n");  printf("Options:\n");
1735    
1736  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
# Line 997  for (op = optionlist; op->one_char != 0; Line 1738  for (op = optionlist; op->one_char != 0;
1738    int n;    int n;
1739    char s[4];    char s[4];
1740    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1741    printf("  %s --%s%n", s, op->long_name, &n);    n = 30 - printf("  %s --%s", s, op->long_name);
   n = 30 - n;  
1742    if (n < 1) n = 1;    if (n < 1) n = 1;
1743    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                    ", op->help_text);
1744    }    }
# Line 1023  handle_option(int letter, int options) Line 1763  handle_option(int letter, int options)
1763  {  {
1764  switch(letter)  switch(letter)
1765    {    {
1766      case N_FOFFSETS: file_offsets = TRUE; break;
1767    case N_HELP: help(); exit(0);    case N_HELP: help(); exit(0);
1768      case N_LOFFSETS: line_offsets = number = TRUE; break;
1769      case N_LBUFFER: line_buffered = TRUE; break;
1770    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1771    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1772    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
1773    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
1774    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1775    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1776    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1777    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1778    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
# Line 1037  switch(letter) Line 1780  switch(letter)
1780    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1781    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
1782    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1783    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1784    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1785    case 'w': process_options |= PO_WORD_MATCH; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1786    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1787    
1788    case 'V':    case 'V':
1789    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
1790    exit(0);    exit(0);
1791    break;    break;
1792    
# Line 1120  sprintf(buffer, "%s%.*s%s", prefix[proce Line 1862  sprintf(buffer, "%s%.*s%s", prefix[proce
1862    suffix[process_options]);    suffix[process_options]);
1863  pattern_list[pattern_count] =  pattern_list[pattern_count] =
1864    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
1865  if (pattern_list[pattern_count++] != NULL) return TRUE;  if (pattern_list[pattern_count] != NULL)
1866      {
1867      pattern_count++;
1868      return TRUE;
1869      }
1870    
1871  /* Handle compile errors */  /* Handle compile errors */
1872    
# Line 1152  return FALSE; Line 1898  return FALSE;
1898  *************************************************/  *************************************************/
1899    
1900  /* When the -F option has been used, each string may be a list of strings,  /* When the -F option has been used, each string may be a list of strings,
1901  separated by newlines. They will be matched literally.  separated by line breaks. They will be matched literally.
1902    
1903  Arguments:  Arguments:
1904    pattern        the pattern string    pattern        the pattern string
# Line 1170  compile_pattern(char *pattern, int optio Line 1916  compile_pattern(char *pattern, int optio
1916  {  {
1917  if ((process_options & PO_FIXED_STRINGS) != 0)  if ((process_options & PO_FIXED_STRINGS) != 0)
1918    {    {
1919      char *eop = pattern + strlen(pattern);
1920    char buffer[MBUFTHIRD];    char buffer[MBUFTHIRD];
1921    for(;;)    for(;;)
1922      {      {
1923      char *p = strchr(pattern, '\n');      int ellength;
1924      if (p == NULL)      char *p = end_of_line(pattern, eop, &ellength);
1925        if (ellength == 0)
1926        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
1927      sprintf(buffer, "%.*s", p - pattern, pattern);      sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1928      pattern = p + 1;      pattern = p;
1929      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
1930        return FALSE;        return FALSE;
1931      }      }
# Line 1200  int i, j; Line 1948  int i, j;
1948  int rc = 1;  int rc = 1;
1949  int pcre_options = 0;  int pcre_options = 0;
1950  int cmd_pattern_count = 0;  int cmd_pattern_count = 0;
1951    int hint_count = 0;
1952  int errptr;  int errptr;
1953  BOOL only_one_at_top;  BOOL only_one_at_top;
1954  char *patterns[MAX_PATTERN_COUNT];  char *patterns[MAX_PATTERN_COUNT];
1955  const char *locale_from = "--locale";  const char *locale_from = "--locale";
1956  const char *error;  const char *error;
1957    
1958    /* Set the default line ending value from the default in the PCRE library;
1959    "lf", "cr", "crlf", "any", and "anycrlf" are supported. Anything else is
1960    treated as "lf". Note that the return values from pcre_config(), though
1961    derived from the ASCII codes, are the same in EBCDIC environments, so we
1962    must use the actual values rather than escapes such as '\r'. */
1963    
1964    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1965    switch(i)
1966      {
1967      default:               newline = (char *)"lf"; break;
1968      case 13:               newline = (char *)"cr"; break;
1969      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
1970      case -1:               newline = (char *)"any"; break;
1971      case -2:               newline = (char *)"anycrlf"; break;
1972      }
1973    
1974  /* Process the options */  /* Process the options */
1975    
1976  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
# Line 1245  for (i = 1; i < argc; i++) Line 2010  for (i = 1; i < argc; i++)
2010      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2011      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2012      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2013      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
2014      these categories, fortunately. */      both these categories. */
2015    
2016      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2017        {        {
2018        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2019        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2020        if (opbra == NULL)     /* Not a (p) case */  
2021          /* Handle options with only one spelling of the name */
2022    
2023          if (opbra == NULL)     /* Does not contain '(' */
2024          {          {
2025          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2026            {            {
# Line 1260  for (i = 1; i < argc; i++) Line 2028  for (i = 1; i < argc; i++)
2028            }            }
2029          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2030            {            {
2031            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2032            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2033                (int)strlen(arg) : (int)(argequals - arg);
2034            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2035              {              {
2036              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1274  for (i = 1; i < argc; i++) Line 2043  for (i = 1; i < argc; i++)
2043              }              }
2044            }            }
2045          }          }
2046        else                   /* Special case xxxx(p) */  
2047          /* Handle options with an alternate spelling of the name */
2048    
2049          else
2050          {          {
2051          char buff1[24];          char buff1[24];
2052          char buff2[24];          char buff2[24];
2053          int baselen = opbra - op->long_name;  
2054            int baselen = (int)(opbra - op->long_name);
2055            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2056            int arglen = (argequals == NULL || equals == NULL)?
2057              (int)strlen(arg) : (int)(argequals - arg);
2058    
2059          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2060          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2061            opbra + 1);  
2062          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2063               strncmp(arg, buff2, arglen) == 0)
2064              {
2065              if (equals != NULL && argequals != NULL)
2066                {
2067                option_data = argequals;
2068                if (*option_data == '=')
2069                  {
2070                  option_data++;
2071                  longopwasequals = TRUE;
2072                  }
2073                }
2074            break;            break;
2075              }
2076          }          }
2077        }        }
2078    
# Line 1294  for (i = 1; i < argc; i++) Line 2083  for (i = 1; i < argc; i++)
2083        }        }
2084      }      }
2085    
2086      /* Jeffrey Friedl's debugging harness uses these additional options which
2087      are not in the right form for putting in the option table because they use
2088      only one hyphen, yet are more than one character long. By putting them
2089      separately here, they will not get displayed as part of the help() output,
2090      but I don't think Jeffrey will care about that. */
2091    
2092    #ifdef JFRIEDL_DEBUG
2093      else if (strcmp(argv[i], "-pre") == 0) {
2094              jfriedl_prefix = argv[++i];
2095              continue;
2096      } else if (strcmp(argv[i], "-post") == 0) {
2097              jfriedl_postfix = argv[++i];
2098              continue;
2099      } else if (strcmp(argv[i], "-XT") == 0) {
2100              sscanf(argv[++i], "%d", &jfriedl_XT);
2101              continue;
2102      } else if (strcmp(argv[i], "-XR") == 0) {
2103              sscanf(argv[++i], "%d", &jfriedl_XR);
2104              continue;
2105      }
2106    #endif
2107    
2108    
2109    /* One-char options; many that have no data may be in a single argument; we    /* One-char options; many that have no data may be in a single argument; we
2110    continue till we hit the last one or one that needs data. */    continue till we hit the last one or one that needs data. */
2111    
# Line 1333  for (i = 1; i < argc; i++) Line 2145  for (i = 1; i < argc; i++)
2145    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2146    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
2147    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r" and
2148    Jeffrey Friedl's special debugging option. */    Jeffrey Friedl's special -S debugging option. */
2149    
2150    if (*option_data == 0 &&    if (*option_data == 0 &&
2151        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
# Line 1394  for (i = 1; i < argc; i++) Line 2206  for (i = 1; i < argc; i++)
2206          {          {
2207          char *equals = strchr(op->long_name, '=');          char *equals = strchr(op->long_name, '=');
2208          int nlen = (equals == NULL)? (int)strlen(op->long_name) :          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2209            equals - op->long_name;            (int)(equals - op->long_name);
2210          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2211            option_data, nlen, op->long_name);            option_data, nlen, op->long_name);
2212          }          }
# Line 1416  if (both_context > 0) Line 2228  if (both_context > 0)
2228    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2229    }    }
2230    
2231    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2232    However, the latter two set the only_matching flag. */
2233    
2234    if ((only_matching && (file_offsets || line_offsets)) ||
2235        (file_offsets && line_offsets))
2236      {
2237      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2238        "and/or --line-offsets\n");
2239      exit(usage(2));
2240      }
2241    
2242    if (file_offsets || line_offsets) only_matching = TRUE;
2243    
2244  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2245  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
2246    
# Line 1464  if (colour_option != NULL && strcmp(colo Line 2289  if (colour_option != NULL && strcmp(colo
2289      if (cs != NULL) colour_string = cs;      if (cs != NULL) colour_string = cs;
2290      }      }
2291    }    }
2292    
2293    /* Interpret the newline type; the default settings are Unix-like. */
2294    
2295    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2296      {
2297      pcre_options |= PCRE_NEWLINE_CR;
2298      endlinetype = EL_CR;
2299      }
2300    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2301      {
2302      pcre_options |= PCRE_NEWLINE_LF;
2303      endlinetype = EL_LF;
2304      }
2305    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2306      {
2307      pcre_options |= PCRE_NEWLINE_CRLF;
2308      endlinetype = EL_CRLF;
2309      }
2310    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2311      {
2312      pcre_options |= PCRE_NEWLINE_ANY;
2313      endlinetype = EL_ANY;
2314      }
2315    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2316      {
2317      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2318      endlinetype = EL_ANYCRLF;
2319      }
2320    else
2321      {
2322      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2323      return 2;
2324      }
2325    
2326  /* Interpret the text values for -d and -D */  /* Interpret the text values for -d and -D */
2327    
# Line 1490  if (DEE_option != NULL) Line 2348  if (DEE_option != NULL)
2348      }      }
2349    }    }
2350    
2351  /* Check the value for Jeff Friedl's debugging option. */  /* Check the values for Jeffrey Friedl's debugging options. */
2352    
2353  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
2354  if (S_arg > 9)  if (S_arg > 9)
# Line 1498  if (S_arg > 9) Line 2356  if (S_arg > 9)
2356    fprintf(stderr, "pcregrep: bad value for -S option\n");    fprintf(stderr, "pcregrep: bad value for -S option\n");
2357    return 2;    return 2;
2358    }    }
2359    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2360      {
2361      if (jfriedl_XT == 0) jfriedl_XT = 1;
2362      if (jfriedl_XR == 0) jfriedl_XR = 1;
2363      }
2364  #endif  #endif
2365    
2366  /* Get memory to store the pattern and hints lists. */  /* Get memory to store the pattern and hints lists. */
# Line 1508  hints_list = (pcre_extra **)malloc(MAX_P Line 2371  hints_list = (pcre_extra **)malloc(MAX_P
2371  if (pattern_list == NULL || hints_list == NULL)  if (pattern_list == NULL || hints_list == NULL)
2372    {    {
2373    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
2374    return 2;    goto EXIT2;
2375    }    }
2376    
2377  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there is no file provided by -f,
# Line 1527  for (j = 0; j < cmd_pattern_count; j++) Line 2390  for (j = 0; j < cmd_pattern_count; j++)
2390    {    {
2391    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(patterns[j], pcre_options, NULL,
2392         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2393      return 2;      goto EXIT2;
2394    }    }
2395    
2396  /* Compile the regular expressions that are provided in a file. */  /* Compile the regular expressions that are provided in a file. */
# Line 1551  if (pattern_filename != NULL) Line 2414  if (pattern_filename != NULL)
2414        {        {
2415        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2416          strerror(errno));          strerror(errno));
2417        return 2;        goto EXIT2;
2418        }        }
2419      filename = pattern_filename;      filename = pattern_filename;
2420      }      }
# Line 1564  if (pattern_filename != NULL) Line 2427  if (pattern_filename != NULL)
2427      linenumber++;      linenumber++;
2428      if (buffer[0] == 0) continue;   /* Skip blank lines */      if (buffer[0] == 0) continue;   /* Skip blank lines */
2429      if (!compile_pattern(buffer, pcre_options, filename, linenumber))      if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2430        return 2;        goto EXIT2;
2431      }      }
2432    
2433    if (f != stdin) fclose(f);    if (f != stdin) fclose(f);
# Line 1580  for (j = 0; j < pattern_count; j++) Line 2443  for (j = 0; j < pattern_count; j++)
2443      char s[16];      char s[16];
2444      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2445      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2446      return 2;      goto EXIT2;
2447      }      }
2448      hint_count++;
2449    }    }
2450    
2451  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
# Line 1594  if (exclude_pattern != NULL) Line 2458  if (exclude_pattern != NULL)
2458      {      {
2459      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2460        errptr, error);        errptr, error);
2461      return 2;      goto EXIT2;
2462      }      }
2463    }    }
2464    
# Line 1606  if (include_pattern != NULL) Line 2470  if (include_pattern != NULL)
2470      {      {
2471      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2472        errptr, error);        errptr, error);
2473      return 2;      goto EXIT2;
2474        }
2475      }
2476    
2477    if (exclude_dir_pattern != NULL)
2478      {
2479      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2480        pcretables);
2481      if (exclude_dir_compiled == NULL)
2482        {
2483        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2484          errptr, error);
2485        goto EXIT2;
2486        }
2487      }
2488    
2489    if (include_dir_pattern != NULL)
2490      {
2491      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2492        pcretables);
2493      if (include_dir_compiled == NULL)
2494        {
2495        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2496          errptr, error);
2497        goto EXIT2;
2498      }      }
2499    }    }
2500    
2501  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
2502    
2503  if (i >= argc)  if (i >= argc)
2504    return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    {
2505      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2506      goto EXIT;
2507      }
2508    
2509  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2510  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
# Line 1630  for (; i < argc; i++) Line 2521  for (; i < argc; i++)
2521      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2522    }    }
2523    
2524    EXIT:
2525    if (pattern_list != NULL)
2526      {
2527      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2528      free(pattern_list);
2529      }
2530    if (hints_list != NULL)
2531      {
2532      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2533      free(hints_list);
2534      }
2535  return rc;  return rc;
2536    
2537    EXIT2:
2538    rc = 2;
2539    goto EXIT;
2540  }  }
2541    
2542  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.87  
changed lines
  Added in v.530

  ViewVC Help
Powered by ViewVC 1.1.5