/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 89 by nigel, Sat Feb 24 21:41:27 2007 UTC revision 558 by ph10, Tue Oct 26 15:26:45 2010 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <locale.h>  #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
 #include "config.h"  
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 56  POSSIBILITY OF SUCH DAMAGE. Line 70  POSSIBILITY OF SUCH DAMAGE.
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "4.2 09-Jan-2006"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define MBUFTHIRD BUFSIZ
# Line 65  typedef int BOOL; Line 79  typedef int BOOL;
79  #define MBUFTHIRD 8192  #define MBUFTHIRD 8192
80  #endif  #endif
81    
   
82  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
83  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
84  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
85    
86  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92  /* Actions for the -d and -D options */  /* Actions for the -d and -D options */
93    
# Line 83  enum { DEE_READ, DEE_SKIP }; Line 100  enum { DEE_READ, DEE_SKIP };
100  #define PO_LINE_MATCH     0x0002  #define PO_LINE_MATCH     0x0002
101  #define PO_FIXED_STRINGS  0x0004  #define PO_FIXED_STRINGS  0x0004
102    
103    /* Line ending types */
104    
105    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that tests the result in an "if" with an empty body.
Oddly, this does not also seem to apply to fprintf().

The "if" is wrapped in do { } while (0) so that "FWRITE(...);" always behaves
as exactly one statement. Without the wrapper, using the macro as the unbraced
body of an if/else would either fail to compile or attach the "else" to the
hidden "if" inside the macro. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114    
115    
116    
117  /*************************************************  /*************************************************
# Line 100  static const char *jfriedl_prefix = ""; Line 129  static const char *jfriedl_prefix = "";
129  static const char *jfriedl_postfix = "";  static const char *jfriedl_postfix = "";
130  #endif  #endif
131    
132    static int  endlinetype;
133    
134  static char *colour_string = (char *)"1;31";  static char *colour_string = (char *)"1;31";
135  static char *colour_option = NULL;  static char *colour_option = NULL;
136  static char *dee_option = NULL;  static char *dee_option = NULL;
137  static char *DEE_option = NULL;  static char *DEE_option = NULL;
138    static char *newline = NULL;
139  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
140  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
141  static char *locale = NULL;  static char *locale = NULL;
# Line 111  static char *locale = NULL; Line 143  static char *locale = NULL;
143  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
144    
145  static int  pattern_count = 0;  static int  pattern_count = 0;
146  static pcre **pattern_list;  static pcre **pattern_list = NULL;
147  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
148    
149  static char *include_pattern = NULL;  static char *include_pattern = NULL;
150  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
151    static char *include_dir_pattern = NULL;
152    static char *exclude_dir_pattern = NULL;
153    
154  static pcre *include_compiled = NULL;  static pcre *include_compiled = NULL;
155  static pcre *exclude_compiled = NULL;  static pcre *exclude_compiled = NULL;
156    static pcre *include_dir_compiled = NULL;
157    static pcre *exclude_dir_compiled = NULL;
158    
159  static int after_context = 0;  static int after_context = 0;
160  static int before_context = 0;  static int before_context = 0;
# Line 131  static int process_options = 0; Line 167  static int process_options = 0;
167    
168  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
169  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
170    static BOOL file_offsets = FALSE;
171  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
172  static BOOL invert = FALSE;  static BOOL invert = FALSE;
173    static BOOL line_buffered = FALSE;
174    static BOOL line_offsets = FALSE;
175  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
176  static BOOL number = FALSE;  static BOOL number = FALSE;
177    static BOOL omit_zero_count = FALSE;
178  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
179  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
180  static BOOL silent = FALSE;  static BOOL silent = FALSE;
181    static BOOL utf8 = FALSE;
182    
183  /* Structure for options and list of them */  /* Structure for options and list of them */
184    
# Line 155  typedef struct option_item { Line 196  typedef struct option_item {
196  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
197  used to identify them. */  used to identify them. */
198    
199  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
200  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
201  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
202  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
203  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
204  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
205  #define N_NULL      (-7)  #define N_LABEL        (-7)
206    #define N_LOCALE       (-8)
207    #define N_NULL         (-9)
208    #define N_LOFFSETS     (-10)
209    #define N_FOFFSETS     (-11)
210    #define N_LBUFFER      (-12)
211    
212  static option_item optionlist[] = {  static option_item optionlist[] = {
213    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
# Line 174  static option_item optionlist[] = { Line 220  static option_item optionlist[] = {
220    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
221    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
222    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
223    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_PATLIST,   'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
224    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_NODATA,    'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
225    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
226      { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
227    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
228    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
229    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
230    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
231    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
232    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
233      { OP_NODATA,    N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
234      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
235    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
236    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
237      { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
238    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
239    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
240    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
241    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
242    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
243    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
244      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
245      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
246  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
247    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
248  #endif  #endif
# Line 214  static const char *prefix[] = { Line 266  static const char *prefix[] = {
266  static const char *suffix[] = {  static const char *suffix[] = {
267    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
268    
269    /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
270    
271    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
272    
273    const char utf8_table4[] = {
274      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
275      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
276      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
277      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
278    
279    
280    
281  /*************************************************  /*************************************************
# Line 226  although at present the only ones are fo Line 288  although at present the only ones are fo
288    
289  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
290    
291  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
292  #include <sys/types.h>  #include <sys/types.h>
293  #include <sys/stat.h>  #include <sys/stat.h>
294  #include <dirent.h>  #include <dirent.h>
# Line 258  for (;;) Line 320  for (;;)
320    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
321      return dent->d_name;      return dent->d_name;
322    }    }
323  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
324  }  }
325    
326  static void  static void
# Line 280  return (statbuf.st_mode & S_IFMT) == S_I Line 342  return (statbuf.st_mode & S_IFMT) == S_I
342  }  }
343    
344    
345  /************* Test stdout for being a terminal in Unix **********/  /************* Test for a terminal in Unix **********/
346    
347  static BOOL  static BOOL
348  is_stdout_tty(void)  is_stdout_tty(void)
# Line 288  is_stdout_tty(void) Line 350  is_stdout_tty(void)
350  return isatty(fileno(stdout));  return isatty(fileno(stdout));
351  }  }
352    
353    static BOOL
354    is_file_tty(FILE *f)
355    {
356    return isatty(fileno(f));
357    }
358    
359    
360  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
361    
362  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
363  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
364  when it did not exist. */  when it did not exist. David Byron added a patch that moved the #include of
365    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
366    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
367    undefined when it is indeed undefined. */
368    
369  #elif HAVE_WIN32API  #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
370    
371  #ifndef STRICT  #ifndef STRICT
372  # define STRICT  # define STRICT
# Line 304  when it did not exist. */ Line 374  when it did not exist. */
374  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
375  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
376  #endif  #endif
377    
378    #include <windows.h>
379    
380  #ifndef INVALID_FILE_ATTRIBUTES  #ifndef INVALID_FILE_ATTRIBUTES
381  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
382  #endif  #endif
383    
 #include <windows.h>  
   
384  typedef struct directory_type  typedef struct directory_type
385  {  {
386  HANDLE handle;  HANDLE handle;
# Line 394  regular if they are not directories. */ Line 465  regular if they are not directories. */
465    
466  int isregfile(char *filename)  int isregfile(char *filename)
467  {  {
468  return !isdirectory(filename)  return !isdirectory(filename);
469  }  }
470    
471    
472  /************* Test stdout for being a terminal in Win32 **********/  /************* Test for a terminal in Win32 **********/
473    
474  /* I don't know how to do this; assume never */  /* I don't know how to do this; assume never */
475    
476  static BOOL  static BOOL
477  is_stdout_tty(void)  is_stdout_tty(void)
478  {  {
479  FALSE;  return FALSE;
480    }
481    
482    static BOOL
483    is_file_tty(FILE *f)
484    {
485    return FALSE;
486  }  }
487    
488    
# Line 418  FALSE; Line 495  FALSE;
495  typedef void directory_type;  typedef void directory_type;
496    
497  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
498  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
499  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
500  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
501    
502    
# Line 430  void closedirectory(directory_type *dir) Line 507  void closedirectory(directory_type *dir)
507  int isregfile(char *filename) { return 1; }  int isregfile(char *filename) { return 1; }
508    
509    
510  /************* Test stdout for being a terminal when we can't do it **********/  /************* Test for a terminal when we can't do it **********/
511    
512  static BOOL  static BOOL
513  is_stdout_tty(void)  is_stdout_tty(void)
# Line 438  is_stdout_tty(void) Line 515  is_stdout_tty(void)
515  return FALSE;  return FALSE;
516  }  }
517    
518    static BOOL
519    is_file_tty(FILE *f)
520    {
521    return FALSE;
522    }
523    
524  #endif  #endif
525    
526    
527    
528  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
529  /*************************************************  /*************************************************
530  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
531  *************************************************/  *************************************************/
# Line 466  return sys_errlist[n]; Line 548  return sys_errlist[n];
548    
549    
550  /*************************************************  /*************************************************
551    *            Read one line of input              *
552    *************************************************/
553    
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this: the data may contain embedded binary
zeros, so the zero-terminated string that fgets() produces gives no way of
telling how many characters were actually read.

Reading stops after a '\n' has been stored, when "length" characters have been
stored, or at end of file (or read error), whichever comes first. No
terminating zero is added to the buffer. Nothing is read or stored when
"length" is zero or negative.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;

/* The bound is tested before each read so that nothing is ever stored beyond
buffer[length-1], even when length is not positive. */

while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return yield;
}
581    
582    
583    
584    /*************************************************
585    *             Find end of line                   *
586    *************************************************/
587    
588    /* The length of the endline sequence that is found is set via lenptr. This may
589    be zero at the very end of the file if there is no line-ending sequence there.
590    
591    Arguments:
592      p         current position in line
593      endptr    end of available data
594      lenptr    where to put the length of the eol sequence
595    
596    Returns:    pointer to the last byte of the line
597    */
598    
599    static char *
600    end_of_line(char *p, char *endptr, int *lenptr)
601    {
602    switch(endlinetype)
603      {
604      default:      /* Just in case */
605      case EL_LF:
606      while (p < endptr && *p != '\n') p++;
607      if (p < endptr)
608        {
609        *lenptr = 1;
610        return p + 1;
611        }
612      *lenptr = 0;
613      return endptr;
614    
615      case EL_CR:
616      while (p < endptr && *p != '\r') p++;
617      if (p < endptr)
618        {
619        *lenptr = 1;
620        return p + 1;
621        }
622      *lenptr = 0;
623      return endptr;
624    
625      case EL_CRLF:
626      for (;;)
627        {
628        while (p < endptr && *p != '\r') p++;
629        if (++p >= endptr)
630          {
631          *lenptr = 0;
632          return endptr;
633          }
634        if (*p == '\n')
635          {
636          *lenptr = 2;
637          return p + 1;
638          }
639        }
640      break;
641    
642      case EL_ANYCRLF:
643      while (p < endptr)
644        {
645        int extra = 0;
646        register int c = *((unsigned char *)p);
647    
648        if (utf8 && c >= 0xc0)
649          {
650          int gcii, gcss;
651          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
652          gcss = 6*extra;
653          c = (c & utf8_table3[extra]) << gcss;
654          for (gcii = 1; gcii <= extra; gcii++)
655            {
656            gcss -= 6;
657            c |= (p[gcii] & 0x3f) << gcss;
658            }
659          }
660    
661        p += 1 + extra;
662    
663        switch (c)
664          {
665          case 0x0a:    /* LF */
666          *lenptr = 1;
667          return p;
668    
669          case 0x0d:    /* CR */
670          if (p < endptr && *p == 0x0a)
671            {
672            *lenptr = 2;
673            p++;
674            }
675          else *lenptr = 1;
676          return p;
677    
678          default:
679          break;
680          }
681        }   /* End of loop for ANYCRLF case */
682    
683      *lenptr = 0;  /* Must have hit the end */
684      return endptr;
685    
686      case EL_ANY:
687      while (p < endptr)
688        {
689        int extra = 0;
690        register int c = *((unsigned char *)p);
691    
692        if (utf8 && c >= 0xc0)
693          {
694          int gcii, gcss;
695          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
696          gcss = 6*extra;
697          c = (c & utf8_table3[extra]) << gcss;
698          for (gcii = 1; gcii <= extra; gcii++)
699            {
700            gcss -= 6;
701            c |= (p[gcii] & 0x3f) << gcss;
702            }
703          }
704    
705        p += 1 + extra;
706    
707        switch (c)
708          {
709          case 0x0a:    /* LF */
710          case 0x0b:    /* VT */
711          case 0x0c:    /* FF */
712          *lenptr = 1;
713          return p;
714    
715          case 0x0d:    /* CR */
716          if (p < endptr && *p == 0x0a)
717            {
718            *lenptr = 2;
719            p++;
720            }
721          else *lenptr = 1;
722          return p;
723    
724          case 0x85:    /* NEL */
725          *lenptr = utf8? 2 : 1;
726          return p;
727    
728          case 0x2028:  /* LS */
729          case 0x2029:  /* PS */
730          *lenptr = 3;
731          return p;
732    
733          default:
734          break;
735          }
736        }   /* End of loop for ANY case */
737    
738      *lenptr = 0;  /* Must have hit the end */
739      return endptr;
740      }     /* End of overall switch */
741    }
742    
743    
744    
745    /*************************************************
746    *         Find start of previous line            *
747    *************************************************/
748    
749    /* This is called when looking back for before lines to print.
750    
751    Arguments:
752      p         start of the subsequent line
753      startptr  start of available data
754    
755    Returns:    pointer to the start of the previous line
756    */
757    
758    static char *
759    previous_line(char *p, char *startptr)
760    {
761    switch(endlinetype)
762      {
763      default:      /* Just in case */
764      case EL_LF:
765      p--;
766      while (p > startptr && p[-1] != '\n') p--;
767      return p;
768    
769      case EL_CR:
770      p--;
771      while (p > startptr && p[-1] != '\n') p--;
772      return p;
773    
774      case EL_CRLF:
775      for (;;)
776        {
777        p -= 2;
778        while (p > startptr && p[-1] != '\n') p--;
779        if (p <= startptr + 1 || p[-2] == '\r') return p;
780        }
781      return p;   /* But control should never get here */
782    
783      case EL_ANY:
784      case EL_ANYCRLF:
785      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
786      if (utf8) while ((*p & 0xc0) == 0x80) p--;
787    
788      while (p > startptr)
789        {
790        register int c;
791        char *pp = p - 1;
792    
793        if (utf8)
794          {
795          int extra = 0;
796          while ((*pp & 0xc0) == 0x80) pp--;
797          c = *((unsigned char *)pp);
798          if (c >= 0xc0)
799            {
800            int gcii, gcss;
801            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
802            gcss = 6*extra;
803            c = (c & utf8_table3[extra]) << gcss;
804            for (gcii = 1; gcii <= extra; gcii++)
805              {
806              gcss -= 6;
807              c |= (pp[gcii] & 0x3f) << gcss;
808              }
809            }
810          }
811        else c = *((unsigned char *)pp);
812    
813        if (endlinetype == EL_ANYCRLF) switch (c)
814          {
815          case 0x0a:    /* LF */
816          case 0x0d:    /* CR */
817          return p;
818    
819          default:
820          break;
821          }
822    
823        else switch (c)
824          {
825          case 0x0a:    /* LF */
826          case 0x0b:    /* VT */
827          case 0x0c:    /* FF */
828          case 0x0d:    /* CR */
829          case 0x85:    /* NEL */
830          case 0x2028:  /* LS */
831          case 0x2029:  /* PS */
832          return p;
833    
834          default:
835          break;
836          }
837    
838        p = pp;  /* Back one character */
839        }        /* End of loop for ANY case */
840    
841      return startptr;  /* Hit start of data */
842      }     /* End of overall switch */
843    }
844    
845    
846    
847    
848    
849    /*************************************************
850  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
851  *************************************************/  *************************************************/
852    
# Line 490  if (after_context > 0 && lastmatchnumber Line 871  if (after_context > 0 && lastmatchnumber
871    int count = 0;    int count = 0;
872    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
873      {      {
874        int ellength;
875      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
876      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
877      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
878      while (*pp != '\n') pp++;      pp = end_of_line(pp, endptr, &ellength);
879      fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
880      lastmatchrestart = pp + 1;      lastmatchrestart = pp;
881      }      }
882    hyphenpending = TRUE;    hyphenpending = TRUE;
883    }    }
# Line 504  if (after_context > 0 && lastmatchnumber Line 886  if (after_context > 0 && lastmatchnumber
886    
887    
888  /*************************************************  /*************************************************
889    *   Apply patterns to subject till one matches   *
890    *************************************************/
891    
892    /* This function is called to run through all patterns, looking for a match. It
893    is used multiple times for the same subject when colouring is enabled, in order
894    to find all possible matches.
895    
896    Arguments:
897      matchptr    the start of the subject
898      length      the length of the subject to match
899      offsets     the offets vector to fill in
900      mrc         address of where to put the result of pcre_exec()
901    
902    Returns:      TRUE if there was a match
903                  FALSE if there was no match
904                  invert if there was a non-fatal error
905    */
906    
907    static BOOL
908    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
909    {
910    int i;
911    for (i = 0; i < pattern_count; i++)
912      {
913      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
914        PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
915      if (*mrc >= 0) return TRUE;
916      if (*mrc == PCRE_ERROR_NOMATCH) continue;
917      fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
918      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
919      fprintf(stderr, "this text:\n");
920      FWRITE(matchptr, 1, length, stderr);   /* In case binary zero included */
921      fprintf(stderr, "\n");
922      if (error_count == 0 &&
923          (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
924        {
925        fprintf(stderr, "pcregrep: error %d means that a resource limit "
926          "was exceeded\n", *mrc);
927        fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
928        }
929      if (error_count++ > 20)
930        {
931        fprintf(stderr, "pcregrep: too many errors - abandoned\n");
932        exit(2);
933        }
934      return invert;    /* No more matching; don't show the line again */
935      }
936    
937    return FALSE;  /* No match, no errors */
938    }
939    
940    
941    
942    /*************************************************
943  *            Grep an individual file             *  *            Grep an individual file             *
944  *************************************************/  *************************************************/
945    
# Line 515  be in the middle third most of the time, Line 951  be in the middle third most of the time,
951  "before" context printing.  "before" context printing.
952    
953  Arguments:  Arguments:
954    in           the fopened FILE stream    handle       the fopened FILE stream for a normal file
955                   the gzFile pointer when reading is via libz
956                   the BZFILE pointer when reading is via libbz2
957      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
958    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
959                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
960                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
961    
962  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
963                 1 otherwise (no matches)                 1 otherwise (no matches)
964                   2 if there is a read error on a .bz2 file
965  */  */
966    
967  static int  static int
968  pcregrep(FILE *in, char *printname)  pcregrep(void *handle, int frtype, char *printname)
969  {  {
970  int rc = 1;  int rc = 1;
971  int linenumber = 1;  int linenumber = 1;
972  int lastmatchnumber = 0;  int lastmatchnumber = 0;
973  int count = 0;  int count = 0;
974  int offsets[99];  int filepos = 0;
975    int offsets[OFFSET_SIZE];
976  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
977  char buffer[3*MBUFTHIRD];  char buffer[3*MBUFTHIRD];
978  char *ptr = buffer;  char *ptr = buffer;
979  char *endptr;  char *endptr;
980  size_t bufflength;  size_t bufflength;
981  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
982    BOOL input_line_buffered = line_buffered;
983    FILE *in = NULL;                    /* Ensure initialized */
984    
985    #ifdef SUPPORT_LIBZ
986    gzFile ingz = NULL;
987    #endif
988    
989    #ifdef SUPPORT_LIBBZ2
990    BZFILE *inbz2 = NULL;
991    #endif
992    
993    
994  /* Do the first read into the start of the buffer and set up the pointer to  /* Do the first read into the start of the buffer and set up the pointer to end
995  end of what we have. */  of what we have. In the case of libz, a non-zipped .gz file will be read as a
996    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
997    fail. */
998    
999    #ifdef SUPPORT_LIBZ
1000    if (frtype == FR_LIBZ)
1001      {
1002      ingz = (gzFile)handle;
1003      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1004      }
1005    else
1006    #endif
1007    
1008    #ifdef SUPPORT_LIBBZ2
1009    if (frtype == FR_LIBBZ2)
1010      {
1011      inbz2 = (BZFILE *)handle;
1012      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1013      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1014      }                                    /* without the cast it is unsigned. */
1015    else
1016    #endif
1017    
1018      {
1019      in = (FILE *)handle;
1020      if (is_file_tty(in)) input_line_buffered = TRUE;
1021      bufflength = input_line_buffered?
1022        read_one_line(buffer, 3*MBUFTHIRD, in) :
1023        fread(buffer, 1, 3*MBUFTHIRD, in);
1024      }
1025    
 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  
1026  endptr = buffer + bufflength;  endptr = buffer + bufflength;
1027    
1028  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
# Line 552  way, the buffer is shifted left and re-f Line 1032  way, the buffer is shifted left and re-f
1032    
1033  while (ptr < endptr)  while (ptr < endptr)
1034    {    {
1035    int i;    int endlinelength;
1036    int mrc = 0;    int mrc = 0;
1037    BOOL match = FALSE;    BOOL match;
1038      char *matchptr = ptr;
1039    char *t = ptr;    char *t = ptr;
1040    size_t length, linelength;    size_t length, linelength;
1041    
1042    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
1043    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
1044    length remainder of the data in the buffer. Otherwise, it is the length of    length remainder of the data in the buffer. Otherwise, it is the length of
1045    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
1046    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1047    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
1048      first line. */
1049    linelength = 0;  
1050    while (t < endptr && *t++ != '\n') linelength++;    t = end_of_line(t, endptr, &endlinelength);
1051    length = multiline? endptr - ptr : linelength;    linelength = t - ptr - endlinelength;
1052      length = multiline? (size_t)(endptr - ptr) : linelength;
1053    
1054    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
1055    
# Line 579  while (ptr < endptr) Line 1060  while (ptr < endptr)
1060        #include <time.h>        #include <time.h>
1061        struct timeval start_time, end_time;        struct timeval start_time, end_time;
1062        struct timezone dummy;        struct timezone dummy;
1063          int i;
1064    
1065        if (jfriedl_XT)        if (jfriedl_XT)
1066        {        {
# Line 604  while (ptr < endptr) Line 1086  while (ptr < endptr)
1086    
1087    
1088        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1089            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1090                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1091    
1092        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
1093                perror("bad gettimeofday");                perror("bad gettimeofday");
# Line 618  while (ptr < endptr) Line 1101  while (ptr < endptr)
1101    }    }
1102  #endif  #endif
1103    
1104      /* We come back here after a match when the -o option (only_matching) is set,
1105      in order to find any further matches in the same line. */
1106    
1107    /* Run through all the patterns until one matches. Note that we don't include    ONLY_MATCHING_RESTART:
   the final newline in the subject string. */  
1108    
1109    for (i = 0; i < pattern_count; i++)    /* Run through all the patterns until one matches or there is an error other
1110      {    than NOMATCH. This code is in a subroutine so that it can be re-used for
1111      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,    finding subsequent matches when colouring matched lines. */
1112        offsets, 99);  
1113      if (mrc >= 0) { match = TRUE; break; }    match = match_patterns(matchptr, length, offsets, &mrc);
     if (mrc != PCRE_ERROR_NOMATCH)  
       {  
       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);  
       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
       fprintf(stderr, "this line:\n");  
       fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */  
       fprintf(stderr, "\n");  
       if (error_count == 0 &&  
           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))  
         {  
         fprintf(stderr, "pcregrep: error %d means that a resource limit "  
           "was exceeded\n", mrc);  
         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
         }  
       if (error_count++ > 20)  
         {  
         fprintf(stderr, "pcregrep: too many errors - abandoned\n");  
         exit(2);  
         }  
       match = invert;    /* No more matching; don't show the line again */  
       break;  
       }  
     }  
1114    
1115    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1116    
# Line 668  while (ptr < endptr) Line 1129  while (ptr < endptr)
1129      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1130      in the file. */      in the file. */
1131    
1132      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1133        {        {
1134        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1135        return 0;        return 0;
# Line 679  while (ptr < endptr) Line 1140  while (ptr < endptr)
1140      else if (quiet) return 0;      else if (quiet) return 0;
1141    
1142      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, and
1143      does not pring any context. */      the --file-offsets and --line-offsets options output offsets for the
1144        matching substring (they both force --only-matching). None of these options
1145        prints any context. Afterwards, adjust the start and length, and then jump
1146        back to look for further matches in the same line. If we are in invert
1147        mode, however, nothing is printed - this could be still useful because the
1148        return code is set. */
1149    
1150      else if (only_matching)      else if (only_matching)
1151        {        {
1152        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (!invert)
1153        if (number) fprintf(stdout, "%d:", linenumber);          {
1154        fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1155        fprintf(stdout, "\n");          if (number) fprintf(stdout, "%d:", linenumber);
1156            if (line_offsets)
1157              fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1158                offsets[1] - offsets[0]);
1159            else if (file_offsets)
1160              fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1161                offsets[1] - offsets[0]);
1162            else
1163              {
1164              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1165              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1166              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1167              }
1168            fprintf(stdout, "\n");
1169            matchptr += offsets[1];
1170            length -= offsets[1];
1171            match = FALSE;
1172            goto ONLY_MATCHING_RESTART;
1173            }
1174        }        }
1175    
1176      /* This is the default case when none of the above options is set. We print      /* This is the default case when none of the above options is set. We print
# Line 700  while (ptr < endptr) Line 1184  while (ptr < endptr)
1184    
1185        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
1186          {          {
1187            int ellength;
1188          int linecount = 0;          int linecount = 0;
1189          char *p = lastmatchrestart;          char *p = lastmatchrestart;
1190    
1191          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
1192            {            {
1193            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
1194            linecount++;            linecount++;
1195            }            }
1196    
# Line 719  while (ptr < endptr) Line 1203  while (ptr < endptr)
1203            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
1204            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1205            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1206            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1207            fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1208            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
1209            }            }
1210          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
1211          }          }
# Line 747  while (ptr < endptr) Line 1231  while (ptr < endptr)
1231                 linecount < before_context)                 linecount < before_context)
1232            {            {
1233            linecount++;            linecount++;
1234            p--;            p = previous_line(p, buffer);
           while (p > buffer && p[-1] != '\n') p--;  
1235            }            }
1236    
1237          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 756  while (ptr < endptr) Line 1239  while (ptr < endptr)
1239    
1240          while (p < ptr)          while (p < ptr)
1241            {            {
1242              int ellength;
1243            char *pp = p;            char *pp = p;
1244            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1245            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1246            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1247            fwrite(p, 1, pp - p + 1, stdout);   /* In case binary zero */            FWRITE(p, 1, pp - p, stdout);
1248            p = pp + 1;            p = pp;
1249            }            }
1250          }          }
1251    
# Line 776  while (ptr < endptr) Line 1260  while (ptr < endptr)
1260    
1261        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1262        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1263        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1264        start of the match will always be before the first \n character. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1265          the match will always be before the first newline sequence. */
1266    
1267        if (multiline)        if (multiline)
1268          {          {
1269          char *endmatch = ptr + offsets[1];          int ellength;
1270          t = ptr;          char *endmatch = ptr;
1271          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          if (!invert)
1272          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1273          linelength = endmatch - ptr;            endmatch += offsets[1];
1274              t = ptr;
1275              while (t < endmatch)
1276                {
1277                t = end_of_line(t, endptr, &ellength);
1278                if (t <= endmatch) linenumber++; else break;
1279                }
1280              }
1281            endmatch = end_of_line(endmatch, endptr, &ellength);
1282            linelength = endmatch - ptr - ellength;
1283          }          }
1284    
1285        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 800  while (ptr < endptr) Line 1294  while (ptr < endptr)
1294          {          {
1295          int first = S_arg * 2;          int first = S_arg * 2;
1296          int last  = first + 1;          int last  = first + 1;
1297          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1298          fprintf(stdout, "X");          fprintf(stdout, "X");
1299          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1300          }          }
1301        else        else
1302  #endif  #endif
1303    
1304        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1305          matches. */
1306    
1307        if (do_colour)        if (do_colour)
1308          {          {
1309          fwrite(ptr, 1, offsets[0], stdout);          int last_offset = 0;
1310            FWRITE(ptr, 1, offsets[0], stdout);
1311          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1312          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1313          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1314          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          for (;;)
1315              {
1316              last_offset += offsets[1];
1317              matchptr += offsets[1];
1318              length -= offsets[1];
1319              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1320              FWRITE(matchptr, 1, offsets[0], stdout);
1321              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1322              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1323              fprintf(stdout, "%c[00m", 0x1b);
1324              }
1325            FWRITE(ptr + last_offset, 1,
1326              (linelength + endlinelength) - last_offset, stdout);
1327          }          }
       else fwrite(ptr, 1, linelength, stdout);  
1328    
1329        fprintf(stdout, "\n");        /* Not colouring; no need to search for further matches */
1330    
1331          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1332        }        }
1333    
1334      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match. If --line-buffered was
1335        given, flush the output. */
1336    
1337        if (line_buffered) fflush(stdout);
1338      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1339    
1340      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1341      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1342    
1343      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1344      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1345      }      }
1346    
1347    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1348      anything to be printed), we have to move on to the end of the match before
1349      proceeding. */
1350    
1351      if (multiline && invert && match)
1352        {
1353        int ellength;
1354        char *endmatch = ptr + offsets[1];
1355        t = ptr;
1356        while (t < endmatch)
1357          {
1358          t = end_of_line(t, endptr, &ellength);
1359          if (t <= endmatch) linenumber++; else break;
1360          }
1361        endmatch = end_of_line(endmatch, endptr, &ellength);
1362        linelength = endmatch - ptr - ellength;
1363        }
1364    
1365      /* Advance to after the newline and increment the line number. The file
1366      offset to the current line is maintained in filepos. */
1367    
1368    ptr += linelength + 1;    ptr += linelength + endlinelength;
1369      filepos += (int)(linelength + endlinelength);
1370    linenumber++;    linenumber++;
1371    
1372      /* If input is line buffered, and the buffer is not yet full, read another
1373      line and add it into the buffer. */
1374    
1375      if (input_line_buffered && bufflength < sizeof(buffer))
1376        {
1377        int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1378        bufflength += add;
1379        endptr += add;
1380        }
1381    
1382    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1383    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1384    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
# Line 857  while (ptr < endptr) Line 1398  while (ptr < endptr)
1398    
1399      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1400      ptr -= MBUFTHIRD;      ptr -= MBUFTHIRD;
1401      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);  
1402    #ifdef SUPPORT_LIBZ
1403        if (frtype == FR_LIBZ)
1404          bufflength = 2*MBUFTHIRD +
1405            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1406        else
1407    #endif
1408    
1409    #ifdef SUPPORT_LIBBZ2
1410        if (frtype == FR_LIBBZ2)
1411          bufflength = 2*MBUFTHIRD +
1412            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1413        else
1414    #endif
1415    
1416        bufflength = 2*MBUFTHIRD +
1417          (input_line_buffered?
1418           read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1419           fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1420      endptr = buffer + bufflength;      endptr = buffer + bufflength;
1421    
1422      /* Adjust any last match point */      /* Adjust any last match point */
# Line 888  if (filenames == FN_NOMATCH_ONLY) Line 1447  if (filenames == FN_NOMATCH_ONLY)
1447    
1448  if (count_only)  if (count_only)
1449    {    {
1450    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1451    fprintf(stdout, "%d\n", count);      {
1452        if (printname != NULL && filenames != FN_NONE)
1453          fprintf(stdout, "%s:", printname);
1454        fprintf(stdout, "%d\n", count);
1455        }
1456    }    }
1457    
1458  return rc;  return rc;
# Line 921  grep_or_recurse(char *pathname, BOOL dir Line 1484  grep_or_recurse(char *pathname, BOOL dir
1484  {  {
1485  int rc = 1;  int rc = 1;
1486  int sep;  int sep;
1487  FILE *in;  int frtype;
1488    int pathlen;
1489    void *handle;
1490    FILE *in = NULL;           /* Ensure initialized */
1491    
1492    #ifdef SUPPORT_LIBZ
1493    gzFile ingz = NULL;
1494    #endif
1495    
1496    #ifdef SUPPORT_LIBBZ2
1497    BZFILE *inbz2 = NULL;
1498    #endif
1499    
1500  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1501    
1502  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1503    {    {
1504    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN,
1505      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1506        stdin_name : NULL);        stdin_name : NULL);
1507    }    }
1508    
   
1509  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* If the file is a directory, skip if skipping or if we are recursing, scan
1510  each file within it, subject to any include or exclude patterns that were set.  each file and directory within it, subject to any include or exclude patterns
1511  The scanning code is localized so it can be made system-specific. */  that were set. The scanning code is localized so it can be made
1512    system-specific. */
1513    
1514  if ((sep = isdirectory(pathname)) != 0)  if ((sep = isdirectory(pathname)) != 0)
1515    {    {
# Line 956  if ((sep = isdirectory(pathname)) != 0) Line 1530  if ((sep = isdirectory(pathname)) != 0)
1530    
1531      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
1532        {        {
1533        int frc, blen;        int frc, nflen;
1534        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1535        blen = strlen(buffer);        nflen = (int)(strlen(nextfile));
1536    
1537          if (isdirectory(buffer))
1538            {
1539            if (exclude_dir_compiled != NULL &&
1540                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1541              continue;
1542    
1543        if (exclude_compiled != NULL &&          if (include_dir_compiled != NULL &&
1544            pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)              pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1545          continue;            continue;
1546            }
1547        if (include_compiled != NULL &&        else
1548            pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)          {
1549          continue;          if (exclude_compiled != NULL &&
1550                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1551              continue;
1552    
1553            if (include_compiled != NULL &&
1554                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1555              continue;
1556            }
1557    
1558        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1559        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
# Line 989  skipping was not requested. The scan pro Line 1576  skipping was not requested. The scan pro
1576  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1577  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1578    
1579  in = fopen(pathname, "r");  pathlen = (int)(strlen(pathname));
1580  if (in == NULL)  
1581    /* Open using zlib if it is supported and the file name ends with .gz. */
1582    
1583    #ifdef SUPPORT_LIBZ
1584    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1585      {
1586      ingz = gzopen(pathname, "rb");
1587      if (ingz == NULL)
1588        {
1589        if (!silent)
1590          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1591            strerror(errno));
1592        return 2;
1593        }
1594      handle = (void *)ingz;
1595      frtype = FR_LIBZ;
1596      }
1597    else
1598    #endif
1599    
1600    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1601    
1602    #ifdef SUPPORT_LIBBZ2
1603    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1604      {
1605      inbz2 = BZ2_bzopen(pathname, "rb");
1606      handle = (void *)inbz2;
1607      frtype = FR_LIBBZ2;
1608      }
1609    else
1610    #endif
1611    
1612    /* Otherwise use plain fopen(). The label is so that we can come back here if
1613    an attempt to read a .bz2 file indicates that it really is a plain file. */
1614    
1615    #ifdef SUPPORT_LIBBZ2
1616    PLAIN_FILE:
1617    #endif
1618      {
1619      in = fopen(pathname, "rb");
1620      handle = (void *)in;
1621      frtype = FR_PLAIN;
1622      }
1623    
1624    /* All the opening methods set errno when they fail. */
1625    
1626    if (handle == NULL)
1627    {    {
1628    if (!silent)    if (!silent)
1629      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 998  if (in == NULL) Line 1631  if (in == NULL)
1631    return 2;    return 2;
1632    }    }
1633    
1634  rc = pcregrep(in, (filenames > FN_DEFAULT ||  /* Now grep the file */
1635    
1636    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1637    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1638    
1639    /* Close in an appropriate manner. */
1640    
1641    #ifdef SUPPORT_LIBZ
1642    if (frtype == FR_LIBZ)
1643      gzclose(ingz);
1644    else
1645    #endif
1646    
1647    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1648    read failed. If the error indicates that the file isn't in fact bzipped, try
1649    again as a normal file. */
1650    
1651    #ifdef SUPPORT_LIBBZ2
1652    if (frtype == FR_LIBBZ2)
1653      {
1654      if (rc == 2)
1655        {
1656        int errnum;
1657        const char *err = BZ2_bzerror(inbz2, &errnum);
1658        if (errnum == BZ_DATA_ERROR_MAGIC)
1659          {
1660          BZ2_bzclose(inbz2);
1661          goto PLAIN_FILE;
1662          }
1663        else if (!silent)
1664          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1665            pathname, err);
1666        }
1667      BZ2_bzclose(inbz2);
1668      }
1669    else
1670    #endif
1671    
1672    /* Normal file close */
1673    
1674  fclose(in);  fclose(in);
1675    
1676    /* Pass back the yield from pcregrep(). */
1677    
1678  return rc;  return rc;
1679  }  }
1680    
# Line 1022  for (op = optionlist; op->one_char != 0; Line 1695  for (op = optionlist; op->one_char != 0;
1695    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1696    }    }
1697  fprintf(stderr, "] [long options] [pattern] [files]\n");  fprintf(stderr, "] [long options] [pattern] [files]\n");
1698  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1699      "options.\n");
1700  return rc;  return rc;
1701  }  }
1702    
# Line 1041  option_item *op; Line 1715  option_item *op;
1715  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1716  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1717  printf("PATTERN must be present if neither -e nor -f is used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1718  printf("\"-\" can be used as a file name to mean STDIN.\n\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
1719    
1720    #ifdef SUPPORT_LIBZ
1721    printf("Files whose names end in .gz are read using zlib.\n");
1722    #endif
1723    
1724    #ifdef SUPPORT_LIBBZ2
1725    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1726    #endif
1727    
1728    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1729    printf("Other files and the standard input are read as plain files.\n\n");
1730    #else
1731    printf("All files are read as plain files, without any interpretation.\n\n");
1732    #endif
1733    
1734    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1735  printf("Options:\n");  printf("Options:\n");
1736    
1737  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
# Line 1051  for (op = optionlist; op->one_char != 0; Line 1739  for (op = optionlist; op->one_char != 0;
1739    int n;    int n;
1740    char s[4];    char s[4];
1741    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1742    printf("  %s --%s%n", s, op->long_name, &n);    n = 30 - printf("  %s --%s", s, op->long_name);
   n = 30 - n;  
1743    if (n < 1) n = 1;    if (n < 1) n = 1;
1744    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                    ", op->help_text);
1745    }    }
# Line 1077  handle_option(int letter, int options) Line 1764  handle_option(int letter, int options)
1764  {  {
1765  switch(letter)  switch(letter)
1766    {    {
1767      case N_FOFFSETS: file_offsets = TRUE; break;
1768    case N_HELP: help(); exit(0);    case N_HELP: help(); exit(0);
1769      case N_LOFFSETS: line_offsets = number = TRUE; break;
1770      case N_LBUFFER: line_buffered = TRUE; break;
1771    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1772    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1773    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
1774    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
1775    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1776    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1777    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1778    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1779    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
# Line 1091  switch(letter) Line 1781  switch(letter)
1781    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1782    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
1783    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1784    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1785    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1786    case 'w': process_options |= PO_WORD_MATCH; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1787    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1788    
1789    case 'V':    case 'V':
1790    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
1791    exit(0);    exit(0);
1792    break;    break;
1793    
# Line 1174  sprintf(buffer, "%s%.*s%s", prefix[proce Line 1863  sprintf(buffer, "%s%.*s%s", prefix[proce
1863    suffix[process_options]);    suffix[process_options]);
1864  pattern_list[pattern_count] =  pattern_list[pattern_count] =
1865    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
1866  if (pattern_list[pattern_count++] != NULL) return TRUE;  if (pattern_list[pattern_count] != NULL)
1867      {
1868      pattern_count++;
1869      return TRUE;
1870      }
1871    
1872  /* Handle compile errors */  /* Handle compile errors */
1873    
# Line 1206  return FALSE; Line 1899  return FALSE;
1899  *************************************************/  *************************************************/
1900    
1901  /* When the -F option has been used, each string may be a list of strings,  /* When the -F option has been used, each string may be a list of strings,
1902  separated by newlines. They will be matched literally.  separated by line breaks. They will be matched literally.
1903    
1904  Arguments:  Arguments:
1905    pattern        the pattern string    pattern        the pattern string
# Line 1224  compile_pattern(char *pattern, int optio Line 1917  compile_pattern(char *pattern, int optio
1917  {  {
1918  if ((process_options & PO_FIXED_STRINGS) != 0)  if ((process_options & PO_FIXED_STRINGS) != 0)
1919    {    {
1920      char *eop = pattern + strlen(pattern);
1921    char buffer[MBUFTHIRD];    char buffer[MBUFTHIRD];
1922    for(;;)    for(;;)
1923      {      {
1924      char *p = strchr(pattern, '\n');      int ellength;
1925      if (p == NULL)      char *p = end_of_line(pattern, eop, &ellength);
1926        if (ellength == 0)
1927        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
1928      sprintf(buffer, "%.*s", p - pattern, pattern);      sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1929      pattern = p + 1;      pattern = p;
1930      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
1931        return FALSE;        return FALSE;
1932      }      }
# Line 1254  int i, j; Line 1949  int i, j;
1949  int rc = 1;  int rc = 1;
1950  int pcre_options = 0;  int pcre_options = 0;
1951  int cmd_pattern_count = 0;  int cmd_pattern_count = 0;
1952    int hint_count = 0;
1953  int errptr;  int errptr;
1954  BOOL only_one_at_top;  BOOL only_one_at_top;
1955  char *patterns[MAX_PATTERN_COUNT];  char *patterns[MAX_PATTERN_COUNT];
1956  const char *locale_from = "--locale";  const char *locale_from = "--locale";
1957  const char *error;  const char *error;
1958    
1959    /* Set the default line ending value from the default in the PCRE library;
1960    "lf", "cr", "crlf", "any", and "anycrlf" are supported. Anything else is treated as "lf".
1961    Note that the return values from pcre_config(), though derived from the ASCII
1962    codes, are the same in EBCDIC environments, so we must use the actual values
1963    rather than escapes such as '\r'. */
1964    
1965    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1966    switch(i)
1967      {
1968      default:               newline = (char *)"lf"; break;
1969      case 13:               newline = (char *)"cr"; break;
1970      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
1971      case -1:               newline = (char *)"any"; break;
1972      case -2:               newline = (char *)"anycrlf"; break;
1973      }
1974    
1975  /* Process the options */  /* Process the options */
1976    
1977  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
# Line 1299  for (i = 1; i < argc; i++) Line 2011  for (i = 1; i < argc; i++)
2011      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2012      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2013      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2014      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
2015      these categories, fortunately. */      both these categories. */
2016    
2017      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2018        {        {
2019        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2020        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2021        if (opbra == NULL)     /* Not a (p) case */  
2022          /* Handle options with only one spelling of the name */
2023    
2024          if (opbra == NULL)     /* Does not contain '(' */
2025          {          {
2026          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2027            {            {
# Line 1314  for (i = 1; i < argc; i++) Line 2029  for (i = 1; i < argc; i++)
2029            }            }
2030          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2031            {            {
2032            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2033            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2034                (int)strlen(arg) : (int)(argequals - arg);
2035            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2036              {              {
2037              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1328  for (i = 1; i < argc; i++) Line 2044  for (i = 1; i < argc; i++)
2044              }              }
2045            }            }
2046          }          }
2047        else                   /* Special case xxxx(p) */  
2048          /* Handle options with an alternate spelling of the name */
2049    
2050          else
2051          {          {
2052          char buff1[24];          char buff1[24];
2053          char buff2[24];          char buff2[24];
2054          int baselen = opbra - op->long_name;  
2055            int baselen = (int)(opbra - op->long_name);
2056            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2057            int arglen = (argequals == NULL || equals == NULL)?
2058              (int)strlen(arg) : (int)(argequals - arg);
2059    
2060          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2061          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2062            opbra + 1);  
2063          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2064               strncmp(arg, buff2, arglen) == 0)
2065              {
2066              if (equals != NULL && argequals != NULL)
2067                {
2068                option_data = argequals;
2069                if (*option_data == '=')
2070                  {
2071                  option_data++;
2072                  longopwasequals = TRUE;
2073                  }
2074                }
2075            break;            break;
2076              }
2077          }          }
2078        }        }
2079    
# Line 1348  for (i = 1; i < argc; i++) Line 2084  for (i = 1; i < argc; i++)
2084        }        }
2085      }      }
2086    
   
2087    /* Jeffrey Friedl's debugging harness uses these additional options which    /* Jeffrey Friedl's debugging harness uses these additional options which
2088    are not in the right form for putting in the option table because they use    are not in the right form for putting in the option table because they use
2089    only one hyphen, yet are more than one character long. By putting them    only one hyphen, yet are more than one character long. By putting them
# Line 1462  for (i = 1; i < argc; i++) Line 2197  for (i = 1; i < argc; i++)
2197      {      {
2198      *((char **)op->dataptr) = option_data;      *((char **)op->dataptr) = option_data;
2199      }      }
2200    
2201      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2202      only for unpicking arguments, so just keep it simple. */
2203    
2204    else    else
2205      {      {
2206      char *endptr;      int n = 0;
2207      int n = strtoul(option_data, &endptr, 10);      char *endptr = option_data;
2208        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2209        while (isdigit((unsigned char)(*endptr)))
2210          n = n * 10 + (int)(*endptr++ - '0');
2211      if (*endptr != 0)      if (*endptr != 0)
2212        {        {
2213        if (longop)        if (longop)
2214          {          {
2215          char *equals = strchr(op->long_name, '=');          char *equals = strchr(op->long_name, '=');
2216          int nlen = (equals == NULL)? (int)strlen(op->long_name) :          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2217            equals - op->long_name;            (int)(equals - op->long_name);
2218          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2219            option_data, nlen, op->long_name);            option_data, nlen, op->long_name);
2220          }          }
# Line 1494  if (both_context > 0) Line 2236  if (both_context > 0)
2236    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2237    }    }
2238    
2239    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2240    However, the latter two set the only_matching flag. */
2241    
2242    if ((only_matching && (file_offsets || line_offsets)) ||
2243        (file_offsets && line_offsets))
2244      {
2245      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2246        "and/or --line-offsets\n");
2247      exit(usage(2));
2248      }
2249    
2250    if (file_offsets || line_offsets) only_matching = TRUE;
2251    
2252  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2253  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
2254    
# Line 1543  if (colour_option != NULL && strcmp(colo Line 2298  if (colour_option != NULL && strcmp(colo
2298      }      }
2299    }    }
2300    
2301    /* Interpret the newline type; the default settings are Unix-like. */
2302    
2303    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2304      {
2305      pcre_options |= PCRE_NEWLINE_CR;
2306      endlinetype = EL_CR;
2307      }
2308    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2309      {
2310      pcre_options |= PCRE_NEWLINE_LF;
2311      endlinetype = EL_LF;
2312      }
2313    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2314      {
2315      pcre_options |= PCRE_NEWLINE_CRLF;
2316      endlinetype = EL_CRLF;
2317      }
2318    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2319      {
2320      pcre_options |= PCRE_NEWLINE_ANY;
2321      endlinetype = EL_ANY;
2322      }
2323    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2324      {
2325      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2326      endlinetype = EL_ANYCRLF;
2327      }
2328    else
2329      {
2330      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2331      return 2;
2332      }
2333    
2334  /* Interpret the text values for -d and -D */  /* Interpret the text values for -d and -D */
2335    
2336  if (dee_option != NULL)  if (dee_option != NULL)
# Line 1591  hints_list = (pcre_extra **)malloc(MAX_P Line 2379  hints_list = (pcre_extra **)malloc(MAX_P
2379  if (pattern_list == NULL || hints_list == NULL)  if (pattern_list == NULL || hints_list == NULL)
2380    {    {
2381    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
2382    return 2;    goto EXIT2;
2383    }    }
2384    
2385  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there is no file provided by -f,
# Line 1610  for (j = 0; j < cmd_pattern_count; j++) Line 2398  for (j = 0; j < cmd_pattern_count; j++)
2398    {    {
2399    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(patterns[j], pcre_options, NULL,
2400         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2401      return 2;      goto EXIT2;
2402    }    }
2403    
2404  /* Compile the regular expressions that are provided in a file. */  /* Compile the regular expressions that are provided in a file. */
# Line 1634  if (pattern_filename != NULL) Line 2422  if (pattern_filename != NULL)
2422        {        {
2423        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2424          strerror(errno));          strerror(errno));
2425        return 2;        goto EXIT2;
2426        }        }
2427      filename = pattern_filename;      filename = pattern_filename;
2428      }      }
# Line 1647  if (pattern_filename != NULL) Line 2435  if (pattern_filename != NULL)
2435      linenumber++;      linenumber++;
2436      if (buffer[0] == 0) continue;   /* Skip blank lines */      if (buffer[0] == 0) continue;   /* Skip blank lines */
2437      if (!compile_pattern(buffer, pcre_options, filename, linenumber))      if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2438        return 2;        goto EXIT2;
2439      }      }
2440    
2441    if (f != stdin) fclose(f);    if (f != stdin) fclose(f);
# Line 1663  for (j = 0; j < pattern_count; j++) Line 2451  for (j = 0; j < pattern_count; j++)
2451      char s[16];      char s[16];
2452      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2453      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2454      return 2;      goto EXIT2;
2455      }      }
2456      hint_count++;
2457    }    }
2458    
2459  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
# Line 1677  if (exclude_pattern != NULL) Line 2466  if (exclude_pattern != NULL)
2466      {      {
2467      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2468        errptr, error);        errptr, error);
2469      return 2;      goto EXIT2;
2470      }      }
2471    }    }
2472    
# Line 1689  if (include_pattern != NULL) Line 2478  if (include_pattern != NULL)
2478      {      {
2479      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2480        errptr, error);        errptr, error);
2481      return 2;      goto EXIT2;
2482        }
2483      }
2484    
2485    if (exclude_dir_pattern != NULL)
2486      {
2487      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2488        pcretables);
2489      if (exclude_dir_compiled == NULL)
2490        {
2491        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2492          errptr, error);
2493        goto EXIT2;
2494        }
2495      }
2496    
2497    if (include_dir_pattern != NULL)
2498      {
2499      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2500        pcretables);
2501      if (include_dir_compiled == NULL)
2502        {
2503        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2504          errptr, error);
2505        goto EXIT2;
2506      }      }
2507    }    }
2508    
2509  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
2510    
2511  if (i >= argc)  if (i >= argc)
2512    return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    {
2513      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2514      goto EXIT;
2515      }
2516    
2517  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2518  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
# Line 1713  for (; i < argc; i++) Line 2529  for (; i < argc; i++)
2529      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2530    }    }
2531    
2532    EXIT:
2533    if (pattern_list != NULL)
2534      {
2535      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2536      free(pattern_list);
2537      }
2538    if (hints_list != NULL)
2539      {
2540      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2541      free(hints_list);
2542      }
2543  return rc;  return rc;
2544    
2545    EXIT2:
2546    rc = 2;
2547    goto EXIT;
2548  }  }
2549    
2550  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.89  
changed lines
  Added in v.558

  ViewVC Help
Powered by ViewVC 1.1.5