/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 96 by nigel, Fri Mar 2 13:10:43 2007 UTC revision 565 by ph10, Sun Oct 31 18:18:48 2010 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <locale.h>  #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
 #include "config.h"  
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 56  POSSIBILITY OF SUCH DAMAGE. Line 70  POSSIBILITY OF SUCH DAMAGE.
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "4.4 29-Nov-2006"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define MBUFTHIRD BUFSIZ
# Line 69  typedef int BOOL; Line 83  typedef int BOOL;
83  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
84  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
85    
86  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92  /* Actions for the -d and -D options */  /* Actions for the -d and -D options */
93    
# Line 84  enum { DEE_READ, DEE_SKIP }; Line 102  enum { DEE_READ, DEE_SKIP };
102    
103  /* Line ending types */  /* Line ending types */
104    
105  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
113    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
114    
115    
116    
# Line 117  static char *locale = NULL; Line 143  static char *locale = NULL;
143  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
144    
145  static int  pattern_count = 0;  static int  pattern_count = 0;
146  static pcre **pattern_list;  static pcre **pattern_list = NULL;
147  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
148    
149  static char *include_pattern = NULL;  static char *include_pattern = NULL;
150  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
151    static char *include_dir_pattern = NULL;
152    static char *exclude_dir_pattern = NULL;
153    
154  static pcre *include_compiled = NULL;  static pcre *include_compiled = NULL;
155  static pcre *exclude_compiled = NULL;  static pcre *exclude_compiled = NULL;
156    static pcre *include_dir_compiled = NULL;
157    static pcre *exclude_dir_compiled = NULL;
158    
159  static int after_context = 0;  static int after_context = 0;
160  static int before_context = 0;  static int before_context = 0;
# Line 133  static int dee_action = dee_READ; Line 163  static int dee_action = dee_READ;
163  static int DEE_action = DEE_READ;  static int DEE_action = DEE_READ;
164  static int error_count = 0;  static int error_count = 0;
165  static int filenames = FN_DEFAULT;  static int filenames = FN_DEFAULT;
166    static int only_matching = -1;
167  static int process_options = 0;  static int process_options = 0;
168    
169    static unsigned long int match_limit = 0;
170    static unsigned long int match_limit_recursion = 0;
171    
172  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
173  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
174    static BOOL file_offsets = FALSE;
175  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
176  static BOOL invert = FALSE;  static BOOL invert = FALSE;
177    static BOOL line_buffered = FALSE;
178    static BOOL line_offsets = FALSE;
179  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
180  static BOOL number = FALSE;  static BOOL number = FALSE;
181  static BOOL only_matching = FALSE;  static BOOL omit_zero_count = FALSE;
182    static BOOL resource_error = FALSE;
183  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
184  static BOOL silent = FALSE;  static BOOL silent = FALSE;
185  static BOOL utf8 = FALSE;  static BOOL utf8 = FALSE;
# Line 162  typedef struct option_item { Line 200  typedef struct option_item {
200  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
201  used to identify them. */  used to identify them. */
202    
203  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
204  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
205  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
206  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
207  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
208  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
209  #define N_NULL      (-7)  #define N_LABEL        (-7)
210    #define N_LOCALE       (-8)
211    #define N_NULL         (-9)
212    #define N_LOFFSETS     (-10)
213    #define N_FOFFSETS     (-11)
214    #define N_LBUFFER      (-12)
215    #define N_M_LIMIT      (-13)
216    #define N_M_LIMIT_REC  (-14)
217    
218  static option_item optionlist[] = {  static option_item optionlist[] = {
219    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
# Line 176  static option_item optionlist[] = { Line 221  static option_item optionlist[] = {
221    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
222    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
223    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
224      { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
225    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
226    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
   { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },  
227    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
228    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
229    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_PATLIST,   'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
230    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_NODATA,    'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
231    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
232      { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
233    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
234    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
235    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
236    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
237    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
238    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
239      { OP_NODATA,    N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
240      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
241    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
242      { OP_NUMBER,    N_M_LIMIT,&match_limit,      "match-limit=number", "set PCRE match limit option" },
243      { OP_NUMBER,    N_M_LIMIT_REC,&match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
245    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LR, CRLF)" },    { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
247    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_OP_NUMBER, 'o',      &only_matching,    "only-matching=n", "show only the part of the line that matched" },
248    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
249    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
250    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
251    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
252      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
253      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
254  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
255    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
256  #endif  #endif
# Line 222  static const char *prefix[] = { Line 274  static const char *prefix[] = {
274  static const char *suffix[] = {  static const char *suffix[] = {
275    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
276    
277  /* UTF-8 tables - used only when the newline setting is "all". */  /* UTF-8 tables - used only when the newline setting is "any". */
278    
279  const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
280    
# Line 244  although at present the only ones are fo Line 296  although at present the only ones are fo
296    
297  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
298    
299  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
300  #include <sys/types.h>  #include <sys/types.h>
301  #include <sys/stat.h>  #include <sys/stat.h>
302  #include <dirent.h>  #include <dirent.h>
# Line 276  for (;;) Line 328  for (;;)
328    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
329      return dent->d_name;      return dent->d_name;
330    }    }
331  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
332  }  }
333    
334  static void  static void
# Line 298  return (statbuf.st_mode & S_IFMT) == S_I Line 350  return (statbuf.st_mode & S_IFMT) == S_I
350  }  }
351    
352    
353  /************* Test stdout for being a terminal in Unix **********/  /************* Test for a terminal in Unix **********/
354    
355  static BOOL  static BOOL
356  is_stdout_tty(void)  is_stdout_tty(void)
# Line 306  is_stdout_tty(void) Line 358  is_stdout_tty(void)
358  return isatty(fileno(stdout));  return isatty(fileno(stdout));
359  }  }
360    
361    static BOOL
362    is_file_tty(FILE *f)
363    {
364    return isatty(fileno(f));
365    }
366    
367    
368  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
369    
370  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
371  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
372  when it did not exist. */  when it did not exist. David Byron added a patch that moved the #include of
373    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
374    The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
375    undefined when it is indeed undefined. */
376    
377  #elif HAVE_WIN32API  #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
378    
379  #ifndef STRICT  #ifndef STRICT
380  # define STRICT  # define STRICT
# Line 322  when it did not exist. */ Line 382  when it did not exist. */
382  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
383  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
384  #endif  #endif
385    
386    #include <windows.h>
387    
388  #ifndef INVALID_FILE_ATTRIBUTES  #ifndef INVALID_FILE_ATTRIBUTES
389  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
390  #endif  #endif
391    
 #include <windows.h>  
   
392  typedef struct directory_type  typedef struct directory_type
393  {  {
394  HANDLE handle;  HANDLE handle;
# Line 357  dir = (directory_type *) malloc(sizeof(* Line 418  dir = (directory_type *) malloc(sizeof(*
418  if ((pattern == NULL) || (dir == NULL))  if ((pattern == NULL) || (dir == NULL))
419    {    {
420    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
421    exit(2);    pcregrep_exit(2);
422    }    }
423  memcpy(pattern, filename, len);  memcpy(pattern, filename, len);
424  memcpy(&(pattern[len]), "\\*", 3);  memcpy(&(pattern[len]), "\\*", 3);
# Line 412  regular if they are not directories. */ Line 473  regular if they are not directories. */
473    
474  int isregfile(char *filename)  int isregfile(char *filename)
475  {  {
476  return !isdirectory(filename)  return !isdirectory(filename);
477  }  }
478    
479    
480  /************* Test stdout for being a terminal in Win32 **********/  /************* Test for a terminal in Win32 **********/
481    
482  /* I don't know how to do this; assume never */  /* I don't know how to do this; assume never */
483    
484  static BOOL  static BOOL
485  is_stdout_tty(void)  is_stdout_tty(void)
486  {  {
487  FALSE;  return FALSE;
488    }
489    
490    static BOOL
491    is_file_tty(FILE *f)
492    {
493    return FALSE;
494  }  }
495    
496    
# Line 436  FALSE; Line 503  FALSE;
503  typedef void directory_type;  typedef void directory_type;
504    
505  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
506  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
507  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
508  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
509    
510    
# Line 448  void closedirectory(directory_type *dir) Line 515  void closedirectory(directory_type *dir)
515  int isregfile(char *filename) { return 1; }  int isregfile(char *filename) { return 1; }
516    
517    
518  /************* Test stdout for being a terminal when we can't do it **********/  /************* Test for a terminal when we can't do it **********/
519    
520  static BOOL  static BOOL
521  is_stdout_tty(void)  is_stdout_tty(void)
# Line 456  is_stdout_tty(void) Line 523  is_stdout_tty(void)
523  return FALSE;  return FALSE;
524  }  }
525    
526    static BOOL
527    is_file_tty(FILE *f)
528    {
529    return FALSE;
530    }
531    
532  #endif  #endif
533    
534    
535    
536  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
537  /*************************************************  /*************************************************
538  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
539  *************************************************/  *************************************************/
# Line 484  return sys_errlist[n]; Line 556  return sys_errlist[n];
556    
557    
558  /*************************************************  /*************************************************
559    *         Exit from the program                  *
560    *************************************************/
561    
562    /* If there has been a resource error, give a suitable message.
563    
564    Argument:  the return code
565    Returns:   does not return
566    */
567    
568    static void
569    pcregrep_exit(int rc)
570    {
571    if (resource_error)
572      {
573      fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
574        "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
575      fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
576      }
577    
578    exit(rc);
579    }
580    
581    
582    
583    /*************************************************
584    *            Read one line of input              *
585    *************************************************/
586    
587    /* Normally, input is read using fread() into a large buffer, so many lines may
588    be read at once. However, doing this for tty input means that no output appears
589    until a lot of input has been typed. Instead, tty input is handled line by
590    line. We cannot use fgets() for this: it does not stop at a binary zero, so
591    if binary zeros are embedded in the data there is no way of telling how many
592    characters it has read.
593    
594    Arguments:
595      buffer     the buffer to read into
596      length     the maximum number of characters to read
597      f          the file
598    
599    Returns:     the number of characters read, zero at end of file
600    */
601    
602    static int
603    read_one_line(char *buffer, int length, FILE *f)
604    {
605    int c;
606    int yield = 0;
607    while ((c = fgetc(f)) != EOF)
608      {
609      buffer[yield++] = c;
610      if (c == '\n' || yield >= length) break;
611      }
612    return yield;
613    }
614    
615    
616    
617    /*************************************************
618  *             Find end of line                   *  *             Find end of line                   *
619  *************************************************/  *************************************************/
620    
# Line 541  switch(endlinetype) Line 672  switch(endlinetype)
672      }      }
673    break;    break;
674    
675      case EL_ANYCRLF:
676      while (p < endptr)
677        {
678        int extra = 0;
679        register int c = *((unsigned char *)p);
680    
681        if (utf8 && c >= 0xc0)
682          {
683          int gcii, gcss;
684          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
685          gcss = 6*extra;
686          c = (c & utf8_table3[extra]) << gcss;
687          for (gcii = 1; gcii <= extra; gcii++)
688            {
689            gcss -= 6;
690            c |= (p[gcii] & 0x3f) << gcss;
691            }
692          }
693    
694        p += 1 + extra;
695    
696        switch (c)
697          {
698          case 0x0a:    /* LF */
699          *lenptr = 1;
700          return p;
701    
702          case 0x0d:    /* CR */
703          if (p < endptr && *p == 0x0a)
704            {
705            *lenptr = 2;
706            p++;
707            }
708          else *lenptr = 1;
709          return p;
710    
711          default:
712          break;
713          }
714        }   /* End of loop for ANYCRLF case */
715    
716      *lenptr = 0;  /* Must have hit the end */
717      return endptr;
718    
719    case EL_ANY:    case EL_ANY:
720    while (p < endptr)    while (p < endptr)
721      {      {
# Line 639  switch(endlinetype) Line 814  switch(endlinetype)
814    return p;   /* But control should never get here */    return p;   /* But control should never get here */
815    
816    case EL_ANY:    case EL_ANY:
817      case EL_ANYCRLF:
818    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
819    if (utf8) while ((*p & 0xc0) == 0x80) p--;    if (utf8) while ((*p & 0xc0) == 0x80) p--;
820    
# Line 667  switch(endlinetype) Line 843  switch(endlinetype)
843        }        }
844      else c = *((unsigned char *)pp);      else c = *((unsigned char *)pp);
845    
846      switch (c)      if (endlinetype == EL_ANYCRLF) switch (c)
847          {
848          case 0x0a:    /* LF */
849          case 0x0d:    /* CR */
850          return p;
851    
852          default:
853          break;
854          }
855    
856        else switch (c)
857        {        {
858        case 0x0a:    /* LF */        case 0x0a:    /* LF */
859        case 0x0b:    /* VT */        case 0x0b:    /* VT */
# Line 723  if (after_context > 0 && lastmatchnumber Line 909  if (after_context > 0 && lastmatchnumber
909      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
910      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
911      pp = end_of_line(pp, endptr, &ellength);      pp = end_of_line(pp, endptr, &ellength);
912      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
913      lastmatchrestart = pp;      lastmatchrestart = pp;
914      }      }
915    hyphenpending = TRUE;    hyphenpending = TRUE;
# Line 733  if (after_context > 0 && lastmatchnumber Line 919  if (after_context > 0 && lastmatchnumber
919    
920    
921  /*************************************************  /*************************************************
922    *   Apply patterns to subject till one matches   *
923    *************************************************/
924    
925    /* This function is called to run through all patterns, looking for a match. It
926    is used multiple times for the same subject when colouring is enabled, in order
927    to find all possible matches.
928    
929    Arguments:
930      matchptr    the start of the subject
931      length      the length of the subject to match
932      offsets     the offsets vector to fill in
933      mrc         address of where to put the result of pcre_exec()
934    
935    Returns:      TRUE if there was a match
936                  FALSE if there was no match
937                  invert if there was a non-fatal error
938    */
939    
940    static BOOL
941    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
942    {
943    int i;
944    size_t slen = length;
945    const char *msg = "this text:\n\n";
946    if (slen > 200)
947      {
948      slen = 200;
949      msg = "text that starts:\n\n";
950      }
951    for (i = 0; i < pattern_count; i++)
952      {
953      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
954        PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
955      if (*mrc >= 0) return TRUE;
956      if (*mrc == PCRE_ERROR_NOMATCH) continue;
957      fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
958      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
959      fprintf(stderr, "%s", msg);
960      FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
961      fprintf(stderr, "\n\n");
962      if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
963        resource_error = TRUE;
964      if (error_count++ > 20)
965        {
966        fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
967        pcregrep_exit(2);
968        }
969      return invert;    /* No more matching; don't show the line again */
970      }
971    
972    return FALSE;  /* No match, no errors */
973    }
974    
975    
976    
977    /*************************************************
978  *            Grep an individual file             *  *            Grep an individual file             *
979  *************************************************/  *************************************************/
980    
# Line 744  be in the middle third most of the time, Line 986  be in the middle third most of the time,
986  "before" context printing.  "before" context printing.
987    
988  Arguments:  Arguments:
989    in           the fopened FILE stream    handle       the fopened FILE stream for a normal file
990                   the gzFile pointer when reading is via libz
991                   the BZFILE pointer when reading is via libbz2
992      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
993    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
994                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
995                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
996    
997  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
998                 1 otherwise (no matches)                 1 otherwise (no matches)
999                   2 if there is a read error on a .bz2 file
1000  */  */
1001    
1002  static int  static int
1003  pcregrep(FILE *in, char *printname)  pcregrep(void *handle, int frtype, char *printname)
1004  {  {
1005  int rc = 1;  int rc = 1;
1006  int linenumber = 1;  int linenumber = 1;
1007  int lastmatchnumber = 0;  int lastmatchnumber = 0;
1008  int count = 0;  int count = 0;
1009  int offsets[99];  int filepos = 0;
1010    int offsets[OFFSET_SIZE];
1011  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
1012  char buffer[3*MBUFTHIRD];  char buffer[3*MBUFTHIRD];
1013  char *ptr = buffer;  char *ptr = buffer;
1014  char *endptr;  char *endptr;
1015  size_t bufflength;  size_t bufflength;
1016  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
1017    BOOL input_line_buffered = line_buffered;
1018    FILE *in = NULL;                    /* Ensure initialized */
1019    
1020    #ifdef SUPPORT_LIBZ
1021    gzFile ingz = NULL;
1022    #endif
1023    
1024    #ifdef SUPPORT_LIBBZ2
1025    BZFILE *inbz2 = NULL;
1026    #endif
1027    
 /* Do the first read into the start of the buffer and set up the pointer to  
 end of what we have. */  
1028    
1029  bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  /* Do the first read into the start of the buffer and set up the pointer to end
1030    of what we have. In the case of libz, a non-zipped .gz file will be read as a
1031    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1032    fail. */
1033    
1034    #ifdef SUPPORT_LIBZ
1035    if (frtype == FR_LIBZ)
1036      {
1037      ingz = (gzFile)handle;
1038      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1039      }
1040    else
1041    #endif
1042    
1043    #ifdef SUPPORT_LIBBZ2
1044    if (frtype == FR_LIBBZ2)
1045      {
1046      inbz2 = (BZFILE *)handle;
1047      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1048      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
1049      }                                    /* without the cast it is unsigned. */
1050    else
1051    #endif
1052    
1053      {
1054      in = (FILE *)handle;
1055      if (is_file_tty(in)) input_line_buffered = TRUE;
1056      bufflength = input_line_buffered?
1057        read_one_line(buffer, 3*MBUFTHIRD, in) :
1058        fread(buffer, 1, 3*MBUFTHIRD, in);
1059      }
1060    
1061  endptr = buffer + bufflength;  endptr = buffer + bufflength;
1062    
1063  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
# Line 781  way, the buffer is shifted left and re-f Line 1067  way, the buffer is shifted left and re-f
1067    
1068  while (ptr < endptr)  while (ptr < endptr)
1069    {    {
1070    int i, endlinelength;    int endlinelength;
1071    int mrc = 0;    int mrc = 0;
1072    BOOL match = FALSE;    BOOL match;
1073      char *matchptr = ptr;
1074    char *t = ptr;    char *t = ptr;
1075    size_t length, linelength;    size_t length, linelength;
1076    
1077    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
1078    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
1079    length of the remainder of the data in the buffer. Otherwise, it is the length of    length of the remainder of the data in the buffer. Otherwise, it is the length of
1080    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
1081    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1082    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
1083      first line. */
1084    
1085    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
1086    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
1087    length = multiline? endptr - ptr : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
1088    
1089    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
1090    
# Line 807  while (ptr < endptr) Line 1095  while (ptr < endptr)
1095        #include <time.h>        #include <time.h>
1096        struct timeval start_time, end_time;        struct timeval start_time, end_time;
1097        struct timezone dummy;        struct timezone dummy;
1098          int i;
1099    
1100        if (jfriedl_XT)        if (jfriedl_XT)
1101        {        {
# Line 815  while (ptr < endptr) Line 1104  while (ptr < endptr)
1104            ptr = malloc(newlen + 1);            ptr = malloc(newlen + 1);
1105            if (!ptr) {            if (!ptr) {
1106                    printf("out of memory");                    printf("out of memory");
1107                    exit(2);                    pcregrep_exit(2);
1108            }            }
1109            endptr = ptr;            endptr = ptr;
1110            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
# Line 832  while (ptr < endptr) Line 1121  while (ptr < endptr)
1121    
1122    
1123        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1124            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1125                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1126    
1127        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
1128                perror("bad gettimeofday");                perror("bad gettimeofday");
# Line 846  while (ptr < endptr) Line 1136  while (ptr < endptr)
1136    }    }
1137  #endif  #endif
1138    
1139      /* We come back here after a match when the -o option (only_matching) is set,
1140      in order to find any further matches in the same line. */
1141    
1142    /* Run through all the patterns until one matches. Note that we don't include    ONLY_MATCHING_RESTART:
   the final newline in the subject string. */  
1143    
1144    for (i = 0; i < pattern_count; i++)    /* Run through all the patterns until one matches or there is an error other
1145      {    than NOMATCH. This code is in a subroutine so that it can be re-used for
1146      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,    finding subsequent matches when colouring matched lines. */
1147        offsets, 99);  
1148      if (mrc >= 0) { match = TRUE; break; }    match = match_patterns(matchptr, length, offsets, &mrc);
     if (mrc != PCRE_ERROR_NOMATCH)  
       {  
       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);  
       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
       fprintf(stderr, "this line:\n");  
       fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */  
       fprintf(stderr, "\n");  
       if (error_count == 0 &&  
           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))  
         {  
         fprintf(stderr, "pcregrep: error %d means that a resource limit "  
           "was exceeded\n", mrc);  
         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
         }  
       if (error_count++ > 20)  
         {  
         fprintf(stderr, "pcregrep: too many errors - abandoned\n");  
         exit(2);  
         }  
       match = invert;    /* No more matching; don't show the line again */  
       break;  
       }  
     }  
1149    
1150    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1151    
# Line 896  while (ptr < endptr) Line 1164  while (ptr < endptr)
1164      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1165      in the file. */      in the file. */
1166    
1167      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1168        {        {
1169        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1170        return 0;        return 0;
# Line 906  while (ptr < endptr) Line 1174  while (ptr < endptr)
1174    
1175      else if (quiet) return 0;      else if (quiet) return 0;
1176    
1177      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, or a
1178      does not print any context. */      captured portion of it, as long as this string is not empty, and the
1179        --file-offsets and --line-offsets options output offsets for the matching
1180        substring (they both force --only-matching = 0). None of these options
1181        prints any context. Afterwards, adjust the start and length, and then jump
1182        back to look for further matches in the same line. If we are in invert
1183        mode, however, nothing is printed and we do not restart - this could still
1184        be useful because the return code is set. */
1185    
1186      else if (only_matching)      else if (only_matching >= 0)
1187        {        {
1188        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (!invert)
1189        if (number) fprintf(stdout, "%d:", linenumber);          {
1190        fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1191        fprintf(stdout, "\n");          if (number) fprintf(stdout, "%d:", linenumber);
1192            if (line_offsets)
1193              fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1194                offsets[1] - offsets[0]);
1195            else if (file_offsets)
1196              fprintf(stdout, "%d,%d\n",
1197                (int)(filepos + matchptr + offsets[0] - ptr),
1198                offsets[1] - offsets[0]);
1199            else if (only_matching < mrc)
1200              {
1201              int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1202              if (plen > 0)
1203                {
1204                if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1205                FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1206                if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1207                fprintf(stdout, "\n");
1208                }
1209              }
1210            else if (printname != NULL || number) fprintf(stdout, "\n");
1211            matchptr += offsets[1];
1212            length -= offsets[1];
1213            match = FALSE;
1214            if (line_buffered) fflush(stdout);
1215            rc = 0;    /* Had some success */
1216            goto ONLY_MATCHING_RESTART;
1217            }
1218        }        }
1219    
1220      /* This is the default case when none of the above options is set. We print      /* This is the default case when none of the above options is set. We print
# Line 948  while (ptr < endptr) Line 1248  while (ptr < endptr)
1248            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1249            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1250            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1251            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1252            lastmatchrestart = pp;            lastmatchrestart = pp;
1253            }            }
1254          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
# Line 988  while (ptr < endptr) Line 1288  while (ptr < endptr)
1288            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1289            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1290            pp = end_of_line(pp, endptr, &ellength);            pp = end_of_line(pp, endptr, &ellength);
1291            fwrite(p, 1, pp - p, stdout);            FWRITE(p, 1, pp - p, stdout);
1292            p = pp;            p = pp;
1293            }            }
1294          }          }
# Line 1004  while (ptr < endptr) Line 1304  while (ptr < endptr)
1304    
1305        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1306        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1307        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1308        start of the match will always be before the first newline sequence. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1309          the match will always be before the first newline sequence. */
1310    
1311        if (multiline)        if (multiline)
1312          {          {
1313          int ellength;          int ellength;
1314          char *endmatch = ptr + offsets[1];          char *endmatch = ptr;
1315          t = ptr;          if (!invert)
         while (t < endmatch)  
1316            {            {
1317            t = end_of_line(t, endptr, &ellength);            endmatch += offsets[1];
1318            if (t <= endmatch) linenumber++; else break;            t = ptr;
1319              while (t < endmatch)
1320                {
1321                t = end_of_line(t, endptr, &ellength);
1322                if (t <= endmatch) linenumber++; else break;
1323                }
1324            }            }
1325          endmatch = end_of_line(endmatch, endptr, &ellength);          endmatch = end_of_line(endmatch, endptr, &ellength);
1326          linelength = endmatch - ptr - ellength;          linelength = endmatch - ptr - ellength;
# Line 1033  while (ptr < endptr) Line 1338  while (ptr < endptr)
1338          {          {
1339          int first = S_arg * 2;          int first = S_arg * 2;
1340          int last  = first + 1;          int last  = first + 1;
1341          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1342          fprintf(stdout, "X");          fprintf(stdout, "X");
1343          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1344          }          }
1345        else        else
1346  #endif  #endif
1347    
1348        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1349          matches. */
1350    
1351        if (do_colour)        if (do_colour)
1352          {          {
1353          fwrite(ptr, 1, offsets[0], stdout);          int last_offset = 0;
1354            FWRITE(ptr, 1, offsets[0], stdout);
1355          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1356          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1357          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1358          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          for (;;)
1359              {
1360              last_offset += offsets[1];
1361              matchptr += offsets[1];
1362              length -= offsets[1];
1363              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1364              FWRITE(matchptr, 1, offsets[0], stdout);
1365              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1366              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1367              fprintf(stdout, "%c[00m", 0x1b);
1368              }
1369            FWRITE(ptr + last_offset, 1,
1370              (linelength + endlinelength) - last_offset, stdout);
1371          }          }
1372        else fwrite(ptr, 1, linelength + endlinelength, stdout);  
1373          /* Not colouring; no need to search for further matches */
1374    
1375          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1376        }        }
1377    
1378      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match. If --line-buffered was
1379        given, flush the output. */
1380    
1381        if (line_buffered) fflush(stdout);
1382      rc = 0;    /* Had some success */      rc = 0;    /* Had some success */
1383    
1384      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
# Line 1064  while (ptr < endptr) Line 1388  while (ptr < endptr)
1388      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1389      }      }
1390    
1391    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1392      anything to be printed), we have to move on to the end of the match before
1393      proceeding. */
1394    
1395      if (multiline && invert && match)
1396        {
1397        int ellength;
1398        char *endmatch = ptr + offsets[1];
1399        t = ptr;
1400        while (t < endmatch)
1401          {
1402          t = end_of_line(t, endptr, &ellength);
1403          if (t <= endmatch) linenumber++; else break;
1404          }
1405        endmatch = end_of_line(endmatch, endptr, &ellength);
1406        linelength = endmatch - ptr - ellength;
1407        }
1408    
1409      /* Advance to after the newline and increment the line number. The file
1410      offset to the current line is maintained in filepos. */
1411    
1412    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
1413      filepos += (int)(linelength + endlinelength);
1414    linenumber++;    linenumber++;
1415    
1416      /* If input is line buffered, and the buffer is not yet full, read another
1417      line and add it into the buffer. */
1418    
1419      if (input_line_buffered && bufflength < sizeof(buffer))
1420        {
1421        int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1422        bufflength += add;
1423        endptr += add;
1424        }
1425    
1426    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
1427    the current point is in the top 1/3 of the buffer, slide the buffer down by    the current point is in the top 1/3 of the buffer, slide the buffer down by
1428    1/3 and refill it. Before we do this, if some unprinted "after" lines are    1/3 and refill it. Before we do this, if some unprinted "after" lines are
# Line 1088  while (ptr < endptr) Line 1442  while (ptr < endptr)
1442    
1443      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1444      ptr -= MBUFTHIRD;      ptr -= MBUFTHIRD;
1445      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);  
1446    #ifdef SUPPORT_LIBZ
1447        if (frtype == FR_LIBZ)
1448          bufflength = 2*MBUFTHIRD +
1449            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1450        else
1451    #endif
1452    
1453    #ifdef SUPPORT_LIBBZ2
1454        if (frtype == FR_LIBBZ2)
1455          bufflength = 2*MBUFTHIRD +
1456            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1457        else
1458    #endif
1459    
1460        bufflength = 2*MBUFTHIRD +
1461          (input_line_buffered?
1462           read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1463           fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1464      endptr = buffer + bufflength;      endptr = buffer + bufflength;
1465    
1466      /* Adjust any last match point */      /* Adjust any last match point */
# Line 1100  while (ptr < endptr) Line 1472  while (ptr < endptr)
1472  /* End of file; print final "after" lines if wanted; do_after_lines sets  /* End of file; print final "after" lines if wanted; do_after_lines sets
1473  hyphenpending if it prints something. */  hyphenpending if it prints something. */
1474    
1475  if (!only_matching && !count_only)  if (only_matching < 0 && !count_only)
1476    {    {
1477    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1478    hyphenpending |= endhyphenpending;    hyphenpending |= endhyphenpending;
# Line 1119  if (filenames == FN_NOMATCH_ONLY) Line 1491  if (filenames == FN_NOMATCH_ONLY)
1491    
1492  if (count_only)  if (count_only)
1493    {    {
1494    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1495    fprintf(stdout, "%d\n", count);      {
1496        if (printname != NULL && filenames != FN_NONE)
1497          fprintf(stdout, "%s:", printname);
1498        fprintf(stdout, "%d\n", count);
1499        }
1500    }    }
1501    
1502  return rc;  return rc;
# Line 1152  grep_or_recurse(char *pathname, BOOL dir Line 1528  grep_or_recurse(char *pathname, BOOL dir
1528  {  {
1529  int rc = 1;  int rc = 1;
1530  int sep;  int sep;
1531  FILE *in;  int frtype;
1532    int pathlen;
1533    void *handle;
1534    FILE *in = NULL;           /* Ensure initialized */
1535    
1536    #ifdef SUPPORT_LIBZ
1537    gzFile ingz = NULL;
1538    #endif
1539    
1540    #ifdef SUPPORT_LIBBZ2
1541    BZFILE *inbz2 = NULL;
1542    #endif
1543    
1544  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1545    
1546  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1547    {    {
1548    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN,
1549      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1550        stdin_name : NULL);        stdin_name : NULL);
1551    }    }
1552    
   
1553  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* If the file is a directory, skip if skipping or if we are recursing, scan
1554  each file within it, subject to any include or exclude patterns that were set.  each file and directory within it, subject to any include or exclude patterns
1555  The scanning code is localized so it can be made system-specific. */  that were set. The scanning code is localized so it can be made
1556    system-specific. */
1557    
1558  if ((sep = isdirectory(pathname)) != 0)  if ((sep = isdirectory(pathname)) != 0)
1559    {    {
# Line 1187  if ((sep = isdirectory(pathname)) != 0) Line 1574  if ((sep = isdirectory(pathname)) != 0)
1574    
1575      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
1576        {        {
1577        int frc, blen;        int frc, nflen;
1578        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1579        blen = strlen(buffer);        nflen = (int)(strlen(nextfile));
1580    
1581          if (isdirectory(buffer))
1582            {
1583            if (exclude_dir_compiled != NULL &&
1584                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1585              continue;
1586    
1587        if (exclude_compiled != NULL &&          if (include_dir_compiled != NULL &&
1588            pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)              pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1589          continue;            continue;
1590            }
1591        if (include_compiled != NULL &&        else
1592            pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)          {
1593          continue;          if (exclude_compiled != NULL &&
1594                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1595              continue;
1596    
1597            if (include_compiled != NULL &&
1598                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1599              continue;
1600            }
1601    
1602        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1603        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
# Line 1220  skipping was not requested. The scan pro Line 1620  skipping was not requested. The scan pro
1620  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1621  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1622    
1623  in = fopen(pathname, "r");  pathlen = (int)(strlen(pathname));
1624  if (in == NULL)  
1625    /* Open using zlib if it is supported and the file name ends with .gz. */
1626    
1627    #ifdef SUPPORT_LIBZ
1628    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1629      {
1630      ingz = gzopen(pathname, "rb");
1631      if (ingz == NULL)
1632        {
1633        if (!silent)
1634          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1635            strerror(errno));
1636        return 2;
1637        }
1638      handle = (void *)ingz;
1639      frtype = FR_LIBZ;
1640      }
1641    else
1642    #endif
1643    
1644    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1645    
1646    #ifdef SUPPORT_LIBBZ2
1647    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1648      {
1649      inbz2 = BZ2_bzopen(pathname, "rb");
1650      handle = (void *)inbz2;
1651      frtype = FR_LIBBZ2;
1652      }
1653    else
1654    #endif
1655    
1656    /* Otherwise use plain fopen(). The label is so that we can come back here if
1657    an attempt to read a .bz2 file indicates that it really is a plain file. */
1658    
1659    #ifdef SUPPORT_LIBBZ2
1660    PLAIN_FILE:
1661    #endif
1662      {
1663      in = fopen(pathname, "rb");
1664      handle = (void *)in;
1665      frtype = FR_PLAIN;
1666      }
1667    
1668    /* All the opening methods set errno when they fail. */
1669    
1670    if (handle == NULL)
1671    {    {
1672    if (!silent)    if (!silent)
1673      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 1229  if (in == NULL) Line 1675  if (in == NULL)
1675    return 2;    return 2;
1676    }    }
1677    
1678  rc = pcregrep(in, (filenames > FN_DEFAULT ||  /* Now grep the file */
1679    
1680    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1681    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1682    
1683    /* Close in an appropriate manner. */
1684    
1685    #ifdef SUPPORT_LIBZ
1686    if (frtype == FR_LIBZ)
1687      gzclose(ingz);
1688    else
1689    #endif
1690    
1691    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1692    read failed. If the error indicates that the file isn't in fact bzipped, try
1693    again as a normal file. */
1694    
1695    #ifdef SUPPORT_LIBBZ2
1696    if (frtype == FR_LIBBZ2)
1697      {
1698      if (rc == 2)
1699        {
1700        int errnum;
1701        const char *err = BZ2_bzerror(inbz2, &errnum);
1702        if (errnum == BZ_DATA_ERROR_MAGIC)
1703          {
1704          BZ2_bzclose(inbz2);
1705          goto PLAIN_FILE;
1706          }
1707        else if (!silent)
1708          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1709            pathname, err);
1710        }
1711      BZ2_bzclose(inbz2);
1712      }
1713    else
1714    #endif
1715    
1716    /* Normal file close */
1717    
1718  fclose(in);  fclose(in);
1719    
1720    /* Pass back the yield from pcregrep(). */
1721    
1722  return rc;  return rc;
1723  }  }
1724    
# Line 1253  for (op = optionlist; op->one_char != 0; Line 1739  for (op = optionlist; op->one_char != 0;
1739    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1740    }    }
1741  fprintf(stderr, "] [long options] [pattern] [files]\n");  fprintf(stderr, "] [long options] [pattern] [files]\n");
1742  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1743      "options.\n");
1744  return rc;  return rc;
1745  }  }
1746    
# Line 1272  option_item *op; Line 1759  option_item *op;
1759  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1760  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1761  printf("PATTERN must be present if neither -e nor -f is used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1762  printf("\"-\" can be used as a file name to mean STDIN.\n\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
1763  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
1764    #ifdef SUPPORT_LIBZ
1765    printf("Files whose names end in .gz are read using zlib.\n");
1766    #endif
1767    
1768    #ifdef SUPPORT_LIBBZ2
1769    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1770    #endif
1771    
1772    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1773    printf("Other files and the standard input are read as plain files.\n\n");
1774    #else
1775    printf("All files are read as plain files, without any interpretation.\n\n");
1776    #endif
1777    
1778    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1779  printf("Options:\n");  printf("Options:\n");
1780    
1781  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
# Line 1282  for (op = optionlist; op->one_char != 0; Line 1783  for (op = optionlist; op->one_char != 0;
1783    int n;    int n;
1784    char s[4];    char s[4];
1785    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1786    printf("  %s --%s%n", s, op->long_name, &n);    n = 30 - printf("  %s --%s", s, op->long_name);
   n = 30 - n;  
1787    if (n < 1) n = 1;    if (n < 1) n = 1;
1788    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                    ", op->help_text);
1789    }    }
# Line 1308  handle_option(int letter, int options) Line 1808  handle_option(int letter, int options)
1808  {  {
1809  switch(letter)  switch(letter)
1810    {    {
1811    case N_HELP: help(); exit(0);    case N_FOFFSETS: file_offsets = TRUE; break;
1812      case N_HELP: help(); pcregrep_exit(0);
1813      case N_LOFFSETS: line_offsets = number = TRUE; break;
1814      case N_LBUFFER: line_buffered = TRUE; break;
1815    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1816    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1817    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
1818    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
1819    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1820    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1821    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1822    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1823    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1824    case 'o': only_matching = TRUE; break;    case 'o': only_matching = 0; break;
1825    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1826    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
1827    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
# Line 1328  switch(letter) Line 1831  switch(letter)
1831    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1832    
1833    case 'V':    case 'V':
1834    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
1835    fprintf(stderr, "PCRE version %s\n", pcre_version());    pcregrep_exit(0);
   exit(0);  
1836    break;    break;
1837    
1838    default:    default:
1839    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1840    exit(usage(2));    pcregrep_exit(usage(2));
1841    }    }
1842    
1843  return options;  return options;
# Line 1405  sprintf(buffer, "%s%.*s%s", prefix[proce Line 1907  sprintf(buffer, "%s%.*s%s", prefix[proce
1907    suffix[process_options]);    suffix[process_options]);
1908  pattern_list[pattern_count] =  pattern_list[pattern_count] =
1909    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
1910  if (pattern_list[pattern_count++] != NULL) return TRUE;  if (pattern_list[pattern_count] != NULL)
1911      {
1912      pattern_count++;
1913      return TRUE;
1914      }
1915    
1916  /* Handle compile errors */  /* Handle compile errors */
1917    
# Line 1463  if ((process_options & PO_FIXED_STRINGS) Line 1969  if ((process_options & PO_FIXED_STRINGS)
1969      char *p = end_of_line(pattern, eop, &ellength);      char *p = end_of_line(pattern, eop, &ellength);
1970      if (ellength == 0)      if (ellength == 0)
1971        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
1972      sprintf(buffer, "%.*s", p - pattern - ellength, pattern);      sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1973      pattern = p;      pattern = p;
1974      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
1975        return FALSE;        return FALSE;
# Line 1487  int i, j; Line 1993  int i, j;
1993  int rc = 1;  int rc = 1;
1994  int pcre_options = 0;  int pcre_options = 0;
1995  int cmd_pattern_count = 0;  int cmd_pattern_count = 0;
1996    int hint_count = 0;
1997  int errptr;  int errptr;
1998  BOOL only_one_at_top;  BOOL only_one_at_top;
1999  char *patterns[MAX_PATTERN_COUNT];  char *patterns[MAX_PATTERN_COUNT];
# Line 1495  const char *error; Line 2002  const char *error;
2002    
2003  /* Set the default line ending value from the default in the PCRE library;  /* Set the default line ending value from the default in the PCRE library;
2004  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2005  */  Note that the return values from pcre_config(), though derived from the ASCII
2006    codes, are the same in EBCDIC environments, so we must use the actual values
2007    rather than escapes such as '\r'. */
2008    
2009  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2010  switch(i)  switch(i)
2011    {    {
2012    default:                 newline = (char *)"lf"; break;    default:               newline = (char *)"lf"; break;
2013    case '\r':               newline = (char *)"cr"; break;    case 13:               newline = (char *)"cr"; break;
2014    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case (13 << 8) | 10:   newline = (char *)"crlf"; break;
2015    case -1:                 newline = (char *)"any"; break;    case -1:               newline = (char *)"any"; break;
2016      case -2:               newline = (char *)"anycrlf"; break;
2017    }    }
2018    
2019  /* Process the options */  /* Process the options */
# Line 1523  for (i = 1; i < argc; i++) Line 2033  for (i = 1; i < argc; i++)
2033    if (argv[i][1] == 0)    if (argv[i][1] == 0)
2034      {      {
2035      if (pattern_filename != NULL || pattern_count > 0) break;      if (pattern_filename != NULL || pattern_count > 0) break;
2036        else exit(usage(2));        else pcregrep_exit(usage(2));
2037      }      }
2038    
2039    /* Handle a long name option, or -- to terminate the options */    /* Handle a long name option, or -- to terminate the options */
# Line 1545  for (i = 1; i < argc; i++) Line 2055  for (i = 1; i < argc; i++)
2055      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
2056      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
2057      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2058      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
2059      these categories, fortunately. */      both these categories. */
2060    
2061      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
2062        {        {
2063        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
2064        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
2065        if (opbra == NULL)     /* Not a (p) case */  
2066          /* Handle options with only one spelling of the name */
2067    
2068          if (opbra == NULL)     /* Does not contain '(' */
2069          {          {
2070          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
2071            {            {
# Line 1560  for (i = 1; i < argc; i++) Line 2073  for (i = 1; i < argc; i++)
2073            }            }
2074          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
2075            {            {
2076            int oplen = equals - op->long_name;            int oplen = (int)(equals - op->long_name);
2077            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)?
2078                (int)strlen(arg) : (int)(argequals - arg);
2079            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2080              {              {
2081              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1574  for (i = 1; i < argc; i++) Line 2088  for (i = 1; i < argc; i++)
2088              }              }
2089            }            }
2090          }          }
2091        else                   /* Special case xxxx(p) */  
2092          /* Handle options with an alternate spelling of the name */
2093    
2094          else
2095          {          {
2096          char buff1[24];          char buff1[24];
2097          char buff2[24];          char buff2[24];
2098          int baselen = opbra - op->long_name;  
2099            int baselen = (int)(opbra - op->long_name);
2100            int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2101            int arglen = (argequals == NULL || equals == NULL)?
2102              (int)strlen(arg) : (int)(argequals - arg);
2103    
2104          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
2105          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2106            opbra + 1);  
2107          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
2108               strncmp(arg, buff2, arglen) == 0)
2109              {
2110              if (equals != NULL && argequals != NULL)
2111                {
2112                option_data = argequals;
2113                if (*option_data == '=')
2114                  {
2115                  option_data++;
2116                  longopwasequals = TRUE;
2117                  }
2118                }
2119            break;            break;
2120              }
2121          }          }
2122        }        }
2123    
2124      if (op->one_char == 0)      if (op->one_char == 0)
2125        {        {
2126        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2127        exit(usage(2));        pcregrep_exit(usage(2));
2128        }        }
2129      }      }
2130    
   
2131    /* Jeffrey Friedl's debugging harness uses these additional options which    /* Jeffrey Friedl's debugging harness uses these additional options which
2132    are not in the right form for putting in the option table because they use    are not in the right form for putting in the option table because they use
2133    only one hyphen, yet are more than one character long. By putting them    only one hyphen, yet are more than one character long. By putting them
# Line 1628  for (i = 1; i < argc; i++) Line 2161  for (i = 1; i < argc; i++)
2161      while (*s != 0)      while (*s != 0)
2162        {        {
2163        for (op = optionlist; op->one_char != 0; op++)        for (op = optionlist; op->one_char != 0; op++)
2164          { if (*s == op->one_char) break; }          {
2165            if (*s == op->one_char) break;
2166            }
2167        if (op->one_char == 0)        if (op->one_char == 0)
2168          {          {
2169          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2170            *s, argv[i]);            *s, argv[i]);
2171          exit(usage(2));          pcregrep_exit(usage(2));
2172          }          }
2173        if (op->type != OP_NODATA || s[1] == 0)  
2174          {        /* Check for a single-character option that has data: OP_OP_NUMBER
2175          option_data = s+1;        is used for one that either has a numerical number or defaults, i.e. the
2176          break;        data is optional. If a digit follows, there is data; if not, carry on
2177          with other single-character options in the same string. */
2178    
2179          option_data = s+1;
2180          if (op->type == OP_OP_NUMBER)
2181            {
2182            if (isdigit((unsigned char)s[1])) break;
2183          }          }
2184          else   /* Check for end or a dataless option */
2185            {
2186            if (op->type != OP_NODATA || s[1] == 0) break;
2187            }
2188    
2189          /* Handle a single-character option with no data, then loop for the
2190          next character in the string. */
2191    
2192        pcre_options = handle_option(*s++, pcre_options);        pcre_options = handle_option(*s++, pcre_options);
2193        }        }
2194      }      }
# Line 1656  for (i = 1; i < argc; i++) Line 2205  for (i = 1; i < argc; i++)
2205    
2206    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2207    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
2208    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r",
2209    Jeffrey Friedl's special -S debugging option. */    "only-matching", and Jeffrey Friedl's special -S debugging option. */
2210    
2211    if (*option_data == 0 &&    if (*option_data == 0 &&
2212        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
# Line 1667  for (i = 1; i < argc; i++) Line 2216  for (i = 1; i < argc; i++)
2216        case N_COLOUR:        case N_COLOUR:
2217        colour_option = (char *)"auto";        colour_option = (char *)"auto";
2218        break;        break;
2219    
2220          case 'o':
2221          only_matching = 0;
2222          break;
2223    
2224  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
2225        case 'S':        case 'S':
2226        S_arg = 0;        S_arg = 0;
# Line 1683  for (i = 1; i < argc; i++) Line 2237  for (i = 1; i < argc; i++)
2237      if (i >= argc - 1 || longopwasequals)      if (i >= argc - 1 || longopwasequals)
2238        {        {
2239        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2240        exit(usage(2));        pcregrep_exit(usage(2));
2241        }        }
2242      option_data = argv[++i];      option_data = argv[++i];
2243      }      }
# Line 1708  for (i = 1; i < argc; i++) Line 2262  for (i = 1; i < argc; i++)
2262      {      {
2263      *((char **)op->dataptr) = option_data;      *((char **)op->dataptr) = option_data;
2264      }      }
2265    
2266      /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2267      only for unpicking arguments, so just keep it simple. */
2268    
2269    else    else
2270      {      {
2271      char *endptr;      unsigned long int n = 0;
2272      int n = strtoul(option_data, &endptr, 10);      char *endptr = option_data;
2273        while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2274        while (isdigit((unsigned char)(*endptr)))
2275          n = n * 10 + (int)(*endptr++ - '0');
2276      if (*endptr != 0)      if (*endptr != 0)
2277        {        {
2278        if (longop)        if (longop)
2279          {          {
2280          char *equals = strchr(op->long_name, '=');          char *equals = strchr(op->long_name, '=');
2281          int nlen = (equals == NULL)? (int)strlen(op->long_name) :          int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2282            equals - op->long_name;            (int)(equals - op->long_name);
2283          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2284            option_data, nlen, op->long_name);            option_data, nlen, op->long_name);
2285          }          }
2286        else        else
2287          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",          fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2288            option_data, op->one_char);            option_data, op->one_char);
2289        exit(usage(2));        pcregrep_exit(usage(2));
2290        }        }
2291      *((int *)op->dataptr) = n;      *((int *)op->dataptr) = n;
2292      }      }
# Line 1740  if (both_context > 0) Line 2301  if (both_context > 0)
2301    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2302    }    }
2303    
2304    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2305    However, the latter two set only_matching. */
2306    
2307    if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2308        (file_offsets && line_offsets))
2309      {
2310      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2311        "and/or --line-offsets\n");
2312      pcregrep_exit(usage(2));
2313      }
2314    
2315    if (file_offsets || line_offsets) only_matching = 0;
2316    
2317  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2318  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
2319    
# Line 1811  else if (strcmp(newline, "any") == 0 || Line 2385  else if (strcmp(newline, "any") == 0 ||
2385    pcre_options |= PCRE_NEWLINE_ANY;    pcre_options |= PCRE_NEWLINE_ANY;
2386    endlinetype = EL_ANY;    endlinetype = EL_ANY;
2387    }    }
2388    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2389      {
2390      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2391      endlinetype = EL_ANYCRLF;
2392      }
2393  else  else
2394    {    {
2395    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
# Line 1865  hints_list = (pcre_extra **)malloc(MAX_P Line 2444  hints_list = (pcre_extra **)malloc(MAX_P
2444  if (pattern_list == NULL || hints_list == NULL)  if (pattern_list == NULL || hints_list == NULL)
2445    {    {
2446    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
2447    return 2;    goto EXIT2;
2448    }    }
2449    
2450  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there is no file provided by -f,
# Line 1884  for (j = 0; j < cmd_pattern_count; j++) Line 2463  for (j = 0; j < cmd_pattern_count; j++)
2463    {    {
2464    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(patterns[j], pcre_options, NULL,
2465         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2466      return 2;      goto EXIT2;
2467    }    }
2468    
2469  /* Compile the regular expressions that are provided in a file. */  /* Compile the regular expressions that are provided in a file. */
# Line 1908  if (pattern_filename != NULL) Line 2487  if (pattern_filename != NULL)
2487        {        {
2488        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2489          strerror(errno));          strerror(errno));
2490        return 2;        goto EXIT2;
2491        }        }
2492      filename = pattern_filename;      filename = pattern_filename;
2493      }      }
# Line 1921  if (pattern_filename != NULL) Line 2500  if (pattern_filename != NULL)
2500      linenumber++;      linenumber++;
2501      if (buffer[0] == 0) continue;   /* Skip blank lines */      if (buffer[0] == 0) continue;   /* Skip blank lines */
2502      if (!compile_pattern(buffer, pcre_options, filename, linenumber))      if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2503        return 2;        goto EXIT2;
2504      }      }
2505    
2506    if (f != stdin) fclose(f);    if (f != stdin) fclose(f);
# Line 1937  for (j = 0; j < pattern_count; j++) Line 2516  for (j = 0; j < pattern_count; j++)
2516      char s[16];      char s[16];
2517      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2518      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2519      return 2;      goto EXIT2;
2520      }      }
2521      hint_count++;
2522    }    }
2523    
2524    /* If --match-limit or --recursion-limit was set, put the value(s) into the
2525    pcre_extra block for each pattern. */
2526    
2527    if (match_limit > 0 || match_limit_recursion > 0)
2528      {
2529      for (j = 0; j < pattern_count; j++)
2530        {
2531        if (hints_list[j] == NULL)
2532          {
2533          hints_list[j] = malloc(sizeof(pcre_extra));
2534          if (hints_list[j] == NULL)
2535            {
2536            fprintf(stderr, "pcregrep: malloc failed\n");
2537            pcregrep_exit(2);
2538            }
2539          }
2540        if (match_limit > 0)
2541          {
2542          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2543          hints_list[j]->match_limit = match_limit;
2544          }
2545        if (match_limit_recursion > 0)
2546          {
2547          hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2548          hints_list[j]->match_limit_recursion = match_limit_recursion;
2549          }
2550        }
2551      }
2552    
2553  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
2554    
# Line 1951  if (exclude_pattern != NULL) Line 2560  if (exclude_pattern != NULL)
2560      {      {
2561      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2562        errptr, error);        errptr, error);
2563      return 2;      goto EXIT2;
2564      }      }
2565    }    }
2566    
# Line 1963  if (include_pattern != NULL) Line 2572  if (include_pattern != NULL)
2572      {      {
2573      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2574        errptr, error);        errptr, error);
2575      return 2;      goto EXIT2;
2576        }
2577      }
2578    
2579    if (exclude_dir_pattern != NULL)
2580      {
2581      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2582        pcretables);
2583      if (exclude_dir_compiled == NULL)
2584        {
2585        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2586          errptr, error);
2587        goto EXIT2;
2588        }
2589      }
2590    
2591    if (include_dir_pattern != NULL)
2592      {
2593      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2594        pcretables);
2595      if (include_dir_compiled == NULL)
2596        {
2597        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2598          errptr, error);
2599        goto EXIT2;
2600      }      }
2601    }    }
2602    
2603  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
2604    
2605  if (i >= argc)  if (i >= argc)
2606    return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    {
2607      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2608      goto EXIT;
2609      }
2610    
2611  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2612  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
# Line 1987  for (; i < argc; i++) Line 2623  for (; i < argc; i++)
2623      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2624    }    }
2625    
2626  return rc;  EXIT:
2627    if (pattern_list != NULL)
2628      {
2629      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2630      free(pattern_list);
2631      }
2632    if (hints_list != NULL)
2633      {
2634      for (i = 0; i < hint_count; i++)
2635        {
2636        if (hints_list[i] != NULL) free(hints_list[i]);
2637        }
2638      free(hints_list);
2639      }
2640    pcregrep_exit(rc);
2641    
2642    EXIT2:
2643    rc = 2;
2644    goto EXIT;
2645  }  }
2646    
2647  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.96  
changed lines
  Added in v.565

  ViewVC Help
Powered by ViewVC 1.1.5