/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 515 by ph10, Tue May 4 09:12:25 2010 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <locale.h>  #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
 #include "config.h"  
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 56  POSSIBILITY OF SUCH DAMAGE. Line 70  POSSIBILITY OF SUCH DAMAGE.
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "4.2 09-Jan-2006"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define MBUFTHIRD BUFSIZ
# Line 65  typedef int BOOL; Line 79  typedef int BOOL;
79  #define MBUFTHIRD 8192  #define MBUFTHIRD 8192
80  #endif  #endif
81    
   
82  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
83  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
84  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
85    
86  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87    
88    /* File reading styles */
89    
90    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92  /* Actions for the -d and -D options */  /* Actions for the -d and -D options */
93    
# Line 83  enum { DEE_READ, DEE_SKIP }; Line 100  enum { DEE_READ, DEE_SKIP };
100  #define PO_LINE_MATCH     0x0002  #define PO_LINE_MATCH     0x0002
101  #define PO_FIXED_STRINGS  0x0004  #define PO_FIXED_STRINGS  0x0004
102    
103    /* Line ending types */
104    
105    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106    
107    /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108    environments), a warning is issued if the value of fwrite() is ignored.
109    Unfortunately, casting to (void) does not suppress the warning. To get round
110    this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111    apply to fprintf(). */
112    
113    #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
114    
115    
116    
117  /*************************************************  /*************************************************
# Line 94  regular code. */ Line 123  regular code. */
123    
124  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
125  static int S_arg = -1;  static int S_arg = -1;
126    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128    static const char *jfriedl_prefix = "";
129    static const char *jfriedl_postfix = "";
130  #endif  #endif
131    
132    static int  endlinetype;
133    
134  static char *colour_string = (char *)"1;31";  static char *colour_string = (char *)"1;31";
135  static char *colour_option = NULL;  static char *colour_option = NULL;
136  static char *dee_option = NULL;  static char *dee_option = NULL;
137  static char *DEE_option = NULL;  static char *DEE_option = NULL;
138    static char *newline = NULL;
139  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
140  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
141  static char *locale = NULL;  static char *locale = NULL;
# Line 107  static char *locale = NULL; Line 143  static char *locale = NULL;
143  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
144    
145  static int  pattern_count = 0;  static int  pattern_count = 0;
146  static pcre **pattern_list;  static pcre **pattern_list = NULL;
147  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
148    
149  static char *include_pattern = NULL;  static char *include_pattern = NULL;
150  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
151    static char *include_dir_pattern = NULL;
152    static char *exclude_dir_pattern = NULL;
153    
154  static pcre *include_compiled = NULL;  static pcre *include_compiled = NULL;
155  static pcre *exclude_compiled = NULL;  static pcre *exclude_compiled = NULL;
156    static pcre *include_dir_compiled = NULL;
157    static pcre *exclude_dir_compiled = NULL;
158    
159  static int after_context = 0;  static int after_context = 0;
160  static int before_context = 0;  static int before_context = 0;
# Line 127  static int process_options = 0; Line 167  static int process_options = 0;
167    
168  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
169  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
170    static BOOL file_offsets = FALSE;
171  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
172  static BOOL invert = FALSE;  static BOOL invert = FALSE;
173    static BOOL line_offsets = FALSE;
174  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
175  static BOOL number = FALSE;  static BOOL number = FALSE;
176    static BOOL omit_zero_count = FALSE;
177  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
178  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
179  static BOOL silent = FALSE;  static BOOL silent = FALSE;
180    static BOOL utf8 = FALSE;
181    
182  /* Structure for options and list of them */  /* Structure for options and list of them */
183    
# Line 151  typedef struct option_item { Line 195  typedef struct option_item {
195  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
196  used to identify them. */  used to identify them. */
197    
198  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
199  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
200  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
201  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
202  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
203  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
204  #define N_NULL      (-7)  #define N_LABEL        (-7)
205    #define N_LOCALE       (-8)
206    #define N_NULL         (-9)
207    #define N_LOFFSETS     (-10)
208    #define N_FOFFSETS     (-11)
209    
210  static option_item optionlist[] = {  static option_item optionlist[] = {
211    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
# Line 170  static option_item optionlist[] = { Line 218  static option_item optionlist[] = {
218    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
219    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
220    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
221    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_PATLIST,   'e',      NULL,              "regex(p)=pattern", "specify pattern (may be used more than once)" },
222    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_NODATA,    'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
223    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
224      { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
225    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
226    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
227    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
228    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
229    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
230    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
231      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
232    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
233    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
234      { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
235    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
236    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
237    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
238    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
239    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
240    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
241      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
242      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
243  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
244    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
245  #endif  #endif
# Line 210  static const char *prefix[] = { Line 263  static const char *prefix[] = {
263  static const char *suffix[] = {  static const char *suffix[] = {
264    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
265    
266    /* UTF-8 tables - used only when the newline setting is "any". */
267    
268    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
269    
270    const char utf8_table4[] = {
271      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
272      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
273      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
274      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
275    
276    
277    
278  /*************************************************  /*************************************************
# Line 222  although at present the only ones are fo Line 285  although at present the only ones are fo
285    
286  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
287    
288  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
289  #include <sys/types.h>  #include <sys/types.h>
290  #include <sys/stat.h>  #include <sys/stat.h>
291  #include <dirent.h>  #include <dirent.h>
# Line 254  for (;;) Line 317  for (;;)
317    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
318      return dent->d_name;      return dent->d_name;
319    }    }
320  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
321  }  }
322    
323  static void  static void
# Line 289  return isatty(fileno(stdout)); Line 352  return isatty(fileno(stdout));
352    
353  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
354  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
355  when it did not exist. */  when it did not exist. David Byron added a patch that moved the #include of
356    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
357    */
358    
359  #elif HAVE_WIN32API  #elif HAVE_WINDOWS_H
360    
361  #ifndef STRICT  #ifndef STRICT
362  # define STRICT  # define STRICT
# Line 300  when it did not exist. */ Line 364  when it did not exist. */
364  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
365  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
366  #endif  #endif
367    
368    #include <windows.h>
369    
370  #ifndef INVALID_FILE_ATTRIBUTES  #ifndef INVALID_FILE_ATTRIBUTES
371  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
372  #endif  #endif
373    
 #include <windows.h>  
   
374  typedef struct directory_type  typedef struct directory_type
375  {  {
376  HANDLE handle;  HANDLE handle;
# Line 390  regular if they are not directories. */ Line 455  regular if they are not directories. */
455    
456  int isregfile(char *filename)  int isregfile(char *filename)
457  {  {
458  return !isdirectory(filename)  return !isdirectory(filename);
459  }  }
460    
461    
# Line 401  return !isdirectory(filename) Line 466  return !isdirectory(filename)
466  static BOOL  static BOOL
467  is_stdout_tty(void)  is_stdout_tty(void)
468  {  {
469  FALSE;  return FALSE;
470  }  }
471    
472    
# Line 414  FALSE; Line 479  FALSE;
479  typedef void directory_type;  typedef void directory_type;
480    
481  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
482  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
483  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
484  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
485    
486    
# Line 439  return FALSE; Line 504  return FALSE;
504    
505    
506    
507  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
508  /*************************************************  /*************************************************
509  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
510  *************************************************/  *************************************************/
# Line 462  return sys_errlist[n]; Line 527  return sys_errlist[n];
527    
528    
529  /*************************************************  /*************************************************
530    *             Find end of line                   *
531    *************************************************/
532    
533    /* The length of the endline sequence that is found is set via lenptr. This may
534    be zero at the very end of the file if there is no line-ending sequence there.
535    
536    Arguments:
537      p         current position in line
538      endptr    end of available data
539      lenptr    where to put the length of the eol sequence
540    
541    Returns:    pointer to the last byte of the line
542    */
543    
544    static char *
545    end_of_line(char *p, char *endptr, int *lenptr)
546    {
547    switch(endlinetype)
548      {
549      default:      /* Just in case */
550      case EL_LF:
551      while (p < endptr && *p != '\n') p++;
552      if (p < endptr)
553        {
554        *lenptr = 1;
555        return p + 1;
556        }
557      *lenptr = 0;
558      return endptr;
559    
560      case EL_CR:
561      while (p < endptr && *p != '\r') p++;
562      if (p < endptr)
563        {
564        *lenptr = 1;
565        return p + 1;
566        }
567      *lenptr = 0;
568      return endptr;
569    
570      case EL_CRLF:
571      for (;;)
572        {
573        while (p < endptr && *p != '\r') p++;
574        if (++p >= endptr)
575          {
576          *lenptr = 0;
577          return endptr;
578          }
579        if (*p == '\n')
580          {
581          *lenptr = 2;
582          return p + 1;
583          }
584        }
585      break;
586    
587      case EL_ANYCRLF:
588      while (p < endptr)
589        {
590        int extra = 0;
591        register int c = *((unsigned char *)p);
592    
593        if (utf8 && c >= 0xc0)
594          {
595          int gcii, gcss;
596          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
597          gcss = 6*extra;
598          c = (c & utf8_table3[extra]) << gcss;
599          for (gcii = 1; gcii <= extra; gcii++)
600            {
601            gcss -= 6;
602            c |= (p[gcii] & 0x3f) << gcss;
603            }
604          }
605    
606        p += 1 + extra;
607    
608        switch (c)
609          {
610          case 0x0a:    /* LF */
611          *lenptr = 1;
612          return p;
613    
614          case 0x0d:    /* CR */
615          if (p < endptr && *p == 0x0a)
616            {
617            *lenptr = 2;
618            p++;
619            }
620          else *lenptr = 1;
621          return p;
622    
623          default:
624          break;
625          }
626        }   /* End of loop for ANYCRLF case */
627    
628      *lenptr = 0;  /* Must have hit the end */
629      return endptr;
630    
631      case EL_ANY:
632      while (p < endptr)
633        {
634        int extra = 0;
635        register int c = *((unsigned char *)p);
636    
637        if (utf8 && c >= 0xc0)
638          {
639          int gcii, gcss;
640          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
641          gcss = 6*extra;
642          c = (c & utf8_table3[extra]) << gcss;
643          for (gcii = 1; gcii <= extra; gcii++)
644            {
645            gcss -= 6;
646            c |= (p[gcii] & 0x3f) << gcss;
647            }
648          }
649    
650        p += 1 + extra;
651    
652        switch (c)
653          {
654          case 0x0a:    /* LF */
655          case 0x0b:    /* VT */
656          case 0x0c:    /* FF */
657          *lenptr = 1;
658          return p;
659    
660          case 0x0d:    /* CR */
661          if (p < endptr && *p == 0x0a)
662            {
663            *lenptr = 2;
664            p++;
665            }
666          else *lenptr = 1;
667          return p;
668    
669          case 0x85:    /* NEL */
670          *lenptr = utf8? 2 : 1;
671          return p;
672    
673          case 0x2028:  /* LS */
674          case 0x2029:  /* PS */
675          *lenptr = 3;
676          return p;
677    
678          default:
679          break;
680          }
681        }   /* End of loop for ANY case */
682    
683      *lenptr = 0;  /* Must have hit the end */
684      return endptr;
685      }     /* End of overall switch */
686    }
687    
688    
689    
690    /*************************************************
691    *         Find start of previous line            *
692    *************************************************/
693    
694    /* This is called when looking back for before lines to print.
695    
696    Arguments:
697      p         start of the subsequent line
698      startptr  start of available data
699    
700    Returns:    pointer to the start of the previous line
701    */
702    
703    static char *
704    previous_line(char *p, char *startptr)
705    {
706    switch(endlinetype)
707      {
708      default:      /* Just in case */
709      case EL_LF:
710      p--;
711      while (p > startptr && p[-1] != '\n') p--;
712      return p;
713    
714      case EL_CR:
715      p--;
716      while (p > startptr && p[-1] != '\n') p--;
717      return p;
718    
719      case EL_CRLF:
720      for (;;)
721        {
722        p -= 2;
723        while (p > startptr && p[-1] != '\n') p--;
724        if (p <= startptr + 1 || p[-2] == '\r') return p;
725        }
726      return p;   /* But control should never get here */
727    
728      case EL_ANY:
729      case EL_ANYCRLF:
730      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
731      if (utf8) while ((*p & 0xc0) == 0x80) p--;
732    
733      while (p > startptr)
734        {
735        register int c;
736        char *pp = p - 1;
737    
738        if (utf8)
739          {
740          int extra = 0;
741          while ((*pp & 0xc0) == 0x80) pp--;
742          c = *((unsigned char *)pp);
743          if (c >= 0xc0)
744            {
745            int gcii, gcss;
746            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
747            gcss = 6*extra;
748            c = (c & utf8_table3[extra]) << gcss;
749            for (gcii = 1; gcii <= extra; gcii++)
750              {
751              gcss -= 6;
752              c |= (pp[gcii] & 0x3f) << gcss;
753              }
754            }
755          }
756        else c = *((unsigned char *)pp);
757    
758        if (endlinetype == EL_ANYCRLF) switch (c)
759          {
760          case 0x0a:    /* LF */
761          case 0x0d:    /* CR */
762          return p;
763    
764          default:
765          break;
766          }
767    
768        else switch (c)
769          {
770          case 0x0a:    /* LF */
771          case 0x0b:    /* VT */
772          case 0x0c:    /* FF */
773          case 0x0d:    /* CR */
774          case 0x85:    /* NEL */
775          case 0x2028:  /* LS */
776          case 0x2029:  /* PS */
777          return p;
778    
779          default:
780          break;
781          }
782    
783        p = pp;  /* Back one character */
784        }        /* End of loop for ANY case */
785    
786      return startptr;  /* Hit start of data */
787      }     /* End of overall switch */
788    }
789    
790    
791    
792    
793    
794    /*************************************************
795  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
796  *************************************************/  *************************************************/
797    
# Line 486  if (after_context > 0 && lastmatchnumber Line 816  if (after_context > 0 && lastmatchnumber
816    int count = 0;    int count = 0;
817    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
818      {      {
819        int ellength;
820      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
821      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
822      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
823      while (*pp != '\n') pp++;      pp = end_of_line(pp, endptr, &ellength);
824      fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);      FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
825      lastmatchrestart = pp + 1;      lastmatchrestart = pp;
826      }      }
827    hyphenpending = TRUE;    hyphenpending = TRUE;
828    }    }
# Line 500  if (after_context > 0 && lastmatchnumber Line 831  if (after_context > 0 && lastmatchnumber
831    
832    
833  /*************************************************  /*************************************************
834    *   Apply patterns to subject till one matches   *
835    *************************************************/
836    
837    /* This function is called to run through all patterns, looking for a match. It
838    is used multiple times for the same subject when colouring is enabled, in order
839    to find all possible matches.
840    
841    Arguments:
842      matchptr    the start of the subject
843      length      the length of the subject to match
844      offsets     the offets vector to fill in
845      mrc         address of where to put the result of pcre_exec()
846    
847    Returns:      TRUE if there was a match
848                  FALSE if there was no match
849                  invert if there was a non-fatal error
850    */
851    
852    static BOOL
853    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
854    {
855    int i;
856    for (i = 0; i < pattern_count; i++)
857      {
858      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0,
859        PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
860      if (*mrc >= 0) return TRUE;
861      if (*mrc == PCRE_ERROR_NOMATCH) continue;
862      fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
863      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
864      fprintf(stderr, "this text:\n");
865      FWRITE(matchptr, 1, length, stderr);   /* In case binary zero included */
866      fprintf(stderr, "\n");
867      if (error_count == 0 &&
868          (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
869        {
870        fprintf(stderr, "pcregrep: error %d means that a resource limit "
871          "was exceeded\n", *mrc);
872        fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
873        }
874      if (error_count++ > 20)
875        {
876        fprintf(stderr, "pcregrep: too many errors - abandoned\n");
877        exit(2);
878        }
879      return invert;    /* No more matching; don't show the line again */
880      }
881    
882    return FALSE;  /* No match, no errors */
883    }
884    
885    
886    
887    /*************************************************
888  *            Grep an individual file             *  *            Grep an individual file             *
889  *************************************************/  *************************************************/
890    
# Line 511  be in the middle third most of the time, Line 896  be in the middle third most of the time,
896  "before" context printing.  "before" context printing.
897    
898  Arguments:  Arguments:
899    in           the fopened FILE stream    handle       the fopened FILE stream for a normal file
900                   the gzFile pointer when reading is via libz
901                   the BZFILE pointer when reading is via libbz2
902      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
903    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
904                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
905                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
906    
907  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
908                 1 otherwise (no matches)                 1 otherwise (no matches)
909                   2 if there is a read error on a .bz2 file
910  */  */
911    
912  static int  static int
913  pcregrep(FILE *in, char *printname)  pcregrep(void *handle, int frtype, char *printname)
914  {  {
915  int rc = 1;  int rc = 1;
916  int linenumber = 1;  int linenumber = 1;
917  int lastmatchnumber = 0;  int lastmatchnumber = 0;
918  int count = 0;  int count = 0;
919  int offsets[99];  int filepos = 0;
920    int offsets[OFFSET_SIZE];
921  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
922  char buffer[3*MBUFTHIRD];  char buffer[3*MBUFTHIRD];
923  char *ptr = buffer;  char *ptr = buffer;
924  char *endptr;  char *endptr;
925  size_t bufflength;  size_t bufflength;
926  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
927    FILE *in = NULL;                    /* Ensure initialized */
928    
929    #ifdef SUPPORT_LIBZ
930    gzFile ingz = NULL;
931    #endif
932    
933    #ifdef SUPPORT_LIBBZ2
934    BZFILE *inbz2 = NULL;
935    #endif
936    
 /* Do the first read into the start of the buffer and set up the pointer to  
 end of what we have. */  
937    
938  bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  /* Do the first read into the start of the buffer and set up the pointer to end
939    of what we have. In the case of libz, a non-zipped .gz file will be read as a
940    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
941    fail. */
942    
943    #ifdef SUPPORT_LIBZ
944    if (frtype == FR_LIBZ)
945      {
946      ingz = (gzFile)handle;
947      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
948      }
949    else
950    #endif
951    
952    #ifdef SUPPORT_LIBBZ2
953    if (frtype == FR_LIBBZ2)
954      {
955      inbz2 = (BZFILE *)handle;
956      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
957      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
958      }                                    /* without the cast it is unsigned. */
959    else
960    #endif
961    
962      {
963      in = (FILE *)handle;
964      bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
965      }
966    
967  endptr = buffer + bufflength;  endptr = buffer + bufflength;
968    
969  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
# Line 548  way, the buffer is shifted left and re-f Line 973  way, the buffer is shifted left and re-f
973    
974  while (ptr < endptr)  while (ptr < endptr)
975    {    {
976    int i;    int endlinelength;
977    int mrc = 0;    int mrc = 0;
978    BOOL match = FALSE;    BOOL match;
979      char *matchptr = ptr;
980    char *t = ptr;    char *t = ptr;
981    size_t length, linelength;    size_t length, linelength;
982    
983    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
984    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
985    length of the remainder of the data in the buffer. Otherwise, it is the length of    length of the remainder of the data in the buffer. Otherwise, it is the length of
986    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
987    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
988    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
989      first line. */
990    linelength = 0;  
991    while (t < endptr && *t++ != '\n') linelength++;    t = end_of_line(t, endptr, &endlinelength);
992    length = multiline? endptr - ptr : linelength;    linelength = t - ptr - endlinelength;
993      length = multiline? (size_t)(endptr - ptr) : linelength;
994    /* Run through all the patterns until one matches. Note that we don't include  
995    the final newline in the subject string. */    /* Extra processing for Jeffrey Friedl's debugging. */
996    
997    for (i = 0; i < pattern_count; i++)  #ifdef JFRIEDL_DEBUG
998      {    if (jfriedl_XT || jfriedl_XR)
999      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,    {
1000        offsets, 99);        #include <sys/time.h>
1001      if (mrc >= 0) { match = TRUE; break; }        #include <time.h>
1002      if (mrc != PCRE_ERROR_NOMATCH)        struct timeval start_time, end_time;
1003        {        struct timezone dummy;
1004        fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);        int i;
1005        if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
1006        fprintf(stderr, "this line:\n");        if (jfriedl_XT)
1007        fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */        {
1008        fprintf(stderr, "\n");            unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1009        if (error_count == 0 &&            const char *orig = ptr;
1010            (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))            ptr = malloc(newlen + 1);
1011          {            if (!ptr) {
1012          fprintf(stderr, "pcregrep: error %d means that a resource limit "                    printf("out of memory");
1013            "was exceeded\n", mrc);                    exit(2);
1014          fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");            }
1015          }            endptr = ptr;
1016        if (error_count++ > 20)            strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1017          {            for (i = 0; i < jfriedl_XT; i++) {
1018          fprintf(stderr, "pcregrep: too many errors - abandoned\n");                    strncpy(endptr, orig,  length);
1019          exit(2);                    endptr += length;
1020          }            }
1021        match = invert;    /* No more matching; don't show the line again */            strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1022        break;            length = newlen;
1023        }        }
1024      }  
1025          if (gettimeofday(&start_time, &dummy) != 0)
1026                  perror("bad gettimeofday");
1027    
1028    
1029          for (i = 0; i < jfriedl_XR; i++)
1030              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1031                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1032    
1033          if (gettimeofday(&end_time, &dummy) != 0)
1034                  perror("bad gettimeofday");
1035    
1036          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1037                          -
1038                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1039    
1040          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1041          return 0;
1042      }
1043    #endif
1044    
1045      /* We come back here after a match when the -o option (only_matching) is set,
1046      in order to find any further matches in the same line. */
1047    
1048      ONLY_MATCHING_RESTART:
1049    
1050      /* Run through all the patterns until one matches or there is an error other
1051      than NOMATCH. This code is in a subroutine so that it can be re-used for
1052      finding subsequent matches when colouring matched lines. */
1053    
1054      match = match_patterns(matchptr, length, offsets, &mrc);
1055    
1056    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1057    
# Line 614  while (ptr < endptr) Line 1070  while (ptr < endptr)
1070      /* If all we want is a file name, there is no need to scan any more lines      /* If all we want is a file name, there is no need to scan any more lines
1071      in the file. */      in the file. */
1072    
1073      else if (filenames == FN_ONLY)      else if (filenames == FN_MATCH_ONLY)
1074        {        {
1075        fprintf(stdout, "%s\n", printname);        fprintf(stdout, "%s\n", printname);
1076        return 0;        return 0;
# Line 625  while (ptr < endptr) Line 1081  while (ptr < endptr)
1081      else if (quiet) return 0;      else if (quiet) return 0;
1082    
1083      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, and
1084      does not print any context. */      the --file-offsets and --line-offsets options output offsets for the
1085        matching substring (they both force --only-matching). None of these options
1086        prints any context. Afterwards, adjust the start and length, and then jump
1087        back to look for further matches in the same line. If we are in invert
1088        mode, however, nothing is printed - this could be still useful because the
1089        return code is set. */
1090    
1091      else if (only_matching)      else if (only_matching)
1092        {        {
1093        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (!invert)
1094        if (number) fprintf(stdout, "%d:", linenumber);          {
1095        fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1096        fprintf(stdout, "\n");          if (number) fprintf(stdout, "%d:", linenumber);
1097            if (line_offsets)
1098              fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1099                offsets[1] - offsets[0]);
1100            else if (file_offsets)
1101              fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1102                offsets[1] - offsets[0]);
1103            else
1104              {
1105              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1106              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1107              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1108              }
1109            fprintf(stdout, "\n");
1110            matchptr += offsets[1];
1111            length -= offsets[1];
1112            match = FALSE;
1113            goto ONLY_MATCHING_RESTART;
1114            }
1115        }        }
1116    
1117      /* This is the default case when none of the above options is set. We print      /* This is the default case when none of the above options is set. We print
# Line 646  while (ptr < endptr) Line 1125  while (ptr < endptr)
1125    
1126        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
1127          {          {
1128            int ellength;
1129          int linecount = 0;          int linecount = 0;
1130          char *p = lastmatchrestart;          char *p = lastmatchrestart;
1131    
1132          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
1133            {            {
1134            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
1135            linecount++;            linecount++;
1136            }            }
1137    
# Line 665  while (ptr < endptr) Line 1144  while (ptr < endptr)
1144            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
1145            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1146            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1147            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1148            fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);            FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1149            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
1150            }            }
1151          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
1152          }          }
# Line 693  while (ptr < endptr) Line 1172  while (ptr < endptr)
1172                 linecount < before_context)                 linecount < before_context)
1173            {            {
1174            linecount++;            linecount++;
1175            p--;            p = previous_line(p, buffer);
           while (p > buffer && p[-1] != '\n') p--;  
1176            }            }
1177    
1178          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 702  while (ptr < endptr) Line 1180  while (ptr < endptr)
1180    
1181          while (p < ptr)          while (p < ptr)
1182            {            {
1183              int ellength;
1184            char *pp = p;            char *pp = p;
1185            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1186            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1187            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1188            fwrite(p, 1, pp - p + 1, stdout);   /* In case binary zero */            FWRITE(p, 1, pp - p, stdout);
1189            p = pp + 1;            p = pp;
1190            }            }
1191          }          }
1192    
# Line 722  while (ptr < endptr) Line 1201  while (ptr < endptr)
1201    
1202        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1203        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1204        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1205        start of the match will always be before the first \n character. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1206          the match will always be before the first newline sequence. */
1207    
1208        if (multiline)        if (multiline)
1209          {          {
1210          char *endmatch = ptr + offsets[1];          int ellength;
1211          t = ptr;          char *endmatch = ptr;
1212          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          if (!invert)
1213          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1214          linelength = endmatch - ptr;            endmatch += offsets[1];
1215              t = ptr;
1216              while (t < endmatch)
1217                {
1218                t = end_of_line(t, endptr, &ellength);
1219                if (t <= endmatch) linenumber++; else break;
1220                }
1221              }
1222            endmatch = end_of_line(endmatch, endptr, &ellength);
1223            linelength = endmatch - ptr - ellength;
1224          }          }
1225    
1226        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 746  while (ptr < endptr) Line 1235  while (ptr < endptr)
1235          {          {
1236          int first = S_arg * 2;          int first = S_arg * 2;
1237          int last  = first + 1;          int last  = first + 1;
1238          fwrite(ptr, 1, offsets[first], stdout);          FWRITE(ptr, 1, offsets[first], stdout);
1239          fprintf(stdout, "X");          fprintf(stdout, "X");
1240          fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);          FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1241          }          }
1242        else        else
1243  #endif  #endif
1244    
1245        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1246          matches. */
1247    
1248        if (do_colour)        if (do_colour)
1249          {          {
1250          fwrite(ptr, 1, offsets[0], stdout);          int last_offset = 0;
1251            FWRITE(ptr, 1, offsets[0], stdout);
1252          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1253          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1254          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1255          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          for (;;)
1256              {
1257              last_offset += offsets[1];
1258              matchptr += offsets[1];
1259              length -= offsets[1];
1260              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1261              FWRITE(matchptr, 1, offsets[0], stdout);
1262              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1263              FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1264              fprintf(stdout, "%c[00m", 0x1b);
1265              }
1266            FWRITE(ptr + last_offset, 1,
1267              (linelength + endlinelength) - last_offset, stdout);
1268          }          }
       else fwrite(ptr, 1, linelength, stdout);  
1269    
1270        fprintf(stdout, "\n");        /* Not colouring; no need to search for further matches */
1271    
1272          else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1273        }        }
1274    
1275      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match */
# Line 775  while (ptr < endptr) Line 1279  while (ptr < endptr)
1279      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1280      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1281    
1282      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1283      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1284      }      }
1285    
1286    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1287      anything to be printed), we have to move on to the end of the match before
1288      proceeding. */
1289    
1290      if (multiline && invert && match)
1291        {
1292        int ellength;
1293        char *endmatch = ptr + offsets[1];
1294        t = ptr;
1295        while (t < endmatch)
1296          {
1297          t = end_of_line(t, endptr, &ellength);
1298          if (t <= endmatch) linenumber++; else break;
1299          }
1300        endmatch = end_of_line(endmatch, endptr, &ellength);
1301        linelength = endmatch - ptr - ellength;
1302        }
1303    
1304      /* Advance to after the newline and increment the line number. The file
1305      offset to the current line is maintained in filepos. */
1306    
1307    ptr += linelength + 1;    ptr += linelength + endlinelength;
1308      filepos += linelength + endlinelength;
1309    linenumber++;    linenumber++;
1310    
1311    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
# Line 803  while (ptr < endptr) Line 1327  while (ptr < endptr)
1327    
1328      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1329      ptr -= MBUFTHIRD;      ptr -= MBUFTHIRD;
1330    
1331    #ifdef SUPPORT_LIBZ
1332        if (frtype == FR_LIBZ)
1333          bufflength = 2*MBUFTHIRD +
1334            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1335        else
1336    #endif
1337    
1338    #ifdef SUPPORT_LIBBZ2
1339        if (frtype == FR_LIBBZ2)
1340          bufflength = 2*MBUFTHIRD +
1341            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1342        else
1343    #endif
1344    
1345      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1346    
1347      endptr = buffer + bufflength;      endptr = buffer + bufflength;
1348    
1349      /* Adjust any last match point */      /* Adjust any last match point */
# Line 834  if (filenames == FN_NOMATCH_ONLY) Line 1374  if (filenames == FN_NOMATCH_ONLY)
1374    
1375  if (count_only)  if (count_only)
1376    {    {
1377    if (printname != NULL) fprintf(stdout, "%s:", printname);    if (count > 0 || !omit_zero_count)
1378    fprintf(stdout, "%d\n", count);      {
1379        if (printname != NULL && filenames != FN_NONE)
1380          fprintf(stdout, "%s:", printname);
1381        fprintf(stdout, "%d\n", count);
1382        }
1383    }    }
1384    
1385  return rc;  return rc;
# Line 867  grep_or_recurse(char *pathname, BOOL dir Line 1411  grep_or_recurse(char *pathname, BOOL dir
1411  {  {
1412  int rc = 1;  int rc = 1;
1413  int sep;  int sep;
1414  FILE *in;  int frtype;
1415    int pathlen;
1416    void *handle;
1417    FILE *in = NULL;           /* Ensure initialized */
1418    
1419    #ifdef SUPPORT_LIBZ
1420    gzFile ingz = NULL;
1421    #endif
1422    
1423    #ifdef SUPPORT_LIBBZ2
1424    BZFILE *inbz2 = NULL;
1425    #endif
1426    
1427  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1428    
1429  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1430    {    {
1431    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN,
1432      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1433        stdin_name : NULL);        stdin_name : NULL);
1434    }    }
1435    
   
1436  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* If the file is a directory, skip if skipping or if we are recursing, scan
1437  each file within it, subject to any include or exclude patterns that were set.  each file and directory within it, subject to any include or exclude patterns
1438  The scanning code is localized so it can be made system-specific. */  that were set. The scanning code is localized so it can be made
1439    system-specific. */
1440    
1441  if ((sep = isdirectory(pathname)) != 0)  if ((sep = isdirectory(pathname)) != 0)
1442    {    {
# Line 902  if ((sep = isdirectory(pathname)) != 0) Line 1457  if ((sep = isdirectory(pathname)) != 0)
1457    
1458      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
1459        {        {
1460        int frc, blen;        int frc, nflen;
1461        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1462        blen = strlen(buffer);        nflen = strlen(nextfile);
1463    
1464        if (exclude_compiled != NULL &&        if (isdirectory(buffer))
1465            pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)          {
1466          continue;          if (exclude_dir_compiled != NULL &&
1467                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1468        if (include_compiled != NULL &&            continue;
1469            pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  
1470          continue;          if (include_dir_compiled != NULL &&
1471                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1472              continue;
1473            }
1474          else
1475            {
1476            if (exclude_compiled != NULL &&
1477                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1478              continue;
1479    
1480            if (include_compiled != NULL &&
1481                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1482              continue;
1483            }
1484    
1485        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1486        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
# Line 935  skipping was not requested. The scan pro Line 1503  skipping was not requested. The scan pro
1503  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1504  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1505    
1506  in = fopen(pathname, "r");  pathlen = strlen(pathname);
1507  if (in == NULL)  
1508    /* Open using zlib if it is supported and the file name ends with .gz. */
1509    
1510    #ifdef SUPPORT_LIBZ
1511    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1512      {
1513      ingz = gzopen(pathname, "rb");
1514      if (ingz == NULL)
1515        {
1516        if (!silent)
1517          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1518            strerror(errno));
1519        return 2;
1520        }
1521      handle = (void *)ingz;
1522      frtype = FR_LIBZ;
1523      }
1524    else
1525    #endif
1526    
1527    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1528    
1529    #ifdef SUPPORT_LIBBZ2
1530    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1531      {
1532      inbz2 = BZ2_bzopen(pathname, "rb");
1533      handle = (void *)inbz2;
1534      frtype = FR_LIBBZ2;
1535      }
1536    else
1537    #endif
1538    
1539    /* Otherwise use plain fopen(). The label is so that we can come back here if
1540    an attempt to read a .bz2 file indicates that it really is a plain file. */
1541    
1542    #ifdef SUPPORT_LIBBZ2
1543    PLAIN_FILE:
1544    #endif
1545      {
1546      in = fopen(pathname, "rb");
1547      handle = (void *)in;
1548      frtype = FR_PLAIN;
1549      }
1550    
1551    /* All the opening methods set errno when they fail. */
1552    
1553    if (handle == NULL)
1554    {    {
1555    if (!silent)    if (!silent)
1556      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 944  if (in == NULL) Line 1558  if (in == NULL)
1558    return 2;    return 2;
1559    }    }
1560    
1561  rc = pcregrep(in, (filenames > FN_DEFAULT ||  /* Now grep the file */
1562    
1563    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1564    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1565    
1566    /* Close in an appropriate manner. */
1567    
1568    #ifdef SUPPORT_LIBZ
1569    if (frtype == FR_LIBZ)
1570      gzclose(ingz);
1571    else
1572    #endif
1573    
1574    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1575    read failed. If the error indicates that the file isn't in fact bzipped, try
1576    again as a normal file. */
1577    
1578    #ifdef SUPPORT_LIBBZ2
1579    if (frtype == FR_LIBBZ2)
1580      {
1581      if (rc == 2)
1582        {
1583        int errnum;
1584        const char *err = BZ2_bzerror(inbz2, &errnum);
1585        if (errnum == BZ_DATA_ERROR_MAGIC)
1586          {
1587          BZ2_bzclose(inbz2);
1588          goto PLAIN_FILE;
1589          }
1590        else if (!silent)
1591          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1592            pathname, err);
1593        }
1594      BZ2_bzclose(inbz2);
1595      }
1596    else
1597    #endif
1598    
1599    /* Normal file close */
1600    
1601  fclose(in);  fclose(in);
1602    
1603    /* Pass back the yield from pcregrep(). */
1604    
1605  return rc;  return rc;
1606  }  }
1607    
# Line 968  for (op = optionlist; op->one_char != 0; Line 1622  for (op = optionlist; op->one_char != 0;
1622    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1623    }    }
1624  fprintf(stderr, "] [long options] [pattern] [files]\n");  fprintf(stderr, "] [long options] [pattern] [files]\n");
1625  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1626      "options.\n");
1627  return rc;  return rc;
1628  }  }
1629    
# Line 987  option_item *op; Line 1642  option_item *op;
1642  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1643  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1644  printf("PATTERN must be present if neither -e nor -f is used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1645  printf("\"-\" can be used as a file name to mean STDIN.\n\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
1646  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
1647    #ifdef SUPPORT_LIBZ
1648    printf("Files whose names end in .gz are read using zlib.\n");
1649    #endif
1650    
1651    #ifdef SUPPORT_LIBBZ2
1652    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1653    #endif
1654    
1655    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1656    printf("Other files and the standard input are read as plain files.\n\n");
1657    #else
1658    printf("All files are read as plain files, without any interpretation.\n\n");
1659    #endif
1660    
1661    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1662  printf("Options:\n");  printf("Options:\n");
1663    
1664  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
# Line 997  for (op = optionlist; op->one_char != 0; Line 1666  for (op = optionlist; op->one_char != 0;
1666    int n;    int n;
1667    char s[4];    char s[4];
1668    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1669    printf("  %s --%s%n", s, op->long_name, &n);    n = 30 - printf("  %s --%s", s, op->long_name);
   n = 30 - n;  
1670    if (n < 1) n = 1;    if (n < 1) n = 1;
1671    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                    ", op->help_text);
1672    }    }
# Line 1023  handle_option(int letter, int options) Line 1691  handle_option(int letter, int options)
1691  {  {
1692  switch(letter)  switch(letter)
1693    {    {
1694      case N_FOFFSETS: file_offsets = TRUE; break;
1695    case N_HELP: help(); exit(0);    case N_HELP: help(); exit(0);
1696      case N_LOFFSETS: line_offsets = number = TRUE; break;
1697    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1698    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1699    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
1700    case 'h': filenames = FN_NONE; break;    case 'h': filenames = FN_NONE; break;
1701    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1702    case 'l': filenames = FN_ONLY; break;    case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1703    case 'L': filenames = FN_NOMATCH_ONLY; break;    case 'L': filenames = FN_NOMATCH_ONLY; break;
1704    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;    case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1705    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
# Line 1037  switch(letter) Line 1707  switch(letter)
1707    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1708    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
1709    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1710    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1711    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1712    case 'w': process_options |= PO_WORD_MATCH; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1713    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1714    
1715    case 'V':    case 'V':
1716    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
1717    exit(0);    exit(0);
1718    break;    break;
1719    
# Line 1120  sprintf(buffer, "%s%.*s%s", prefix[proce Line 1789  sprintf(buffer, "%s%.*s%s", prefix[proce
1789    suffix[process_options]);    suffix[process_options]);
1790  pattern_list[pattern_count] =  pattern_list[pattern_count] =
1791    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
1792  if (pattern_list[pattern_count++] != NULL) return TRUE;  if (pattern_list[pattern_count] != NULL)
1793      {
1794      pattern_count++;
1795      return TRUE;
1796      }
1797    
1798  /* Handle compile errors */  /* Handle compile errors */
1799    
# Line 1152  return FALSE; Line 1825  return FALSE;
1825  *************************************************/  *************************************************/
1826    
1827  /* When the -F option has been used, each string may be a list of strings,  /* When the -F option has been used, each string may be a list of strings,
1828  separated by newlines. They will be matched literally.  separated by line breaks. They will be matched literally.
1829    
1830  Arguments:  Arguments:
1831    pattern        the pattern string    pattern        the pattern string
# Line 1170  compile_pattern(char *pattern, int optio Line 1843  compile_pattern(char *pattern, int optio
1843  {  {
1844  if ((process_options & PO_FIXED_STRINGS) != 0)  if ((process_options & PO_FIXED_STRINGS) != 0)
1845    {    {
1846      char *eop = pattern + strlen(pattern);
1847    char buffer[MBUFTHIRD];    char buffer[MBUFTHIRD];
1848    for(;;)    for(;;)
1849      {      {
1850      char *p = strchr(pattern, '\n');      int ellength;
1851      if (p == NULL)      char *p = end_of_line(pattern, eop, &ellength);
1852        if (ellength == 0)
1853        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
1854      sprintf(buffer, "%.*s", p - pattern, pattern);      sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1855      pattern = p + 1;      pattern = p;
1856      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
1857        return FALSE;        return FALSE;
1858      }      }
# Line 1200  int i, j; Line 1875  int i, j;
1875  int rc = 1;  int rc = 1;
1876  int pcre_options = 0;  int pcre_options = 0;
1877  int cmd_pattern_count = 0;  int cmd_pattern_count = 0;
1878    int hint_count = 0;
1879  int errptr;  int errptr;
1880  BOOL only_one_at_top;  BOOL only_one_at_top;
1881  char *patterns[MAX_PATTERN_COUNT];  char *patterns[MAX_PATTERN_COUNT];
1882  const char *locale_from = "--locale";  const char *locale_from = "--locale";
1883  const char *error;  const char *error;
1884    
1885    /* Set the default line ending value from the default in the PCRE library;
1886    "lf", "cr", "crlf", "any", and "anycrlf" are supported. Anything else is treated as "lf".
1887    Note that the return values from pcre_config(), though derived from the ASCII
1888    codes, are the same in EBCDIC environments, so we must use the actual values
1889    rather than escapes such as '\r'. */
1890    
1891    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1892    switch(i)
1893      {
1894      default:               newline = (char *)"lf"; break;
1895      case 13:               newline = (char *)"cr"; break;
1896      case (13 << 8) | 10:   newline = (char *)"crlf"; break;
1897      case -1:               newline = (char *)"any"; break;
1898      case -2:               newline = (char *)"anycrlf"; break;
1899      }
1900    
1901  /* Process the options */  /* Process the options */
1902    
1903  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
# Line 1245  for (i = 1; i < argc; i++) Line 1937  for (i = 1; i < argc; i++)
1937      Some options have variations in the long name spelling: specifically, we      Some options have variations in the long name spelling: specifically, we
1938      allow "regexp" because GNU grep allows it, though I personally go along      allow "regexp" because GNU grep allows it, though I personally go along
1939      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".      with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1940      These options are entered in the table as "regex(p)". No option is in both      These options are entered in the table as "regex(p)". Options can be in
1941      these categories, fortunately. */      both these categories. */
1942    
1943      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
1944        {        {
1945        char *opbra = strchr(op->long_name, '(');        char *opbra = strchr(op->long_name, '(');
1946        char *equals = strchr(op->long_name, '=');        char *equals = strchr(op->long_name, '=');
1947        if (opbra == NULL)     /* Not a (p) case */  
1948          /* Handle options with only one spelling of the name */
1949    
1950          if (opbra == NULL)     /* Does not contain '(' */
1951          {          {
1952          if (equals == NULL)  /* Not thing=data case */          if (equals == NULL)  /* Not thing=data case */
1953            {            {
# Line 1261  for (i = 1; i < argc; i++) Line 1956  for (i = 1; i < argc; i++)
1956          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
1957            {            {
1958            int oplen = equals - op->long_name;            int oplen = equals - op->long_name;
1959            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1960            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1961              {              {
1962              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1274  for (i = 1; i < argc; i++) Line 1969  for (i = 1; i < argc; i++)
1969              }              }
1970            }            }
1971          }          }
1972        else                   /* Special case xxxx(p) */  
1973          /* Handle options with an alternate spelling of the name */
1974    
1975          else
1976          {          {
1977          char buff1[24];          char buff1[24];
1978          char buff2[24];          char buff2[24];
1979    
1980          int baselen = opbra - op->long_name;          int baselen = opbra - op->long_name;
1981            int fulllen = strchr(op->long_name, ')') - op->long_name + 1;
1982            int arglen = (argequals == NULL || equals == NULL)?
1983              (int)strlen(arg) : argequals - arg;
1984    
1985          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
1986          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
1987            opbra + 1);  
1988          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strncmp(arg, buff1, arglen) == 0 ||
1989               strncmp(arg, buff2, arglen) == 0)
1990              {
1991              if (equals != NULL && argequals != NULL)
1992                {
1993                option_data = argequals;
1994                if (*option_data == '=')
1995                  {
1996                  option_data++;
1997                  longopwasequals = TRUE;
1998                  }
1999                }
2000            break;            break;
2001              }
2002          }          }
2003        }        }
2004    
# Line 1294  for (i = 1; i < argc; i++) Line 2009  for (i = 1; i < argc; i++)
2009        }        }
2010      }      }
2011    
2012      /* Jeffrey Friedl's debugging harness uses these additional options which
2013      are not in the right form for putting in the option table because they use
2014      only one hyphen, yet are more than one character long. By putting them
2015      separately here, they will not get displayed as part of the help() output,
2016      but I don't think Jeffrey will care about that. */
2017    
2018    #ifdef JFRIEDL_DEBUG
2019      else if (strcmp(argv[i], "-pre") == 0) {
2020              jfriedl_prefix = argv[++i];
2021              continue;
2022      } else if (strcmp(argv[i], "-post") == 0) {
2023              jfriedl_postfix = argv[++i];
2024              continue;
2025      } else if (strcmp(argv[i], "-XT") == 0) {
2026              sscanf(argv[++i], "%d", &jfriedl_XT);
2027              continue;
2028      } else if (strcmp(argv[i], "-XR") == 0) {
2029              sscanf(argv[++i], "%d", &jfriedl_XR);
2030              continue;
2031      }
2032    #endif
2033    
2034    
2035    /* One-char options; many that have no data may be in a single argument; we    /* One-char options; many that have no data may be in a single argument; we
2036    continue till we hit the last one or one that needs data. */    continue till we hit the last one or one that needs data. */
2037    
# Line 1333  for (i = 1; i < argc; i++) Line 2071  for (i = 1; i < argc; i++)
2071    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2072    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
2073    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r" and
2074    Jeffrey Friedl's special debugging option. */    Jeffrey Friedl's special -S debugging option. */
2075    
2076    if (*option_data == 0 &&    if (*option_data == 0 &&
2077        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
# Line 1416  if (both_context > 0) Line 2154  if (both_context > 0)
2154    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2155    }    }
2156    
2157    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2158    However, the latter two set the only_matching flag. */
2159    
2160    if ((only_matching && (file_offsets || line_offsets)) ||
2161        (file_offsets && line_offsets))
2162      {
2163      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2164        "and/or --line-offsets\n");
2165      exit(usage(2));
2166      }
2167    
2168    if (file_offsets || line_offsets) only_matching = TRUE;
2169    
2170  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2171  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
2172    
# Line 1465  if (colour_option != NULL && strcmp(colo Line 2216  if (colour_option != NULL && strcmp(colo
2216      }      }
2217    }    }
2218    
2219    /* Interpret the newline type; the default settings are Unix-like. */
2220    
2221    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2222      {
2223      pcre_options |= PCRE_NEWLINE_CR;
2224      endlinetype = EL_CR;
2225      }
2226    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2227      {
2228      pcre_options |= PCRE_NEWLINE_LF;
2229      endlinetype = EL_LF;
2230      }
2231    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2232      {
2233      pcre_options |= PCRE_NEWLINE_CRLF;
2234      endlinetype = EL_CRLF;
2235      }
2236    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2237      {
2238      pcre_options |= PCRE_NEWLINE_ANY;
2239      endlinetype = EL_ANY;
2240      }
2241    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2242      {
2243      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2244      endlinetype = EL_ANYCRLF;
2245      }
2246    else
2247      {
2248      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2249      return 2;
2250      }
2251    
2252  /* Interpret the text values for -d and -D */  /* Interpret the text values for -d and -D */
2253    
2254  if (dee_option != NULL)  if (dee_option != NULL)
# Line 1490  if (DEE_option != NULL) Line 2274  if (DEE_option != NULL)
2274      }      }
2275    }    }
2276    
2277  /* Check the value for Jeff Friedl's debugging option. */  /* Check the values for Jeffrey Friedl's debugging options. */
2278    
2279  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
2280  if (S_arg > 9)  if (S_arg > 9)
# Line 1498  if (S_arg > 9) Line 2282  if (S_arg > 9)
2282    fprintf(stderr, "pcregrep: bad value for -S option\n");    fprintf(stderr, "pcregrep: bad value for -S option\n");
2283    return 2;    return 2;
2284    }    }
2285    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2286      {
2287      if (jfriedl_XT == 0) jfriedl_XT = 1;
2288      if (jfriedl_XR == 0) jfriedl_XR = 1;
2289      }
2290  #endif  #endif
2291    
2292  /* Get memory to store the pattern and hints lists. */  /* Get memory to store the pattern and hints lists. */
# Line 1508  hints_list = (pcre_extra **)malloc(MAX_P Line 2297  hints_list = (pcre_extra **)malloc(MAX_P
2297  if (pattern_list == NULL || hints_list == NULL)  if (pattern_list == NULL || hints_list == NULL)
2298    {    {
2299    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
2300    return 2;    goto EXIT2;
2301    }    }
2302    
2303  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there is no file provided by -f,
# Line 1527  for (j = 0; j < cmd_pattern_count; j++) Line 2316  for (j = 0; j < cmd_pattern_count; j++)
2316    {    {
2317    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(patterns[j], pcre_options, NULL,
2318         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2319      return 2;      goto EXIT2;
2320    }    }
2321    
2322  /* Compile the regular expressions that are provided in a file. */  /* Compile the regular expressions that are provided in a file. */
# Line 1551  if (pattern_filename != NULL) Line 2340  if (pattern_filename != NULL)
2340        {        {
2341        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2342          strerror(errno));          strerror(errno));
2343        return 2;        goto EXIT2;
2344        }        }
2345      filename = pattern_filename;      filename = pattern_filename;
2346      }      }
# Line 1564  if (pattern_filename != NULL) Line 2353  if (pattern_filename != NULL)
2353      linenumber++;      linenumber++;
2354      if (buffer[0] == 0) continue;   /* Skip blank lines */      if (buffer[0] == 0) continue;   /* Skip blank lines */
2355      if (!compile_pattern(buffer, pcre_options, filename, linenumber))      if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2356        return 2;        goto EXIT2;
2357      }      }
2358    
2359    if (f != stdin) fclose(f);    if (f != stdin) fclose(f);
# Line 1580  for (j = 0; j < pattern_count; j++) Line 2369  for (j = 0; j < pattern_count; j++)
2369      char s[16];      char s[16];
2370      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2371      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2372      return 2;      goto EXIT2;
2373      }      }
2374      hint_count++;
2375    }    }
2376    
2377  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
# Line 1594  if (exclude_pattern != NULL) Line 2384  if (exclude_pattern != NULL)
2384      {      {
2385      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2386        errptr, error);        errptr, error);
2387      return 2;      goto EXIT2;
2388      }      }
2389    }    }
2390    
# Line 1606  if (include_pattern != NULL) Line 2396  if (include_pattern != NULL)
2396      {      {
2397      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2398        errptr, error);        errptr, error);
2399      return 2;      goto EXIT2;
2400        }
2401      }
2402    
2403    if (exclude_dir_pattern != NULL)
2404      {
2405      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2406        pcretables);
2407      if (exclude_dir_compiled == NULL)
2408        {
2409        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2410          errptr, error);
2411        goto EXIT2;
2412        }
2413      }
2414    
2415    if (include_dir_pattern != NULL)
2416      {
2417      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2418        pcretables);
2419      if (include_dir_compiled == NULL)
2420        {
2421        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2422          errptr, error);
2423        goto EXIT2;
2424      }      }
2425    }    }
2426    
2427  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
2428    
2429  if (i >= argc)  if (i >= argc)
2430    return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    {
2431      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2432      goto EXIT;
2433      }
2434    
2435  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2436  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
# Line 1630  for (; i < argc; i++) Line 2447  for (; i < argc; i++)
2447      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2448    }    }
2449    
2450    EXIT:
2451    if (pattern_list != NULL)
2452      {
2453      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2454      free(pattern_list);
2455      }
2456    if (hints_list != NULL)
2457      {
2458      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2459      free(hints_list);
2460      }
2461  return rc;  return rc;
2462    
2463    EXIT2:
2464    rc = 2;
2465    goto EXIT;
2466  }  }
2467    
2468  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.87  
changed lines
  Added in v.515

  ViewVC Help
Powered by ViewVC 1.1.5