/[pcre]/code/tags/pcre-8.01/pcregrep.c
ViewVC logotype

Diff of /code/tags/pcre-8.01/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 89 by nigel, Sat Feb 24 21:41:27 2007 UTC revision 377 by ph10, Sun Mar 1 12:07:19 2009 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <locale.h>  #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
 #include "config.h"  
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 56  POSSIBILITY OF SUCH DAMAGE. Line 70  POSSIBILITY OF SUCH DAMAGE.
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "4.2 09-Jan-2006"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    
75  #if BUFSIZ > 8192  #if BUFSIZ > 8192
# Line 65  typedef int BOOL; Line 78  typedef int BOOL;
78  #define MBUFTHIRD 8192  #define MBUFTHIRD 8192
79  #endif  #endif
80    
   
81  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
82  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
83  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
84    
85  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
86    
87    /* File reading styles */
88    
89    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
90    
91  /* Actions for the -d and -D options */  /* Actions for the -d and -D options */
92    
93  enum { dee_READ, dee_SKIP, dee_RECURSE };  enum { dee_READ, dee_SKIP, dee_RECURSE };
# Line 83  enum { DEE_READ, DEE_SKIP }; Line 99  enum { DEE_READ, DEE_SKIP };
99  #define PO_LINE_MATCH     0x0002  #define PO_LINE_MATCH     0x0002
100  #define PO_FIXED_STRINGS  0x0004  #define PO_FIXED_STRINGS  0x0004
101    
102    /* Line ending types */
103    
104    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
105    
106    
107    
108  /*************************************************  /*************************************************
# Line 100  static const char *jfriedl_prefix = ""; Line 120  static const char *jfriedl_prefix = "";
120  static const char *jfriedl_postfix = "";  static const char *jfriedl_postfix = "";
121  #endif  #endif
122    
123    static int  endlinetype;
124    
125  static char *colour_string = (char *)"1;31";  static char *colour_string = (char *)"1;31";
126  static char *colour_option = NULL;  static char *colour_option = NULL;
127  static char *dee_option = NULL;  static char *dee_option = NULL;
128  static char *DEE_option = NULL;  static char *DEE_option = NULL;
129    static char *newline = NULL;
130  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
131  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
132  static char *locale = NULL;  static char *locale = NULL;
# Line 111  static char *locale = NULL; Line 134  static char *locale = NULL;
134  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
135    
136  static int  pattern_count = 0;  static int  pattern_count = 0;
137  static pcre **pattern_list;  static pcre **pattern_list = NULL;
138  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
139    
140  static char *include_pattern = NULL;  static char *include_pattern = NULL;
141  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
142    static char *include_dir_pattern = NULL;
143    static char *exclude_dir_pattern = NULL;
144    
145  static pcre *include_compiled = NULL;  static pcre *include_compiled = NULL;
146  static pcre *exclude_compiled = NULL;  static pcre *exclude_compiled = NULL;
147    static pcre *include_dir_compiled = NULL;
148    static pcre *exclude_dir_compiled = NULL;
149    
150  static int after_context = 0;  static int after_context = 0;
151  static int before_context = 0;  static int before_context = 0;
# Line 131  static int process_options = 0; Line 158  static int process_options = 0;
158    
159  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
160  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
161    static BOOL file_offsets = FALSE;
162  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
163  static BOOL invert = FALSE;  static BOOL invert = FALSE;
164    static BOOL line_offsets = FALSE;
165  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
166  static BOOL number = FALSE;  static BOOL number = FALSE;
167  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
168  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
169  static BOOL silent = FALSE;  static BOOL silent = FALSE;
170    static BOOL utf8 = FALSE;
171    
172  /* Structure for options and list of them */  /* Structure for options and list of them */
173    
# Line 155  typedef struct option_item { Line 185  typedef struct option_item {
185  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
186  used to identify them. */  used to identify them. */
187    
188  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
189  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
190  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
191  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
192  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
193  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
194  #define N_NULL      (-7)  #define N_LABEL        (-7)
195    #define N_LOCALE       (-8)
196    #define N_NULL         (-9)
197    #define N_LOFFSETS     (-10)
198    #define N_FOFFSETS     (-11)
199    
200  static option_item optionlist[] = {  static option_item optionlist[] = {
201    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
# Line 177  static option_item optionlist[] = { Line 211  static option_item optionlist[] = {
211    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
212    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
213    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
214      { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
215    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
216    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
217    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
218    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
219    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
220    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
221      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
222    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
223    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
224      { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
225    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
226    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
227    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
228    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
229    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
230    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
231      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
232      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
233  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
234    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
235  #endif  #endif
# Line 214  static const char *prefix[] = { Line 253  static const char *prefix[] = {
253  static const char *suffix[] = {  static const char *suffix[] = {
254    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
255    
256    /* UTF-8 tables - used only in UTF-8 mode when the newline setting is "any" or "anycrlf". */
257    
258    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
259    
260    const char utf8_table4[] = {
261      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
262      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
263      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
264      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
265    
266    
267    
268  /*************************************************  /*************************************************
# Line 226  although at present the only ones are fo Line 275  although at present the only ones are fo
275    
276  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
277    
278  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
279  #include <sys/types.h>  #include <sys/types.h>
280  #include <sys/stat.h>  #include <sys/stat.h>
281  #include <dirent.h>  #include <dirent.h>
# Line 258  for (;;) Line 307  for (;;)
307    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
308      return dent->d_name;      return dent->d_name;
309    }    }
310  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
311  }  }
312    
313  static void  static void
# Line 293  return isatty(fileno(stdout)); Line 342  return isatty(fileno(stdout));
342    
343  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
344  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
345  when it did not exist. */  when it did not exist. David Byron added a patch that moved the #include of
346    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
347    */
348    
349  #elif HAVE_WIN32API  #elif HAVE_WINDOWS_H
350    
351  #ifndef STRICT  #ifndef STRICT
352  # define STRICT  # define STRICT
# Line 304  when it did not exist. */ Line 354  when it did not exist. */
354  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
355  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
356  #endif  #endif
357    
358    #include <windows.h>
359    
360  #ifndef INVALID_FILE_ATTRIBUTES  #ifndef INVALID_FILE_ATTRIBUTES
361  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
362  #endif  #endif
363    
 #include <windows.h>  
   
364  typedef struct directory_type  typedef struct directory_type
365  {  {
366  HANDLE handle;  HANDLE handle;
# Line 394  regular if they are not directories. */ Line 445  regular if they are not directories. */
445    
446  int isregfile(char *filename)  int isregfile(char *filename)
447  {  {
448  return !isdirectory(filename)  return !isdirectory(filename);
449  }  }
450    
451    
# Line 405  return !isdirectory(filename) Line 456  return !isdirectory(filename)
456  static BOOL  static BOOL
457  is_stdout_tty(void)  is_stdout_tty(void)
458  {  {
459  FALSE;  return FALSE;
460  }  }
461    
462    
# Line 418  FALSE; Line 469  FALSE;
469  typedef void directory_type;  typedef void directory_type;
470    
471  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
472  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
473  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
474  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
475    
476    
# Line 443  return FALSE; Line 494  return FALSE;
494    
495    
496    
497  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
498  /*************************************************  /*************************************************
499  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
500  *************************************************/  *************************************************/
# Line 466  return sys_errlist[n]; Line 517  return sys_errlist[n];
517    
518    
519  /*************************************************  /*************************************************
520    *             Find end of line                   *
521    *************************************************/
522    
523    /* The length of the endline sequence that is found is set via lenptr. This may
524    be zero at the very end of the file if there is no line-ending sequence there.
525    
526    Arguments:
527      p         current position in line
528      endptr    end of available data
529      lenptr    where to put the length of the eol sequence
530    
531    Returns:    pointer to the first byte after the line ending (the start of the next line), or endptr if no line ending is found
532    */
533    
534    static char *
535    end_of_line(char *p, char *endptr, int *lenptr)
536    {
537    switch(endlinetype)
538      {
539      default:      /* Just in case */
540      case EL_LF:
541      while (p < endptr && *p != '\n') p++;
542      if (p < endptr)
543        {
544        *lenptr = 1;
545        return p + 1;
546        }
547      *lenptr = 0;
548      return endptr;
549    
550      case EL_CR:
551      while (p < endptr && *p != '\r') p++;
552      if (p < endptr)
553        {
554        *lenptr = 1;
555        return p + 1;
556        }
557      *lenptr = 0;
558      return endptr;
559    
560      case EL_CRLF:
561      for (;;)
562        {
563        while (p < endptr && *p != '\r') p++;
564        if (++p >= endptr)
565          {
566          *lenptr = 0;
567          return endptr;
568          }
569        if (*p == '\n')
570          {
571          *lenptr = 2;
572          return p + 1;
573          }
574        }
575      break;
576    
577      case EL_ANYCRLF:
578      while (p < endptr)
579        {
580        int extra = 0;
581        register int c = *((unsigned char *)p);
582    
583        if (utf8 && c >= 0xc0)
584          {
585          int gcii, gcss;
586          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
587          gcss = 6*extra;
588          c = (c & utf8_table3[extra]) << gcss;
589          for (gcii = 1; gcii <= extra; gcii++)
590            {
591            gcss -= 6;
592            c |= (p[gcii] & 0x3f) << gcss;
593            }
594          }
595    
596        p += 1 + extra;
597    
598        switch (c)
599          {
600          case 0x0a:    /* LF */
601          *lenptr = 1;
602          return p;
603    
604          case 0x0d:    /* CR */
605          if (p < endptr && *p == 0x0a)
606            {
607            *lenptr = 2;
608            p++;
609            }
610          else *lenptr = 1;
611          return p;
612    
613          default:
614          break;
615          }
616        }   /* End of loop for ANYCRLF case */
617    
618      *lenptr = 0;  /* Must have hit the end */
619      return endptr;
620    
621      case EL_ANY:
622      while (p < endptr)
623        {
624        int extra = 0;
625        register int c = *((unsigned char *)p);
626    
627        if (utf8 && c >= 0xc0)
628          {
629          int gcii, gcss;
630          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
631          gcss = 6*extra;
632          c = (c & utf8_table3[extra]) << gcss;
633          for (gcii = 1; gcii <= extra; gcii++)
634            {
635            gcss -= 6;
636            c |= (p[gcii] & 0x3f) << gcss;
637            }
638          }
639    
640        p += 1 + extra;
641    
642        switch (c)
643          {
644          case 0x0a:    /* LF */
645          case 0x0b:    /* VT */
646          case 0x0c:    /* FF */
647          *lenptr = 1;
648          return p;
649    
650          case 0x0d:    /* CR */
651          if (p < endptr && *p == 0x0a)
652            {
653            *lenptr = 2;
654            p++;
655            }
656          else *lenptr = 1;
657          return p;
658    
659          case 0x85:    /* NEL */
660          *lenptr = utf8? 2 : 1;
661          return p;
662    
663          case 0x2028:  /* LS */
664          case 0x2029:  /* PS */
665          *lenptr = 3;
666          return p;
667    
668          default:
669          break;
670          }
671        }   /* End of loop for ANY case */
672    
673      *lenptr = 0;  /* Must have hit the end */
674      return endptr;
675      }     /* End of overall switch */
676    }
677    
678    
679    
680    /*************************************************
681    *         Find start of previous line            *
682    *************************************************/
683    
684    /* This is called when looking back for before lines to print.
685    
686    Arguments:
687      p         start of the subsequent line
688      startptr  start of available data
689    
690    Returns:    pointer to the start of the previous line
691    */
692    
693    static char *
694    previous_line(char *p, char *startptr)
695    {
696    switch(endlinetype)
697      {
698      default:      /* Just in case */
699      case EL_LF:
700      p--;
701      while (p > startptr && p[-1] != '\n') p--;
702      return p;
703    
704      case EL_CR:
705      p--;
706      while (p > startptr && p[-1] != '\n') p--;
707      return p;
708    
709      case EL_CRLF:
710      for (;;)
711        {
712        p -= 2;
713        while (p > startptr && p[-1] != '\n') p--;
714        if (p <= startptr + 1 || p[-2] == '\r') return p;
715        }
716      return p;   /* But control should never get here */
717    
718      case EL_ANY:
719      case EL_ANYCRLF:
720      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
721      if (utf8) while ((*p & 0xc0) == 0x80) p--;
722    
723      while (p > startptr)
724        {
725        register int c;
726        char *pp = p - 1;
727    
728        if (utf8)
729          {
730          int extra = 0;
731          while ((*pp & 0xc0) == 0x80) pp--;
732          c = *((unsigned char *)pp);
733          if (c >= 0xc0)
734            {
735            int gcii, gcss;
736            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
737            gcss = 6*extra;
738            c = (c & utf8_table3[extra]) << gcss;
739            for (gcii = 1; gcii <= extra; gcii++)
740              {
741              gcss -= 6;
742              c |= (pp[gcii] & 0x3f) << gcss;
743              }
744            }
745          }
746        else c = *((unsigned char *)pp);
747    
748        if (endlinetype == EL_ANYCRLF) switch (c)
749          {
750          case 0x0a:    /* LF */
751          case 0x0d:    /* CR */
752          return p;
753    
754          default:
755          break;
756          }
757    
758        else switch (c)
759          {
760          case 0x0a:    /* LF */
761          case 0x0b:    /* VT */
762          case 0x0c:    /* FF */
763          case 0x0d:    /* CR */
764          case 0x85:    /* NEL */
765          case 0x2028:  /* LS */
766          case 0x2029:  /* PS */
767          return p;
768    
769          default:
770          break;
771          }
772    
773        p = pp;  /* Back one character */
774        }        /* End of loop for ANY case */
775    
776      return startptr;  /* Hit start of data */
777      }     /* End of overall switch */
778    }
779    
780    
781    
782    
783    
784    /*************************************************
785  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
786  *************************************************/  *************************************************/
787    
# Line 490  if (after_context > 0 && lastmatchnumber Line 806  if (after_context > 0 && lastmatchnumber
806    int count = 0;    int count = 0;
807    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
808      {      {
809        int ellength;
810      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
811      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
812      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
813      while (*pp != '\n') pp++;      pp = end_of_line(pp, endptr, &ellength);
814      fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
815      lastmatchrestart = pp + 1;      lastmatchrestart = pp;
816      }      }
817    hyphenpending = TRUE;    hyphenpending = TRUE;
818    }    }
# Line 515  be in the middle third most of the time, Line 832  be in the middle third most of the time,
832  "before" context printing.  "before" context printing.
833    
834  Arguments:  Arguments:
835    in           the fopened FILE stream    handle       the fopened FILE stream for a normal file
836                   the gzFile pointer when reading is via libz
837                   the BZFILE pointer when reading is via libbz2
838      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
839    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
840                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
841                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
842    
843  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
844                 1 otherwise (no matches)                 1 otherwise (no matches)
845                   2 if there is a read error on a .bz2 file
846  */  */
847    
848  static int  static int
849  pcregrep(FILE *in, char *printname)  pcregrep(void *handle, int frtype, char *printname)
850  {  {
851  int rc = 1;  int rc = 1;
852  int linenumber = 1;  int linenumber = 1;
853  int lastmatchnumber = 0;  int lastmatchnumber = 0;
854  int count = 0;  int count = 0;
855    int filepos = 0;
856  int offsets[99];  int offsets[99];
857  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
858  char buffer[3*MBUFTHIRD];  char buffer[3*MBUFTHIRD];
# Line 538  char *ptr = buffer; Line 860  char *ptr = buffer;
860  char *endptr;  char *endptr;
861  size_t bufflength;  size_t bufflength;
862  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
863    FILE *in = NULL;                    /* Ensure initialized */
864    
865    #ifdef SUPPORT_LIBZ
866    gzFile ingz = NULL;
867    #endif
868    
869    #ifdef SUPPORT_LIBBZ2
870    BZFILE *inbz2 = NULL;
871    #endif
872    
873    
874  /* Do the first read into the start of the buffer and set up the pointer to  /* Do the first read into the start of the buffer and set up the pointer to end
875  end of what we have. */  of what we have. In the case of libz, a non-zipped .gz file will be read as a
876    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
877    fail. */
878    
879    #ifdef SUPPORT_LIBZ
880    if (frtype == FR_LIBZ)
881      {
882      ingz = (gzFile)handle;
883      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
884      }
885    else
886    #endif
887    
888    #ifdef SUPPORT_LIBBZ2
889    if (frtype == FR_LIBBZ2)
890      {
891      inbz2 = (BZFILE *)handle;
892      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
893      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
894      }                                    /* without the cast it is unsigned. */
895    else
896    #endif
897    
898      {
899      in = (FILE *)handle;
900      bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
901      }
902    
 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  
903  endptr = buffer + bufflength;  endptr = buffer + bufflength;
904    
905  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
# Line 552  way, the buffer is shifted left and re-f Line 909  way, the buffer is shifted left and re-f
909    
910  while (ptr < endptr)  while (ptr < endptr)
911    {    {
912    int i;    int i, endlinelength;
913    int mrc = 0;    int mrc = 0;
914    BOOL match = FALSE;    BOOL match = FALSE;
915      char *matchptr = ptr;
916    char *t = ptr;    char *t = ptr;
917    size_t length, linelength;    size_t length, linelength;
918    
# Line 565  while (ptr < endptr) Line 923  while (ptr < endptr)
923    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
924    that any match is constrained to be in the first line. */    that any match is constrained to be in the first line. */
925    
926    linelength = 0;    t = end_of_line(t, endptr, &endlinelength);
927    while (t < endptr && *t++ != '\n') linelength++;    linelength = t - ptr - endlinelength;
928    length = multiline? endptr - ptr : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
   
929    
930    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
931    
# Line 618  while (ptr < endptr) Line 975  while (ptr < endptr)
975    }    }
976  #endif  #endif
977    
978      /* We come back here after a match when the -o option (only_matching) is set,
979      in order to find any further matches in the same line. */
980    
981      ONLY_MATCHING_RESTART:
982    
983    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches. Note that we don't include
984    the final newline in the subject string. */    the final newline in the subject string. */
985    
986    for (i = 0; i < pattern_count; i++)    for (i = 0; i < pattern_count; i++)
987      {      {
988      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,      mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
989        offsets, 99);        offsets, 99);
990      if (mrc >= 0) { match = TRUE; break; }      if (mrc >= 0) { match = TRUE; break; }
991      if (mrc != PCRE_ERROR_NOMATCH)      if (mrc != PCRE_ERROR_NOMATCH)
# Line 632  while (ptr < endptr) Line 993  while (ptr < endptr)
993        fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);        fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
994        if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);        if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
995        fprintf(stderr, "this line:\n");        fprintf(stderr, "this line:\n");
996        fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */        fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */
997        fprintf(stderr, "\n");        fprintf(stderr, "\n");
998        if (error_count == 0 &&        if (error_count == 0 &&
999            (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))            (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
# Line 679  while (ptr < endptr) Line 1040  while (ptr < endptr)
1040      else if (quiet) return 0;      else if (quiet) return 0;
1041    
1042      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, and
1043      does not pring any context. */      the --file-offsets and --line-offsets options output offsets for the
1044        matching substring (they both force --only-matching). None of these options
1045        prints any context. Afterwards, adjust the start and length, and then jump
1046        back to look for further matches in the same line. If we are in invert
1047        mode, however, nothing is printed - this could be still useful because the
1048        return code is set. */
1049    
1050      else if (only_matching)      else if (only_matching)
1051        {        {
1052        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (!invert)
1053        if (number) fprintf(stdout, "%d:", linenumber);          {
1054        fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1055        fprintf(stdout, "\n");          if (number) fprintf(stdout, "%d:", linenumber);
1056            if (line_offsets)
1057              fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1058                offsets[1] - offsets[0]);
1059            else if (file_offsets)
1060              fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1061                offsets[1] - offsets[0]);
1062            else
1063              {
1064              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1065              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1066              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1067              }
1068            fprintf(stdout, "\n");
1069            matchptr += offsets[1];
1070            length -= offsets[1];
1071            match = FALSE;
1072            goto ONLY_MATCHING_RESTART;
1073            }
1074        }        }
1075    
1076      /* This is the default case when none of the above options is set. We print      /* This is the default case when none of the above options is set. We print
# Line 700  while (ptr < endptr) Line 1084  while (ptr < endptr)
1084    
1085        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
1086          {          {
1087            int ellength;
1088          int linecount = 0;          int linecount = 0;
1089          char *p = lastmatchrestart;          char *p = lastmatchrestart;
1090    
1091          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
1092            {            {
1093            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
1094            linecount++;            linecount++;
1095            }            }
1096    
# Line 719  while (ptr < endptr) Line 1103  while (ptr < endptr)
1103            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
1104            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1105            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1106            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1107            fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1108            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
1109            }            }
1110          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
1111          }          }
# Line 747  while (ptr < endptr) Line 1131  while (ptr < endptr)
1131                 linecount < before_context)                 linecount < before_context)
1132            {            {
1133            linecount++;            linecount++;
1134            p--;            p = previous_line(p, buffer);
           while (p > buffer && p[-1] != '\n') p--;  
1135            }            }
1136    
1137          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 756  while (ptr < endptr) Line 1139  while (ptr < endptr)
1139    
1140          while (p < ptr)          while (p < ptr)
1141            {            {
1142              int ellength;
1143            char *pp = p;            char *pp = p;
1144            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1145            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1146            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1147            fwrite(p, 1, pp - p + 1, stdout);   /* In case binary zero */            fwrite(p, 1, pp - p, stdout);
1148            p = pp + 1;            p = pp;
1149            }            }
1150          }          }
1151    
# Line 776  while (ptr < endptr) Line 1160  while (ptr < endptr)
1160    
1161        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1162        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1163        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1164        start of the match will always be before the first \n character. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1165          the match will always be before the first newline sequence. */
1166    
1167        if (multiline)        if (multiline)
1168          {          {
1169          char *endmatch = ptr + offsets[1];          int ellength;
1170          t = ptr;          char *endmatch = ptr;
1171          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          if (!invert)
1172          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1173          linelength = endmatch - ptr;            endmatch += offsets[1];
1174              t = ptr;
1175              while (t < endmatch)
1176                {
1177                t = end_of_line(t, endptr, &ellength);
1178                if (t <= endmatch) linenumber++; else break;
1179                }
1180              }
1181            endmatch = end_of_line(endmatch, endptr, &ellength);
1182            linelength = endmatch - ptr - ellength;
1183          }          }
1184    
1185        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 815  while (ptr < endptr) Line 1209  while (ptr < endptr)
1209          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1210          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1211          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1212          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1213              stdout);
1214          }          }
1215        else fwrite(ptr, 1, linelength, stdout);        else fwrite(ptr, 1, linelength + endlinelength, stdout);
   
       fprintf(stdout, "\n");  
1216        }        }
1217    
1218      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match */
# Line 829  while (ptr < endptr) Line 1222  while (ptr < endptr)
1222      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1223      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1224    
1225      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1226      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1227      }      }
1228    
1229    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1230      anything to be printed), we have to move on to the end of the match before
1231      proceeding. */
1232    
1233      if (multiline && invert && match)
1234        {
1235        int ellength;
1236        char *endmatch = ptr + offsets[1];
1237        t = ptr;
1238        while (t < endmatch)
1239          {
1240          t = end_of_line(t, endptr, &ellength);
1241          if (t <= endmatch) linenumber++; else break;
1242          }
1243        endmatch = end_of_line(endmatch, endptr, &ellength);
1244        linelength = endmatch - ptr - ellength;
1245        }
1246    
1247      /* Advance to after the newline and increment the line number. The file
1248      offset to the current line is maintained in filepos. */
1249    
1250    ptr += linelength + 1;    ptr += linelength + endlinelength;
1251      filepos += linelength + endlinelength;
1252    linenumber++;    linenumber++;
1253    
1254    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
# Line 857  while (ptr < endptr) Line 1270  while (ptr < endptr)
1270    
1271      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1272      ptr -= MBUFTHIRD;      ptr -= MBUFTHIRD;
1273    
1274    #ifdef SUPPORT_LIBZ
1275        if (frtype == FR_LIBZ)
1276          bufflength = 2*MBUFTHIRD +
1277            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1278        else
1279    #endif
1280    
1281    #ifdef SUPPORT_LIBBZ2
1282        if (frtype == FR_LIBBZ2)
1283          bufflength = 2*MBUFTHIRD +
1284            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1285        else
1286    #endif
1287    
1288      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1289    
1290      endptr = buffer + bufflength;      endptr = buffer + bufflength;
1291    
1292      /* Adjust any last match point */      /* Adjust any last match point */
# Line 921  grep_or_recurse(char *pathname, BOOL dir Line 1350  grep_or_recurse(char *pathname, BOOL dir
1350  {  {
1351  int rc = 1;  int rc = 1;
1352  int sep;  int sep;
1353  FILE *in;  int frtype;
1354    int pathlen;
1355    void *handle;
1356    FILE *in = NULL;           /* Ensure initialized */
1357    
1358    #ifdef SUPPORT_LIBZ
1359    gzFile ingz = NULL;
1360    #endif
1361    
1362    #ifdef SUPPORT_LIBBZ2
1363    BZFILE *inbz2 = NULL;
1364    #endif
1365    
1366  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1367    
1368  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1369    {    {
1370    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN,
1371      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1372        stdin_name : NULL);        stdin_name : NULL);
1373    }    }
1374    
   
1375  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* If the file is a directory, skip if skipping or if we are recursing, scan
1376  each file within it, subject to any include or exclude patterns that were set.  each file and directory within it, subject to any include or exclude patterns
1377  The scanning code is localized so it can be made system-specific. */  that were set. The scanning code is localized so it can be made
1378    system-specific. */
1379    
1380  if ((sep = isdirectory(pathname)) != 0)  if ((sep = isdirectory(pathname)) != 0)
1381    {    {
# Line 956  if ((sep = isdirectory(pathname)) != 0) Line 1396  if ((sep = isdirectory(pathname)) != 0)
1396    
1397      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
1398        {        {
1399        int frc, blen;        int frc, nflen;
1400        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1401        blen = strlen(buffer);        nflen = strlen(nextfile);
1402    
1403          if (isdirectory(buffer))
1404            {
1405            if (exclude_dir_compiled != NULL &&
1406                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1407              continue;
1408    
1409            if (include_dir_compiled != NULL &&
1410                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1411              continue;
1412            }
1413          else
1414            {
1415            if (exclude_compiled != NULL &&
1416                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1417              continue;
1418    
1419        if (exclude_compiled != NULL &&          if (include_compiled != NULL &&
1420            pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)              pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1421          continue;            continue;
1422            }
       if (include_compiled != NULL &&  
           pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  
         continue;  
1423    
1424        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1425        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
# Line 989  skipping was not requested. The scan pro Line 1442  skipping was not requested. The scan pro
1442  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1443  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1444    
1445  in = fopen(pathname, "r");  pathlen = strlen(pathname);
1446  if (in == NULL)  
1447    /* Open using zlib if it is supported and the file name ends with .gz. */
1448    
1449    #ifdef SUPPORT_LIBZ
1450    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1451      {
1452      ingz = gzopen(pathname, "rb");
1453      if (ingz == NULL)
1454        {
1455        if (!silent)
1456          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1457            strerror(errno));
1458        return 2;
1459        }
1460      handle = (void *)ingz;
1461      frtype = FR_LIBZ;
1462      }
1463    else
1464    #endif
1465    
1466    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1467    
1468    #ifdef SUPPORT_LIBBZ2
1469    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1470      {
1471      inbz2 = BZ2_bzopen(pathname, "rb");
1472      handle = (void *)inbz2;
1473      frtype = FR_LIBBZ2;
1474      }
1475    else
1476    #endif
1477    
1478    /* Otherwise use plain fopen(). The label is so that we can come back here if
1479    an attempt to read a .bz2 file indicates that it really is a plain file. */
1480    
1481    #ifdef SUPPORT_LIBBZ2
1482    PLAIN_FILE:
1483    #endif
1484      {
1485      in = fopen(pathname, "r");
1486      handle = (void *)in;
1487      frtype = FR_PLAIN;
1488      }
1489    
1490    /* All the opening methods return errno when they fail. */
1491    
1492    if (handle == NULL)
1493    {    {
1494    if (!silent)    if (!silent)
1495      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 998  if (in == NULL) Line 1497  if (in == NULL)
1497    return 2;    return 2;
1498    }    }
1499    
1500  rc = pcregrep(in, (filenames > FN_DEFAULT ||  /* Now grep the file */
1501    
1502    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1503    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1504    
1505    /* Close in an appropriate manner. */
1506    
1507    #ifdef SUPPORT_LIBZ
1508    if (frtype == FR_LIBZ)
1509      gzclose(ingz);
1510    else
1511    #endif
1512    
1513    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1514    read failed. If the error indicates that the file isn't in fact bzipped, try
1515    again as a normal file. */
1516    
1517    #ifdef SUPPORT_LIBBZ2
1518    if (frtype == FR_LIBBZ2)
1519      {
1520      if (rc == 2)
1521        {
1522        int errnum;
1523        const char *err = BZ2_bzerror(inbz2, &errnum);
1524        if (errnum == BZ_DATA_ERROR_MAGIC)
1525          {
1526          BZ2_bzclose(inbz2);
1527          goto PLAIN_FILE;
1528          }
1529        else if (!silent)
1530          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1531            pathname, err);
1532        }
1533      BZ2_bzclose(inbz2);
1534      }
1535    else
1536    #endif
1537    
1538    /* Normal file close */
1539    
1540  fclose(in);  fclose(in);
1541    
1542    /* Pass back the yield from pcregrep(). */
1543    
1544  return rc;  return rc;
1545  }  }
1546    
# Line 1022  for (op = optionlist; op->one_char != 0; Line 1561  for (op = optionlist; op->one_char != 0;
1561    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1562    }    }
1563  fprintf(stderr, "] [long options] [pattern] [files]\n");  fprintf(stderr, "] [long options] [pattern] [files]\n");
1564  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1565      "options.\n");
1566  return rc;  return rc;
1567  }  }
1568    
# Line 1041  option_item *op; Line 1581  option_item *op;
1581  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1582  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1583  printf("PATTERN must be present if neither -e nor -f is used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1584  printf("\"-\" can be used as a file name to mean STDIN.\n\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
1585  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
1586    #ifdef SUPPORT_LIBZ
1587    printf("Files whose names end in .gz are read using zlib.\n");
1588    #endif
1589    
1590    #ifdef SUPPORT_LIBBZ2
1591    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1592    #endif
1593    
1594    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1595    printf("Other files and the standard input are read as plain files.\n\n");
1596    #else
1597    printf("All files are read as plain files, without any interpretation.\n\n");
1598    #endif
1599    
1600    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1601  printf("Options:\n");  printf("Options:\n");
1602    
1603  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
# Line 1051  for (op = optionlist; op->one_char != 0; Line 1605  for (op = optionlist; op->one_char != 0;
1605    int n;    int n;
1606    char s[4];    char s[4];
1607    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1608    printf("  %s --%s%n", s, op->long_name, &n);    n = 30 - printf("  %s --%s", s, op->long_name);
   n = 30 - n;  
1609    if (n < 1) n = 1;    if (n < 1) n = 1;
1610    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                    ", op->help_text);
1611    }    }
# Line 1077  handle_option(int letter, int options) Line 1630  handle_option(int letter, int options)
1630  {  {
1631  switch(letter)  switch(letter)
1632    {    {
1633      case N_FOFFSETS: file_offsets = TRUE; break;
1634    case N_HELP: help(); exit(0);    case N_HELP: help(); exit(0);
1635      case N_LOFFSETS: line_offsets = number = TRUE; break;
1636    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1637    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1638    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
# Line 1091  switch(letter) Line 1646  switch(letter)
1646    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1647    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
1648    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1649    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1650    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1651    case 'w': process_options |= PO_WORD_MATCH; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1652    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1653    
1654    case 'V':    case 'V':
1655    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
1656    exit(0);    exit(0);
1657    break;    break;
1658    
# Line 1174  sprintf(buffer, "%s%.*s%s", prefix[proce Line 1728  sprintf(buffer, "%s%.*s%s", prefix[proce
1728    suffix[process_options]);    suffix[process_options]);
1729  pattern_list[pattern_count] =  pattern_list[pattern_count] =
1730    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
1731  if (pattern_list[pattern_count++] != NULL) return TRUE;  if (pattern_list[pattern_count] != NULL)
1732      {
1733      pattern_count++;
1734      return TRUE;
1735      }
1736    
1737  /* Handle compile errors */  /* Handle compile errors */
1738    
# Line 1206  return FALSE; Line 1764  return FALSE;
1764  *************************************************/  *************************************************/
1765    
1766  /* When the -F option has been used, each string may be a list of strings,  /* When the -F option has been used, each string may be a list of strings,
1767  separated by newlines. They will be matched literally.  separated by line breaks. They will be matched literally.
1768    
1769  Arguments:  Arguments:
1770    pattern        the pattern string    pattern        the pattern string
# Line 1224  compile_pattern(char *pattern, int optio Line 1782  compile_pattern(char *pattern, int optio
1782  {  {
1783  if ((process_options & PO_FIXED_STRINGS) != 0)  if ((process_options & PO_FIXED_STRINGS) != 0)
1784    {    {
1785      char *eop = pattern + strlen(pattern);
1786    char buffer[MBUFTHIRD];    char buffer[MBUFTHIRD];
1787    for(;;)    for(;;)
1788      {      {
1789      char *p = strchr(pattern, '\n');      int ellength;
1790      if (p == NULL)      char *p = end_of_line(pattern, eop, &ellength);
1791        if (ellength == 0)
1792        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
1793      sprintf(buffer, "%.*s", p - pattern, pattern);      sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1794      pattern = p + 1;      pattern = p;
1795      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
1796        return FALSE;        return FALSE;
1797      }      }
# Line 1254  int i, j; Line 1814  int i, j;
1814  int rc = 1;  int rc = 1;
1815  int pcre_options = 0;  int pcre_options = 0;
1816  int cmd_pattern_count = 0;  int cmd_pattern_count = 0;
1817    int hint_count = 0;
1818  int errptr;  int errptr;
1819  BOOL only_one_at_top;  BOOL only_one_at_top;
1820  char *patterns[MAX_PATTERN_COUNT];  char *patterns[MAX_PATTERN_COUNT];
1821  const char *locale_from = "--locale";  const char *locale_from = "--locale";
1822  const char *error;  const char *error;
1823    
1824    /* Set the default line ending value from the default in the PCRE library;
1825    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1826    */
1827    
1828    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1829    switch(i)
1830      {
1831      default:                 newline = (char *)"lf"; break;
1832      case '\r':               newline = (char *)"cr"; break;
1833      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1834      case -1:                 newline = (char *)"any"; break;
1835      case -2:                 newline = (char *)"anycrlf"; break;
1836      }
1837    
1838  /* Process the options */  /* Process the options */
1839    
1840  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
# Line 1315  for (i = 1; i < argc; i++) Line 1890  for (i = 1; i < argc; i++)
1890          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
1891            {            {
1892            int oplen = equals - op->long_name;            int oplen = equals - op->long_name;
1893            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1894            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1895              {              {
1896              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1334  for (i = 1; i < argc; i++) Line 1909  for (i = 1; i < argc; i++)
1909          char buff2[24];          char buff2[24];
1910          int baselen = opbra - op->long_name;          int baselen = opbra - op->long_name;
1911          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
1912          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1,
1913            opbra + 1);            (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1914          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1915            break;            break;
1916          }          }
# Line 1494  if (both_context > 0) Line 2069  if (both_context > 0)
2069    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2070    }    }
2071    
2072    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2073    However, the latter two set the only_matching flag. */
2074    
2075    if ((only_matching && (file_offsets || line_offsets)) ||
2076        (file_offsets && line_offsets))
2077      {
2078      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2079        "and/or --line-offsets\n");
2080      exit(usage(2));
2081      }
2082    
2083    if (file_offsets || line_offsets) only_matching = TRUE;
2084    
2085  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2086  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
2087    
# Line 1543  if (colour_option != NULL && strcmp(colo Line 2131  if (colour_option != NULL && strcmp(colo
2131      }      }
2132    }    }
2133    
2134    /* Interpret the newline type; the default settings are Unix-like. */
2135    
2136    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2137      {
2138      pcre_options |= PCRE_NEWLINE_CR;
2139      endlinetype = EL_CR;
2140      }
2141    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2142      {
2143      pcre_options |= PCRE_NEWLINE_LF;
2144      endlinetype = EL_LF;
2145      }
2146    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2147      {
2148      pcre_options |= PCRE_NEWLINE_CRLF;
2149      endlinetype = EL_CRLF;
2150      }
2151    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2152      {
2153      pcre_options |= PCRE_NEWLINE_ANY;
2154      endlinetype = EL_ANY;
2155      }
2156    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2157      {
2158      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2159      endlinetype = EL_ANYCRLF;
2160      }
2161    else
2162      {
2163      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2164      return 2;
2165      }
2166    
2167  /* Interpret the text values for -d and -D */  /* Interpret the text values for -d and -D */
2168    
2169  if (dee_option != NULL)  if (dee_option != NULL)
# Line 1591  hints_list = (pcre_extra **)malloc(MAX_P Line 2212  hints_list = (pcre_extra **)malloc(MAX_P
2212  if (pattern_list == NULL || hints_list == NULL)  if (pattern_list == NULL || hints_list == NULL)
2213    {    {
2214    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
2215    return 2;    goto EXIT2;
2216    }    }
2217    
2218  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there is no file provided by -f,
# Line 1610  for (j = 0; j < cmd_pattern_count; j++) Line 2231  for (j = 0; j < cmd_pattern_count; j++)
2231    {    {
2232    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(patterns[j], pcre_options, NULL,
2233         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2234      return 2;      goto EXIT2;
2235    }    }
2236    
2237  /* Compile the regular expressions that are provided in a file. */  /* Compile the regular expressions that are provided in a file. */
# Line 1634  if (pattern_filename != NULL) Line 2255  if (pattern_filename != NULL)
2255        {        {
2256        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2257          strerror(errno));          strerror(errno));
2258        return 2;        goto EXIT2;
2259        }        }
2260      filename = pattern_filename;      filename = pattern_filename;
2261      }      }
# Line 1647  if (pattern_filename != NULL) Line 2268  if (pattern_filename != NULL)
2268      linenumber++;      linenumber++;
2269      if (buffer[0] == 0) continue;   /* Skip blank lines */      if (buffer[0] == 0) continue;   /* Skip blank lines */
2270      if (!compile_pattern(buffer, pcre_options, filename, linenumber))      if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2271        return 2;        goto EXIT2;
2272      }      }
2273    
2274    if (f != stdin) fclose(f);    if (f != stdin) fclose(f);
# Line 1663  for (j = 0; j < pattern_count; j++) Line 2284  for (j = 0; j < pattern_count; j++)
2284      char s[16];      char s[16];
2285      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2286      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2287      return 2;      goto EXIT2;
2288      }      }
2289      hint_count++;
2290    }    }
2291    
2292  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
# Line 1677  if (exclude_pattern != NULL) Line 2299  if (exclude_pattern != NULL)
2299      {      {
2300      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2301        errptr, error);        errptr, error);
2302      return 2;      goto EXIT2;
2303      }      }
2304    }    }
2305    
# Line 1689  if (include_pattern != NULL) Line 2311  if (include_pattern != NULL)
2311      {      {
2312      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2313        errptr, error);        errptr, error);
2314      return 2;      goto EXIT2;
2315        }
2316      }
2317    
2318    if (exclude_dir_pattern != NULL)
2319      {
2320      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2321        pcretables);
2322      if (exclude_dir_compiled == NULL)
2323        {
2324        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2325          errptr, error);
2326        goto EXIT2;
2327        }
2328      }
2329    
2330    if (include_dir_pattern != NULL)
2331      {
2332      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2333        pcretables);
2334      if (include_dir_compiled == NULL)
2335        {
2336        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2337          errptr, error);
2338        goto EXIT2;
2339      }      }
2340    }    }
2341    
2342  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
2343    
2344  if (i >= argc)  if (i >= argc)
2345    return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    {
2346      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2347      goto EXIT;
2348      }
2349    
2350  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2351  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
# Line 1713  for (; i < argc; i++) Line 2362  for (; i < argc; i++)
2362      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2363    }    }
2364    
2365    EXIT:
2366    if (pattern_list != NULL)
2367      {
2368      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2369      free(pattern_list);
2370      }
2371    if (hints_list != NULL)
2372      {
2373      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2374      free(hints_list);
2375      }
2376  return rc;  return rc;
2377    
2378    EXIT2:
2379    rc = 2;
2380    goto EXIT;
2381  }  }
2382    
2383  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.89  
changed lines
  Added in v.377

  ViewVC Help
Powered by ViewVC 1.1.5