/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 305 by ph10, Sun Jan 20 20:07:32 2008 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2008 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <locale.h>  #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
# Line 46  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55  #include <unistd.h>  #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60    #endif
61    
62    #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
 #include "config.h"  
66  #include "pcre.h"  #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
# Line 56  POSSIBILITY OF SUCH DAMAGE. Line 70  POSSIBILITY OF SUCH DAMAGE.
70    
71  typedef int BOOL;  typedef int BOOL;
72    
 #define VERSION "4.2 09-Jan-2006"  
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    
75  #if BUFSIZ > 8192  #if BUFSIZ > 8192
# Line 65  typedef int BOOL; Line 78  typedef int BOOL;
78  #define MBUFTHIRD 8192  #define MBUFTHIRD 8192
79  #endif  #endif
80    
   
81  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
82  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
83  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
84    
85  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
86    
87    /* File reading styles */
88    
89    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
90    
91  /* Actions for the -d and -D options */  /* Actions for the -d and -D options */
92    
93  enum { dee_READ, dee_SKIP, dee_RECURSE };  enum { dee_READ, dee_SKIP, dee_RECURSE };
# Line 83  enum { DEE_READ, DEE_SKIP }; Line 99  enum { DEE_READ, DEE_SKIP };
99  #define PO_LINE_MATCH     0x0002  #define PO_LINE_MATCH     0x0002
100  #define PO_FIXED_STRINGS  0x0004  #define PO_FIXED_STRINGS  0x0004
101    
102    /* Line ending types */
103    
104    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
105    
106    
107    
108  /*************************************************  /*************************************************
# Line 94  regular code. */ Line 114  regular code. */
114    
115  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
116  static int S_arg = -1;  static int S_arg = -1;
117    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
118    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
119    static const char *jfriedl_prefix = "";
120    static const char *jfriedl_postfix = "";
121  #endif  #endif
122    
123    static int  endlinetype;
124    
125  static char *colour_string = (char *)"1;31";  static char *colour_string = (char *)"1;31";
126  static char *colour_option = NULL;  static char *colour_option = NULL;
127  static char *dee_option = NULL;  static char *dee_option = NULL;
128  static char *DEE_option = NULL;  static char *DEE_option = NULL;
129    static char *newline = NULL;
130  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
131  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
132  static char *locale = NULL;  static char *locale = NULL;
# Line 107  static char *locale = NULL; Line 134  static char *locale = NULL;
134  static const unsigned char *pcretables = NULL;  static const unsigned char *pcretables = NULL;
135    
136  static int  pattern_count = 0;  static int  pattern_count = 0;
137  static pcre **pattern_list;  static pcre **pattern_list = NULL;
138  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
139    
140  static char *include_pattern = NULL;  static char *include_pattern = NULL;
141  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
# Line 127  static int process_options = 0; Line 154  static int process_options = 0;
154    
155  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
156  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
157    static BOOL file_offsets = FALSE;
158  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
159  static BOOL invert = FALSE;  static BOOL invert = FALSE;
160    static BOOL line_offsets = FALSE;
161  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
162  static BOOL number = FALSE;  static BOOL number = FALSE;
163  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
164  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
165  static BOOL silent = FALSE;  static BOOL silent = FALSE;
166    static BOOL utf8 = FALSE;
167    
168  /* Structure for options and list of them */  /* Structure for options and list of them */
169    
# Line 158  used to identify them. */ Line 188  used to identify them. */
188  #define N_LABEL     (-5)  #define N_LABEL     (-5)
189  #define N_LOCALE    (-6)  #define N_LOCALE    (-6)
190  #define N_NULL      (-7)  #define N_NULL      (-7)
191    #define N_LOFFSETS  (-8)
192    #define N_FOFFSETS  (-9)
193    
194  static option_item optionlist[] = {  static option_item optionlist[] = {
195    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
# Line 173  static option_item optionlist[] = { Line 205  static option_item optionlist[] = {
205    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
206    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
207    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
208      { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
209    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
210    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
211    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
212    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
213    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
214    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
215      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
216    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
217    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
218      { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
219    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
220    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
221    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
# Line 210  static const char *prefix[] = { Line 245  static const char *prefix[] = {
245  static const char *suffix[] = {  static const char *suffix[] = {
246    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
247    
248    /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
249    
250    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
251    
252    const char utf8_table4[] = {
253      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
254      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
255      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
256      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
257    
258    
259    
260  /*************************************************  /*************************************************
# Line 222  although at present the only ones are fo Line 267  although at present the only ones are fo
267    
268  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
269    
270  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
271  #include <sys/types.h>  #include <sys/types.h>
272  #include <sys/stat.h>  #include <sys/stat.h>
273  #include <dirent.h>  #include <dirent.h>
# Line 254  for (;;) Line 299  for (;;)
299    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
300      return dent->d_name;      return dent->d_name;
301    }    }
302  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
303  }  }
304    
305  static void  static void
# Line 289  return isatty(fileno(stdout)); Line 334  return isatty(fileno(stdout));
334    
335  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
336  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
337  when it did not exist. */  when it did not exist. David Byron added a patch that moved the #include of
338    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
339    */
340    
341  #elif HAVE_WIN32API  #elif HAVE_WINDOWS_H
342    
343  #ifndef STRICT  #ifndef STRICT
344  # define STRICT  # define STRICT
# Line 300  when it did not exist. */ Line 346  when it did not exist. */
346  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
347  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
348  #endif  #endif
349    
350    #include <windows.h>
351    
352  #ifndef INVALID_FILE_ATTRIBUTES  #ifndef INVALID_FILE_ATTRIBUTES
353  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
354  #endif  #endif
355    
 #include <windows.h>  
   
356  typedef struct directory_type  typedef struct directory_type
357  {  {
358  HANDLE handle;  HANDLE handle;
# Line 390  regular if they are not directories. */ Line 437  regular if they are not directories. */
437    
438  int isregfile(char *filename)  int isregfile(char *filename)
439  {  {
440  return !isdirectory(filename)  return !isdirectory(filename);
441  }  }
442    
443    
# Line 401  return !isdirectory(filename) Line 448  return !isdirectory(filename)
448  static BOOL  static BOOL
449  is_stdout_tty(void)  is_stdout_tty(void)
450  {  {
451  FALSE;  return FALSE;
452  }  }
453    
454    
# Line 414  FALSE; Line 461  FALSE;
461  typedef void directory_type;  typedef void directory_type;
462    
463  int isdirectory(char *filename) { return 0; }  int isdirectory(char *filename) { return 0; }
464  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
465  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
466  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
467    
468    
# Line 439  return FALSE; Line 486  return FALSE;
486    
487    
488    
489  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
490  /*************************************************  /*************************************************
491  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
492  *************************************************/  *************************************************/
# Line 462  return sys_errlist[n]; Line 509  return sys_errlist[n];
509    
510    
511  /*************************************************  /*************************************************
512    *             Find end of line                   *
513    *************************************************/
514    
515    /* Find the end of the line that contains p, using the line-ending convention
516    selected by endlinetype. The length of the endline sequence that is found is set
517    via lenptr. This may be zero at the very end of the data if there is no
518    line-ending sequence there. Arguments:
519      p         current position in line
520      endptr    end of available data
521      lenptr    where to put the length of the eol sequence
522    
523    Returns:    pointer just past the line, including its line-ending sequence, or endptr if no line ending is found
524    */
525    
526    static char *
527    end_of_line(char *p, char *endptr, int *lenptr)
528    {
529    switch(endlinetype)
530      {
531      default:      /* Just in case: an unknown type is handled as LF */
532      case EL_LF:
533      while (p < endptr && *p != '\n') p++;
534      if (p < endptr)
535        {
536        *lenptr = 1;
537        return p + 1;   /* Step past the LF */
538        }
539      *lenptr = 0;
540      return endptr;    /* No LF before the end of the data */
541    
542      case EL_CR:
543      while (p < endptr && *p != '\r') p++;
544      if (p < endptr)
545        {
546        *lenptr = 1;
547        return p + 1;   /* Step past the CR */
548        }
549      *lenptr = 0;
550      return endptr;    /* No CR before the end of the data */
551    
552      case EL_CRLF:
553      for (;;)
554        {
555        while (p < endptr && *p != '\r') p++;
556        if (++p >= endptr)   /* No CR found, or the CR is the final byte */
557          {
558          *lenptr = 0;
559          return endptr;
560          }
561        if (*p == '\n')      /* CR immediately followed by LF */
562          {
563          *lenptr = 2;
564          return p + 1;
565          }
566        }
567      break;   /* Not reached: the loop above only exits by returning */
568    
569      case EL_ANYCRLF:
570      while (p < endptr)
571        {
572        int extra = 0;
573        register int c = *((unsigned char *)p);
574    
575        if (utf8 && c >= 0xc0)   /* First byte of a multibyte UTF-8 character */
576          {
577          int gcii, gcss;
578          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
579          gcss = 6*extra;
580          c = (c & utf8_table3[extra]) << gcss;
581          for (gcii = 1; gcii <= extra; gcii++)
582            {
583            gcss -= 6;
584            c |= (p[gcii] & 0x3f) << gcss;   /* NOTE(review): p[gcii] is not checked against endptr - confirm a truncated sequence at the end of the data cannot occur */
585            }
586          }
587    
588        p += 1 + extra;   /* Advance past the whole character */
589    
590        switch (c)
591          {
592          case 0x0a:    /* LF */
593          *lenptr = 1;
594          return p;
595    
596          case 0x0d:    /* CR */
597          if (p < endptr && *p == 0x0a)
598            {
599            *lenptr = 2;
600            p++;
601            }
602          else *lenptr = 1;   /* CR on its own */
603          return p;
604    
605          default:
606          break;
607          }
608        }   /* End of loop for ANYCRLF case */
609    
610      *lenptr = 0;  /* Must have hit the end */
611      return endptr;
612    
613      case EL_ANY:
614      while (p < endptr)
615        {
616        int extra = 0;
617        register int c = *((unsigned char *)p);
618    
619        if (utf8 && c >= 0xc0)   /* First byte of a multibyte UTF-8 character */
620          {
621          int gcii, gcss;
622          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
623          gcss = 6*extra;
624          c = (c & utf8_table3[extra]) << gcss;
625          for (gcii = 1; gcii <= extra; gcii++)
626            {
627            gcss -= 6;
628            c |= (p[gcii] & 0x3f) << gcss;   /* NOTE(review): p[gcii] is not checked against endptr - confirm a truncated sequence at the end of the data cannot occur */
629            }
630          }
631    
632        p += 1 + extra;   /* Advance past the whole character */
633    
634        switch (c)
635          {
636          case 0x0a:    /* LF */
637          case 0x0b:    /* VT */
638          case 0x0c:    /* FF */
639          *lenptr = 1;
640          return p;
641    
642          case 0x0d:    /* CR */
643          if (p < endptr && *p == 0x0a)
644            {
645            *lenptr = 2;
646            p++;
647            }
648          else *lenptr = 1;   /* CR on its own */
649          return p;
650    
651          case 0x85:    /* NEL */
652          *lenptr = utf8? 2 : 1;   /* U+0085 occupies two bytes in UTF-8, one otherwise */
653          return p;
654    
655          case 0x2028:  /* LS */
656          case 0x2029:  /* PS */
657          *lenptr = 3;   /* U+2028 and U+2029 occupy three bytes in UTF-8 */
658          return p;
659    
660          default:
661          break;
662          }
663        }   /* End of loop for ANY case */
664    
665      *lenptr = 0;  /* Must have hit the end */
666      return endptr;
667      }     /* End of overall switch */
668    }
669    
670    
671    
672    /*************************************************
673    *         Find start of previous line            *
674    *************************************************/
675    
676    /* This is called when looking back for before lines to print. It scans
677    backwards from p using the line-ending convention selected by endlinetype.
678    Arguments:
679      p         start of the subsequent line
680      startptr  start of available data
681    
682    Returns:    pointer to the start of the previous line
683    */
684    
685    static char *
686    previous_line(char *p, char *startptr)
687    {
688    switch(endlinetype)
689      {
690      default:      /* Just in case: an unknown type is handled as LF */
691      case EL_LF:
692      p--;          /* Step back onto the previous line's terminator */
693      while (p > startptr && p[-1] != '\n') p--;
694      return p;
695    
696      case EL_CR:
697      p--;          /* Step back onto the previous line's terminator */
698      while (p > startptr && p[-1] != '\r') p--;   /* CR, not LF, ends lines in this mode */
699      return p;
700    
701      case EL_CRLF:
702      for (;;)
703        {
704        p -= 2;
705        while (p > startptr && p[-1] != '\n') p--;
706        if (p <= startptr + 1 || p[-2] == '\r') return p;   /* An LF only counts when a CR precedes it */
707        }
708      return p;   /* But control should never get here */
709    
710      case EL_ANY:
711      case EL_ANYCRLF:
712      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;   /* Treat CRLF as one terminator */
713      if (utf8) while ((*p & 0xc0) == 0x80) p--;   /* Back up over UTF-8 continuation bytes */
714    
715      while (p > startptr)
716        {
717        register int c;
718        char *pp = p - 1;
719    
720        if (utf8)
721          {
722          int extra = 0;
723          while ((*pp & 0xc0) == 0x80) pp--;   /* Find the first byte of the preceding character */
724          c = *((unsigned char *)pp);
725          if (c >= 0xc0)
726            {
727            int gcii, gcss;
728            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
729            gcss = 6*extra;
730            c = (c & utf8_table3[extra]) << gcss;
731            for (gcii = 1; gcii <= extra; gcii++)
732              {
733              gcss -= 6;
734              c |= (pp[gcii] & 0x3f) << gcss;
735              }
736            }
737          }
738        else c = *((unsigned char *)pp);
739    
740        if (endlinetype == EL_ANYCRLF) switch (c)
741          {
742          case 0x0a:    /* LF */
743          case 0x0d:    /* CR */
744          return p;
745    
746          default:
747          break;
748          }
749    
750        else switch (c)
751          {
752          case 0x0a:    /* LF */
753          case 0x0b:    /* VT */
754          case 0x0c:    /* FF */
755          case 0x0d:    /* CR */
756          case 0x85:    /* NEL */
757          case 0x2028:  /* LS */
758          case 0x2029:  /* PS */
759          return p;
760    
761          default:
762          break;
763          }
764    
765        p = pp;  /* Back one character */
766        }        /* End of loop for ANY and ANYCRLF cases */
767    
768      return startptr;  /* Hit start of data */
769      }     /* End of overall switch */
770    }
771    
772    
773    
774    
775    
776    /*************************************************
777  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
778  *************************************************/  *************************************************/
779    
# Line 486  if (after_context > 0 && lastmatchnumber Line 798  if (after_context > 0 && lastmatchnumber
798    int count = 0;    int count = 0;
799    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
800      {      {
801        int ellength;
802      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
803      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
804      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
805      while (*pp != '\n') pp++;      pp = end_of_line(pp, endptr, &ellength);
806      fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
807      lastmatchrestart = pp + 1;      lastmatchrestart = pp;
808      }      }
809    hyphenpending = TRUE;    hyphenpending = TRUE;
810    }    }
# Line 511  be in the middle third most of the time, Line 824  be in the middle third most of the time,
824  "before" context printing.  "before" context printing.
825    
826  Arguments:  Arguments:
827    in           the fopened FILE stream    handle       the fopened FILE stream for a normal file
828                   the gzFile pointer when reading is via libz
829                   the BZFILE pointer when reading is via libbz2
830      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
831    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
832                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
833                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
834    
835  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
836                 1 otherwise (no matches)                 1 otherwise (no matches)
837                   2 if there is a read error on a .bz2 file
838  */  */
839    
840  static int  static int
841  pcregrep(FILE *in, char *printname)  pcregrep(void *handle, int frtype, char *printname)
842  {  {
843  int rc = 1;  int rc = 1;
844  int linenumber = 1;  int linenumber = 1;
845  int lastmatchnumber = 0;  int lastmatchnumber = 0;
846  int count = 0;  int count = 0;
847    int filepos = 0;
848  int offsets[99];  int offsets[99];
849  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
850  char buffer[3*MBUFTHIRD];  char buffer[3*MBUFTHIRD];
# Line 534  char *ptr = buffer; Line 852  char *ptr = buffer;
852  char *endptr;  char *endptr;
853  size_t bufflength;  size_t bufflength;
854  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
855    FILE *in = NULL;                    /* Ensure initialized */
856    
857    #ifdef SUPPORT_LIBZ
858    gzFile ingz = NULL;
859    #endif
860    
861    #ifdef SUPPORT_LIBBZ2
862    BZFILE *inbz2 = NULL;
863    #endif
864    
865    
866    /* Do the first read into the start of the buffer and set up the pointer to end
867    of what we have. In the case of libz, a non-zipped .gz file will be read as a
868    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
869    fail. */
870    
871    #ifdef SUPPORT_LIBZ
872    if (frtype == FR_LIBZ)
873      {
874      ingz = (gzFile)handle;
875      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
876      }
877    else
878    #endif
879    
880    #ifdef SUPPORT_LIBBZ2
881    if (frtype == FR_LIBBZ2)
882      {
883      inbz2 = (BZFILE *)handle;
884      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
885      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
886      }                                    /* without the cast it is unsigned. */
887    else
888    #endif
889    
890  /* Do the first read into the start of the buffer and set up the pointer to    {
891  end of what we have. */    in = (FILE *)handle;
892      bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
893      }
894    
 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  
895  endptr = buffer + bufflength;  endptr = buffer + bufflength;
896    
897  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
# Line 548  way, the buffer is shifted left and re-f Line 901  way, the buffer is shifted left and re-f
901    
902  while (ptr < endptr)  while (ptr < endptr)
903    {    {
904    int i;    int i, endlinelength;
905    int mrc = 0;    int mrc = 0;
906    BOOL match = FALSE;    BOOL match = FALSE;
907      char *matchptr = ptr;
908    char *t = ptr;    char *t = ptr;
909    size_t length, linelength;    size_t length, linelength;
910    
# Line 561  while (ptr < endptr) Line 915  while (ptr < endptr)
915    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
916    that any match is constrained to be in the first line. */    that any match is constrained to be in the first line. */
917    
918    linelength = 0;    t = end_of_line(t, endptr, &endlinelength);
919    while (t < endptr && *t++ != '\n') linelength++;    linelength = t - ptr - endlinelength;
920    length = multiline? endptr - ptr : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
921    
922      /* Extra processing for Jeffrey Friedl's debugging. */
923    
924    #ifdef JFRIEDL_DEBUG
925      if (jfriedl_XT || jfriedl_XR)
926      {
927          #include <sys/time.h>
928          #include <time.h>
929          struct timeval start_time, end_time;
930          struct timezone dummy;
931    
932          if (jfriedl_XT)
933          {
934              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
935              const char *orig = ptr;
936              ptr = malloc(newlen + 1);
937              if (!ptr) {
938                      printf("out of memory");
939                      exit(2);
940              }
941              endptr = ptr;
942              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
943              for (i = 0; i < jfriedl_XT; i++) {
944                      strncpy(endptr, orig,  length);
945                      endptr += length;
946              }
947              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
948              length = newlen;
949          }
950    
951          if (gettimeofday(&start_time, &dummy) != 0)
952                  perror("bad gettimeofday");
953    
954    
955          for (i = 0; i < jfriedl_XR; i++)
956              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
957    
958          if (gettimeofday(&end_time, &dummy) != 0)
959                  perror("bad gettimeofday");
960    
961          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
962                          -
963                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
964    
965          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
966          return 0;
967      }
968    #endif
969    
970      /* We come back here after a match when the -o option (only_matching) is set,
971      in order to find any further matches in the same line. */
972    
973      ONLY_MATCHING_RESTART:
974    
975    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches. Note that we don't include
976    the final newline in the subject string. */    the final newline in the subject string. */
977    
978    for (i = 0; i < pattern_count; i++)    for (i = 0; i < pattern_count; i++)
979      {      {
980      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,      mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
981        offsets, 99);        offsets, 99);
982      if (mrc >= 0) { match = TRUE; break; }      if (mrc >= 0) { match = TRUE; break; }
983      if (mrc != PCRE_ERROR_NOMATCH)      if (mrc != PCRE_ERROR_NOMATCH)
# Line 578  while (ptr < endptr) Line 985  while (ptr < endptr)
985        fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);        fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
986        if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);        if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
987        fprintf(stderr, "this line:\n");        fprintf(stderr, "this line:\n");
988        fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */        fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */
989        fprintf(stderr, "\n");        fprintf(stderr, "\n");
990        if (error_count == 0 &&        if (error_count == 0 &&
991            (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))            (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
# Line 625  while (ptr < endptr) Line 1032  while (ptr < endptr)
1032      else if (quiet) return 0;      else if (quiet) return 0;
1033    
1034      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, and
1035      does not print any context. */      the --file-offsets and --line-offsets options output offsets for the
1036        matching substring (they both force --only-matching). None of these options
1037        prints any context. Afterwards, adjust the start and length, and then jump
1038        back to look for further matches in the same line. If we are in invert
1039        mode, however, nothing is printed - this could still be useful because the
1040        return code is set. */
1041    
1042      else if (only_matching)      else if (only_matching)
1043        {        {
1044        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (!invert)
1045        if (number) fprintf(stdout, "%d:", linenumber);          {
1046        fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1047        fprintf(stdout, "\n");          if (number) fprintf(stdout, "%d:", linenumber);
1048            if (line_offsets)
1049              fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,
1050                offsets[1] - offsets[0]);
1051            else if (file_offsets)
1052              fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,
1053                offsets[1] - offsets[0]);
1054            else
1055              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1056            fprintf(stdout, "\n");
1057            matchptr += offsets[1];
1058            length -= offsets[1];
1059            match = FALSE;
1060            goto ONLY_MATCHING_RESTART;
1061            }
1062        }        }
1063    
1064      /* This is the default case when none of the above options is set. We print      /* This is the default case when none of the above options is set. We print
# Line 646  while (ptr < endptr) Line 1072  while (ptr < endptr)
1072    
1073        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
1074          {          {
1075            int ellength;
1076          int linecount = 0;          int linecount = 0;
1077          char *p = lastmatchrestart;          char *p = lastmatchrestart;
1078    
1079          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
1080            {            {
1081            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
1082            linecount++;            linecount++;
1083            }            }
1084    
# Line 665  while (ptr < endptr) Line 1091  while (ptr < endptr)
1091            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
1092            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1093            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1094            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1095            fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1096            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
1097            }            }
1098          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
1099          }          }
# Line 693  while (ptr < endptr) Line 1119  while (ptr < endptr)
1119                 linecount < before_context)                 linecount < before_context)
1120            {            {
1121            linecount++;            linecount++;
1122            p--;            p = previous_line(p, buffer);
           while (p > buffer && p[-1] != '\n') p--;  
1123            }            }
1124    
1125          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 702  while (ptr < endptr) Line 1127  while (ptr < endptr)
1127    
1128          while (p < ptr)          while (p < ptr)
1129            {            {
1130              int ellength;
1131            char *pp = p;            char *pp = p;
1132            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
1133            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1134            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
1135            fwrite(p, 1, pp - p + 1, stdout);   /* In case binary zero */            fwrite(p, 1, pp - p, stdout);
1136            p = pp + 1;            p = pp;
1137            }            }
1138          }          }
1139    
# Line 722  while (ptr < endptr) Line 1148  while (ptr < endptr)
1148    
1149        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1150        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1151        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1152        start of the match will always be before the first \n character. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1153          the match will always be before the first newline sequence. */
1154    
1155        if (multiline)        if (multiline)
1156          {          {
1157          char *endmatch = ptr + offsets[1];          int ellength;
1158          t = ptr;          char *endmatch = ptr;
1159          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          if (!invert)
1160          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1161          linelength = endmatch - ptr;            endmatch += offsets[1];
1162              t = ptr;
1163              while (t < endmatch)
1164                {
1165                t = end_of_line(t, endptr, &ellength);
1166                if (t <= endmatch) linenumber++; else break;
1167                }
1168              }
1169            endmatch = end_of_line(endmatch, endptr, &ellength);
1170            linelength = endmatch - ptr - ellength;
1171          }          }
1172    
1173        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 761  while (ptr < endptr) Line 1197  while (ptr < endptr)
1197          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1198          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1199          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1200          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1201              stdout);
1202          }          }
1203        else fwrite(ptr, 1, linelength, stdout);        else fwrite(ptr, 1, linelength + endlinelength, stdout);
   
       fprintf(stdout, "\n");  
1204        }        }
1205    
1206      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match */
# Line 775  while (ptr < endptr) Line 1210  while (ptr < endptr)
1210      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1211      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1212    
1213      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1214      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1215      }      }
1216    
1217    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1218      anything to be printed), we have to move on to the end of the match before
1219      proceeding. */
1220    
1221      if (multiline && invert && match)
1222        {
1223        int ellength;
1224        char *endmatch = ptr + offsets[1];
1225        t = ptr;
1226        while (t < endmatch)
1227          {
1228          t = end_of_line(t, endptr, &ellength);
1229          if (t <= endmatch) linenumber++; else break;
1230          }
1231        endmatch = end_of_line(endmatch, endptr, &ellength);
1232        linelength = endmatch - ptr - ellength;
1233        }
1234    
1235    ptr += linelength + 1;    /* Advance to after the newline and increment the line number. The file
1236      offset to the current line is maintained in filepos. */
1237    
1238      ptr += linelength + endlinelength;
1239      filepos += linelength + endlinelength;
1240    linenumber++;    linenumber++;
1241    
1242    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
# Line 803  while (ptr < endptr) Line 1258  while (ptr < endptr)
1258    
1259      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1260      ptr -= MBUFTHIRD;      ptr -= MBUFTHIRD;
1261    
1262    #ifdef SUPPORT_LIBZ
1263        if (frtype == FR_LIBZ)
1264          bufflength = 2*MBUFTHIRD +
1265            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1266        else
1267    #endif
1268    
1269    #ifdef SUPPORT_LIBBZ2
1270        if (frtype == FR_LIBBZ2)
1271          bufflength = 2*MBUFTHIRD +
1272            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1273        else
1274    #endif
1275    
1276      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1277    
1278      endptr = buffer + bufflength;      endptr = buffer + bufflength;
1279    
1280      /* Adjust any last match point */      /* Adjust any last match point */
# Line 867  grep_or_recurse(char *pathname, BOOL dir Line 1338  grep_or_recurse(char *pathname, BOOL dir
1338  {  {
1339  int rc = 1;  int rc = 1;
1340  int sep;  int sep;
1341  FILE *in;  int frtype;
1342    int pathlen;
1343    void *handle;
1344    FILE *in = NULL;           /* Ensure initialized */
1345    
1346    #ifdef SUPPORT_LIBZ
1347    gzFile ingz = NULL;
1348    #endif
1349    
1350    #ifdef SUPPORT_LIBBZ2
1351    BZFILE *inbz2 = NULL;
1352    #endif
1353    
1354  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1355    
1356  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1357    {    {
1358    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN,
1359      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1360        stdin_name : NULL);        stdin_name : NULL);
1361    }    }
1362    
   
1363  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* If the file is a directory, skip if skipping or if we are recursing, scan
1364  each file within it, subject to any include or exclude patterns that were set.  each file within it, subject to any include or exclude patterns that were set.
1365  The scanning code is localized so it can be made system-specific. */  The scanning code is localized so it can be made system-specific. */
# Line 935  skipping was not requested. The scan pro Line 1416  skipping was not requested. The scan pro
1416  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1417  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1418    
1419  in = fopen(pathname, "r");  pathlen = strlen(pathname);
1420  if (in == NULL)  
1421    /* Open using zlib if it is supported and the file name ends with .gz. */
1422    
1423    #ifdef SUPPORT_LIBZ
1424    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1425      {
1426      ingz = gzopen(pathname, "rb");
1427      if (ingz == NULL)
1428        {
1429        if (!silent)
1430          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1431            strerror(errno));
1432        return 2;
1433        }
1434      handle = (void *)ingz;
1435      frtype = FR_LIBZ;
1436      }
1437    else
1438    #endif
1439    
1440    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1441    
1442    #ifdef SUPPORT_LIBBZ2
1443    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1444      {
1445      inbz2 = BZ2_bzopen(pathname, "rb");
1446      handle = (void *)inbz2;
1447      frtype = FR_LIBBZ2;
1448      }
1449    else
1450    #endif
1451    
1452    /* Otherwise use plain fopen(). The label is so that we can come back here if
1453    an attempt to read a .bz2 file indicates that it really is a plain file. */
1454    
1455    #ifdef SUPPORT_LIBBZ2
1456    PLAIN_FILE:
1457    #endif
1458      {
1459      in = fopen(pathname, "r");
1460      handle = (void *)in;
1461      frtype = FR_PLAIN;
1462      }
1463    
1464    /* All the opening methods set errno when they fail. */
1465    
1466    if (handle == NULL)
1467    {    {
1468    if (!silent)    if (!silent)
1469      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 944  if (in == NULL) Line 1471  if (in == NULL)
1471    return 2;    return 2;
1472    }    }
1473    
1474  rc = pcregrep(in, (filenames > FN_DEFAULT ||  /* Now grep the file */
1475    
1476    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1477    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1478    
1479    /* Close in an appropriate manner. */
1480    
1481    #ifdef SUPPORT_LIBZ
1482    if (frtype == FR_LIBZ)
1483      gzclose(ingz);
1484    else
1485    #endif
1486    
1487    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1488    read failed. If the error indicates that the file isn't in fact bzipped, try
1489    again as a normal file. */
1490    
1491    #ifdef SUPPORT_LIBBZ2
1492    if (frtype == FR_LIBBZ2)
1493      {
1494      if (rc == 2)
1495        {
1496        int errnum;
1497        const char *err = BZ2_bzerror(inbz2, &errnum);
1498        if (errnum == BZ_DATA_ERROR_MAGIC)
1499          {
1500          BZ2_bzclose(inbz2);
1501          goto PLAIN_FILE;
1502          }
1503        else if (!silent)
1504          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1505            pathname, err);
1506        }
1507      BZ2_bzclose(inbz2);
1508      }
1509    else
1510    #endif
1511    
1512    /* Normal file close */
1513    
1514  fclose(in);  fclose(in);
1515    
1516    /* Pass back the yield from pcregrep(). */
1517    
1518  return rc;  return rc;
1519  }  }
1520    
# Line 968  for (op = optionlist; op->one_char != 0; Line 1535  for (op = optionlist; op->one_char != 0;
1535    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1536    }    }
1537  fprintf(stderr, "] [long options] [pattern] [files]\n");  fprintf(stderr, "] [long options] [pattern] [files]\n");
1538  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1539      "options.\n");
1540  return rc;  return rc;
1541  }  }
1542    
# Line 987  option_item *op; Line 1555  option_item *op;
1555  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1556  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1557  printf("PATTERN must be present if neither -e nor -f is used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1558  printf("\"-\" can be used as a file name to mean STDIN.\n\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
1559    
1560    #ifdef SUPPORT_LIBZ
1561    printf("Files whose names end in .gz are read using zlib.\n");
1562    #endif
1563    
1564    #ifdef SUPPORT_LIBBZ2
1565    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1566    #endif
1567    
1568    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1569    printf("Other files and the standard input are read as plain files.\n\n");
1570    #else
1571    printf("All files are read as plain files, without any interpretation.\n\n");
1572    #endif
1573    
1574    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1575  printf("Options:\n");  printf("Options:\n");
1576    
1577  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
# Line 997  for (op = optionlist; op->one_char != 0; Line 1579  for (op = optionlist; op->one_char != 0;
1579    int n;    int n;
1580    char s[4];    char s[4];
1581    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1582    printf("  %s --%s%n", s, op->long_name, &n);    n = 30 - printf("  %s --%s", s, op->long_name);
   n = 30 - n;  
1583    if (n < 1) n = 1;    if (n < 1) n = 1;
1584    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                    ", op->help_text);
1585    }    }
# Line 1023  handle_option(int letter, int options) Line 1604  handle_option(int letter, int options)
1604  {  {
1605  switch(letter)  switch(letter)
1606    {    {
1607      case N_FOFFSETS: file_offsets = TRUE; break;
1608    case N_HELP: help(); exit(0);    case N_HELP: help(); exit(0);
1609      case N_LOFFSETS: line_offsets = number = TRUE; break;
1610    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1611    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1612    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
# Line 1037  switch(letter) Line 1620  switch(letter)
1620    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1621    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
1622    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1623    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1624    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1625    case 'w': process_options |= PO_WORD_MATCH; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1626    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
1627    
1628    case 'V':    case 'V':
1629    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
1630    exit(0);    exit(0);
1631    break;    break;
1632    
# Line 1120  sprintf(buffer, "%s%.*s%s", prefix[proce Line 1702  sprintf(buffer, "%s%.*s%s", prefix[proce
1702    suffix[process_options]);    suffix[process_options]);
1703  pattern_list[pattern_count] =  pattern_list[pattern_count] =
1704    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
1705  if (pattern_list[pattern_count++] != NULL) return TRUE;  if (pattern_list[pattern_count] != NULL)
1706      {
1707      pattern_count++;
1708      return TRUE;
1709      }
1710    
1711  /* Handle compile errors */  /* Handle compile errors */
1712    
# Line 1152  return FALSE; Line 1738  return FALSE;
1738  *************************************************/  *************************************************/
1739    
1740  /* When the -F option has been used, each string may be a list of strings,  /* When the -F option has been used, each string may be a list of strings,
1741  separated by newlines. They will be matched literally.  separated by line breaks. They will be matched literally.
1742    
1743  Arguments:  Arguments:
1744    pattern        the pattern string    pattern        the pattern string
# Line 1170  compile_pattern(char *pattern, int optio Line 1756  compile_pattern(char *pattern, int optio
1756  {  {
1757  if ((process_options & PO_FIXED_STRINGS) != 0)  if ((process_options & PO_FIXED_STRINGS) != 0)
1758    {    {
1759      char *eop = pattern + strlen(pattern);
1760    char buffer[MBUFTHIRD];    char buffer[MBUFTHIRD];
1761    for(;;)    for(;;)
1762      {      {
1763      char *p = strchr(pattern, '\n');      int ellength;
1764      if (p == NULL)      char *p = end_of_line(pattern, eop, &ellength);
1765        if (ellength == 0)
1766        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
1767      sprintf(buffer, "%.*s", p - pattern, pattern);      sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1768      pattern = p + 1;      pattern = p;
1769      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
1770        return FALSE;        return FALSE;
1771      }      }
# Line 1200  int i, j; Line 1788  int i, j;
1788  int rc = 1;  int rc = 1;
1789  int pcre_options = 0;  int pcre_options = 0;
1790  int cmd_pattern_count = 0;  int cmd_pattern_count = 0;
1791    int hint_count = 0;
1792  int errptr;  int errptr;
1793  BOOL only_one_at_top;  BOOL only_one_at_top;
1794  char *patterns[MAX_PATTERN_COUNT];  char *patterns[MAX_PATTERN_COUNT];
1795  const char *locale_from = "--locale";  const char *locale_from = "--locale";
1796  const char *error;  const char *error;
1797    
1798    /* Set the default line ending value from the default in the PCRE library;
1799    "lf", "cr", "crlf", "any", and "anycrlf" are supported. Anything else is
1800    treated as "lf". */
1801    
1802    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1803    switch(i)
1804      {
1805      default:                 newline = (char *)"lf"; break;
1806      case '\r':               newline = (char *)"cr"; break;
1807      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1808      case -1:                 newline = (char *)"any"; break;
1809      case -2:                 newline = (char *)"anycrlf"; break;
1810      }
1811    
1812  /* Process the options */  /* Process the options */
1813    
1814  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
# Line 1261  for (i = 1; i < argc; i++) Line 1864  for (i = 1; i < argc; i++)
1864          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
1865            {            {
1866            int oplen = equals - op->long_name;            int oplen = equals - op->long_name;
1867            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1868            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1869              {              {
1870              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1280  for (i = 1; i < argc; i++) Line 1883  for (i = 1; i < argc; i++)
1883          char buff2[24];          char buff2[24];
1884          int baselen = opbra - op->long_name;          int baselen = opbra - op->long_name;
1885          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
1886          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1,
1887            opbra + 1);            (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1888          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1889            break;            break;
1890          }          }
# Line 1294  for (i = 1; i < argc; i++) Line 1897  for (i = 1; i < argc; i++)
1897        }        }
1898      }      }
1899    
1900    
1901      /* Jeffrey Friedl's debugging harness uses these additional options which
1902      are not in the right form for putting in the option table because they use
1903      only one hyphen, yet are more than one character long. By putting them
1904      separately here, they will not get displayed as part of the help() output,
1905      but I don't think Jeffrey will care about that. */
1906    
1907    #ifdef JFRIEDL_DEBUG
1908      else if (strcmp(argv[i], "-pre") == 0) {
1909              jfriedl_prefix = argv[++i];
1910              continue;
1911      } else if (strcmp(argv[i], "-post") == 0) {
1912              jfriedl_postfix = argv[++i];
1913              continue;
1914      } else if (strcmp(argv[i], "-XT") == 0) {
1915              sscanf(argv[++i], "%d", &jfriedl_XT);
1916              continue;
1917      } else if (strcmp(argv[i], "-XR") == 0) {
1918              sscanf(argv[++i], "%d", &jfriedl_XR);
1919              continue;
1920      }
1921    #endif
1922    
1923    
1924    /* One-char options; many that have no data may be in a single argument; we    /* One-char options; many that have no data may be in a single argument; we
1925    continue till we hit the last one or one that needs data. */    continue till we hit the last one or one that needs data. */
1926    
# Line 1333  for (i = 1; i < argc; i++) Line 1960  for (i = 1; i < argc; i++)
1960    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1961    either has a value or defaults to something. It cannot have data in a    either has a value or defaults to something. It cannot have data in a
1962    separate item. At the moment, the only such options are "colo(u)r" and    separate item. At the moment, the only such options are "colo(u)r" and
1963    Jeffrey Friedl's special debugging option. */    Jeffrey Friedl's special -S debugging option. */
1964    
1965    if (*option_data == 0 &&    if (*option_data == 0 &&
1966        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
# Line 1416  if (both_context > 0) Line 2043  if (both_context > 0)
2043    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2044    }    }
2045    
2046    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2047    However, the latter two set the only_matching flag. */
2048    
2049    if ((only_matching && (file_offsets || line_offsets)) ||
2050        (file_offsets && line_offsets))
2051      {
2052      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2053        "and/or --line-offsets\n");
2054      exit(usage(2));
2055      }
2056    
2057    if (file_offsets || line_offsets) only_matching = TRUE;
2058    
2059  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2060  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
2061    
# Line 1465  if (colour_option != NULL && strcmp(colo Line 2105  if (colour_option != NULL && strcmp(colo
2105      }      }
2106    }    }
2107    
2108    /* Interpret the newline type; the default settings are Unix-like. */
2109    
2110    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2111      {
2112      pcre_options |= PCRE_NEWLINE_CR;
2113      endlinetype = EL_CR;
2114      }
2115    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2116      {
2117      pcre_options |= PCRE_NEWLINE_LF;
2118      endlinetype = EL_LF;
2119      }
2120    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2121      {
2122      pcre_options |= PCRE_NEWLINE_CRLF;
2123      endlinetype = EL_CRLF;
2124      }
2125    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2126      {
2127      pcre_options |= PCRE_NEWLINE_ANY;
2128      endlinetype = EL_ANY;
2129      }
2130    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2131      {
2132      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2133      endlinetype = EL_ANYCRLF;
2134      }
2135    else
2136      {
2137      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2138      return 2;
2139      }
2140    
2141  /* Interpret the text values for -d and -D */  /* Interpret the text values for -d and -D */
2142    
2143  if (dee_option != NULL)  if (dee_option != NULL)
# Line 1490  if (DEE_option != NULL) Line 2163  if (DEE_option != NULL)
2163      }      }
2164    }    }
2165    
2166  /* Check the value for Jeff Friedl's debugging option. */  /* Check the values for Jeffrey Friedl's debugging options. */
2167    
2168  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
2169  if (S_arg > 9)  if (S_arg > 9)
# Line 1498  if (S_arg > 9) Line 2171  if (S_arg > 9)
2171    fprintf(stderr, "pcregrep: bad value for -S option\n");    fprintf(stderr, "pcregrep: bad value for -S option\n");
2172    return 2;    return 2;
2173    }    }
2174    if (jfriedl_XT != 0 || jfriedl_XR != 0)
2175      {
2176      if (jfriedl_XT == 0) jfriedl_XT = 1;
2177      if (jfriedl_XR == 0) jfriedl_XR = 1;
2178      }
2179  #endif  #endif
2180    
2181  /* Get memory to store the pattern and hints lists. */  /* Get memory to store the pattern and hints lists. */
# Line 1508  hints_list = (pcre_extra **)malloc(MAX_P Line 2186  hints_list = (pcre_extra **)malloc(MAX_P
2186  if (pattern_list == NULL || hints_list == NULL)  if (pattern_list == NULL || hints_list == NULL)
2187    {    {
2188    fprintf(stderr, "pcregrep: malloc failed\n");    fprintf(stderr, "pcregrep: malloc failed\n");
2189    return 2;    goto EXIT2;
2190    }    }
2191    
2192  /* If no patterns were provided by -e, and there is no file provided by -f,  /* If no patterns were provided by -e, and there is no file provided by -f,
# Line 1527  for (j = 0; j < cmd_pattern_count; j++) Line 2205  for (j = 0; j < cmd_pattern_count; j++)
2205    {    {
2206    if (!compile_pattern(patterns[j], pcre_options, NULL,    if (!compile_pattern(patterns[j], pcre_options, NULL,
2207         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))         (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2208      return 2;      goto EXIT2;
2209    }    }
2210    
2211  /* Compile the regular expressions that are provided in a file. */  /* Compile the regular expressions that are provided in a file. */
# Line 1551  if (pattern_filename != NULL) Line 2229  if (pattern_filename != NULL)
2229        {        {
2230        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2231          strerror(errno));          strerror(errno));
2232        return 2;        goto EXIT2;
2233        }        }
2234      filename = pattern_filename;      filename = pattern_filename;
2235      }      }
# Line 1564  if (pattern_filename != NULL) Line 2242  if (pattern_filename != NULL)
2242      linenumber++;      linenumber++;
2243      if (buffer[0] == 0) continue;   /* Skip blank lines */      if (buffer[0] == 0) continue;   /* Skip blank lines */
2244      if (!compile_pattern(buffer, pcre_options, filename, linenumber))      if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2245        return 2;        goto EXIT2;
2246      }      }
2247    
2248    if (f != stdin) fclose(f);    if (f != stdin) fclose(f);
# Line 1580  for (j = 0; j < pattern_count; j++) Line 2258  for (j = 0; j < pattern_count; j++)
2258      char s[16];      char s[16];
2259      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2260      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2261      return 2;      goto EXIT2;
2262      }      }
2263      hint_count++;
2264    }    }
2265    
2266  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
# Line 1594  if (exclude_pattern != NULL) Line 2273  if (exclude_pattern != NULL)
2273      {      {
2274      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2275        errptr, error);        errptr, error);
2276      return 2;      goto EXIT2;
2277      }      }
2278    }    }
2279    
# Line 1606  if (include_pattern != NULL) Line 2285  if (include_pattern != NULL)
2285      {      {
2286      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",      fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2287        errptr, error);        errptr, error);
2288      return 2;      goto EXIT2;
2289      }      }
2290    }    }
2291    
2292  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
2293    
2294  if (i >= argc)  if (i >= argc)
2295    return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    {
2296      rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2297      goto EXIT;
2298      }
2299    
2300  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2301  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
# Line 1630  for (; i < argc; i++) Line 2312  for (; i < argc; i++)
2312      else if (frc == 0 && rc == 1) rc = 0;      else if (frc == 0 && rc == 1) rc = 0;
2313    }    }
2314    
2315    EXIT:
2316    if (pattern_list != NULL)
2317      {
2318      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2319      free(pattern_list);
2320      }
2321    if (hints_list != NULL)
2322      {
2323      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2324      free(hints_list);
2325      }
2326  return rc;  return rc;
2327    
2328    EXIT2:
2329    rc = 2;
2330    goto EXIT;
2331  }  }
2332    
2333  /* End of pcregrep */  /* End of pcregrep */

Legend:
Removed from v.87  
changed lines
  Added in v.305

  ViewVC Help
Powered by ViewVC 1.1.5