6 |
its pattern matching. On a Unix or Win32 system it can recurse into |
its pattern matching. On a Unix or Win32 system it can recurse into |
7 |
directories. |
directories. |
8 |
|
|
9 |
Copyright (c) 1997-2007 University of Cambridge |
Copyright (c) 1997-2009 University of Cambridge |
10 |
|
|
11 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
12 |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
55 |
#include <unistd.h> |
#include <unistd.h> |
56 |
#endif |
#endif |
57 |
|
|
58 |
|
#ifdef SUPPORT_LIBZ |
59 |
|
#include <zlib.h> |
60 |
|
#endif |
61 |
|
|
62 |
|
#ifdef SUPPORT_LIBBZ2 |
63 |
|
#include <bzlib.h> |
64 |
|
#endif |
65 |
|
|
66 |
#include "pcre.h" |
#include "pcre.h" |
67 |
|
|
68 |
#define FALSE 0 |
#define FALSE 0 |
71 |
typedef int BOOL; |
typedef int BOOL; |
72 |
|
|
73 |
#define MAX_PATTERN_COUNT 100 |
#define MAX_PATTERN_COUNT 100 |
74 |
|
#define OFFSET_SIZE 99 |
75 |
|
|
76 |
#if BUFSIZ > 8192 |
#if BUFSIZ > 8192 |
77 |
#define MBUFTHIRD BUFSIZ |
#define MBUFTHIRD BUFSIZ |
85 |
|
|
86 |
enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE }; |
enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE }; |
87 |
|
|
88 |
|
/* File reading styles */ |
89 |
|
|
90 |
|
enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 }; |
91 |
|
|
92 |
/* Actions for the -d and -D options */ |
/* Actions for the -d and -D options */ |
93 |
|
|
94 |
enum { dee_READ, dee_SKIP, dee_RECURSE }; |
enum { dee_READ, dee_SKIP, dee_RECURSE }; |
140 |
|
|
141 |
static char *include_pattern = NULL; |
static char *include_pattern = NULL; |
142 |
static char *exclude_pattern = NULL; |
static char *exclude_pattern = NULL; |
143 |
|
static char *include_dir_pattern = NULL; |
144 |
|
static char *exclude_dir_pattern = NULL; |
145 |
|
|
146 |
static pcre *include_compiled = NULL; |
static pcre *include_compiled = NULL; |
147 |
static pcre *exclude_compiled = NULL; |
static pcre *exclude_compiled = NULL; |
148 |
|
static pcre *include_dir_compiled = NULL; |
149 |
|
static pcre *exclude_dir_compiled = NULL; |
150 |
|
|
151 |
static int after_context = 0; |
static int after_context = 0; |
152 |
static int before_context = 0; |
static int before_context = 0; |
159 |
|
|
160 |
static BOOL count_only = FALSE; |
static BOOL count_only = FALSE; |
161 |
static BOOL do_colour = FALSE; |
static BOOL do_colour = FALSE; |
162 |
|
static BOOL file_offsets = FALSE; |
163 |
static BOOL hyphenpending = FALSE; |
static BOOL hyphenpending = FALSE; |
164 |
static BOOL invert = FALSE; |
static BOOL invert = FALSE; |
165 |
|
static BOOL line_offsets = FALSE; |
166 |
static BOOL multiline = FALSE; |
static BOOL multiline = FALSE; |
167 |
static BOOL number = FALSE; |
static BOOL number = FALSE; |
168 |
static BOOL only_matching = FALSE; |
static BOOL only_matching = FALSE; |
186 |
/* Options without a single-letter equivalent get a negative value. This can be |
/* Options without a single-letter equivalent get a negative value. This can be |
187 |
used to identify them. */ |
used to identify them. */ |
188 |
|
|
189 |
#define N_COLOUR (-1) |
#define N_COLOUR (-1) |
190 |
#define N_EXCLUDE (-2) |
#define N_EXCLUDE (-2) |
191 |
#define N_HELP (-3) |
#define N_EXCLUDE_DIR (-3) |
192 |
#define N_INCLUDE (-4) |
#define N_HELP (-4) |
193 |
#define N_LABEL (-5) |
#define N_INCLUDE (-5) |
194 |
#define N_LOCALE (-6) |
#define N_INCLUDE_DIR (-6) |
195 |
#define N_NULL (-7) |
#define N_LABEL (-7) |
196 |
|
#define N_LOCALE (-8) |
197 |
|
#define N_NULL (-9) |
198 |
|
#define N_LOFFSETS (-10) |
199 |
|
#define N_FOFFSETS (-11) |
200 |
|
|
201 |
static option_item optionlist[] = { |
static option_item optionlist[] = { |
202 |
{ OP_NODATA, N_NULL, NULL, "", " terminate options" }, |
{ OP_NODATA, N_NULL, NULL, "", " terminate options" }, |
212 |
{ OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" }, |
{ OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" }, |
213 |
{ OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" }, |
{ OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" }, |
214 |
{ OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" }, |
{ OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" }, |
215 |
|
{ OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" }, |
216 |
{ OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" }, |
{ OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" }, |
217 |
{ OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" }, |
{ OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" }, |
218 |
{ OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" }, |
{ OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" }, |
219 |
{ OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" }, |
{ OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" }, |
220 |
{ OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" }, |
{ OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" }, |
221 |
{ OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" }, |
{ OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" }, |
222 |
|
{ OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" }, |
223 |
{ OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" }, |
{ OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" }, |
224 |
{ OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" }, |
{ OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" }, |
225 |
{ OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" }, |
{ OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" }, |
226 |
{ OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" }, |
{ OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" }, |
227 |
{ OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" }, |
{ OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" }, |
228 |
{ OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" }, |
{ OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" }, |
229 |
{ OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" }, |
{ OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" }, |
230 |
{ OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" }, |
{ OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" }, |
231 |
{ OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" }, |
{ OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" }, |
232 |
|
{ OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" }, |
233 |
|
{ OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" }, |
234 |
#ifdef JFRIEDL_DEBUG |
#ifdef JFRIEDL_DEBUG |
235 |
{ OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" }, |
{ OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" }, |
236 |
#endif |
#endif |
343 |
|
|
344 |
/* I (Philip Hazel) have no means of testing this code. It was contributed by |
/* I (Philip Hazel) have no means of testing this code. It was contributed by |
345 |
Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES |
Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES |
346 |
when it did not exist. */ |
when it did not exist. David Byron added a patch that moved the #include of |
347 |
|
<windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after. |
348 |
|
*/ |
349 |
|
|
350 |
#elif HAVE_WINDOWS_H |
#elif HAVE_WINDOWS_H |
351 |
|
|
355 |
#ifndef WIN32_LEAN_AND_MEAN |
#ifndef WIN32_LEAN_AND_MEAN |
356 |
# define WIN32_LEAN_AND_MEAN |
# define WIN32_LEAN_AND_MEAN |
357 |
#endif |
#endif |
358 |
|
|
359 |
|
#include <windows.h> |
360 |
|
|
361 |
#ifndef INVALID_FILE_ATTRIBUTES |
#ifndef INVALID_FILE_ATTRIBUTES |
362 |
#define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF |
#define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF |
363 |
#endif |
#endif |
364 |
|
|
|
#include <windows.h> |
|
|
|
|
365 |
typedef struct directory_type |
typedef struct directory_type |
366 |
{ |
{ |
367 |
HANDLE handle; |
HANDLE handle; |
446 |
|
|
447 |
int isregfile(char *filename) |
int isregfile(char *filename) |
448 |
{ |
{ |
449 |
return !isdirectory(filename) |
return !isdirectory(filename); |
450 |
} |
} |
451 |
|
|
452 |
|
|
457 |
static BOOL |
static BOOL |
458 |
is_stdout_tty(void) |
is_stdout_tty(void) |
459 |
{ |
{ |
460 |
FALSE; |
return FALSE; |
461 |
} |
} |
462 |
|
|
463 |
|
|
822 |
|
|
823 |
|
|
824 |
/************************************************* |
/************************************************* |
825 |
|
* Apply patterns to subject till one matches * |
826 |
|
*************************************************/ |
827 |
|
|
828 |
|
/* This function is called to run through all patterns, looking for a match. It |
829 |
|
is used multiple times for the same subject when colouring is enabled, in order |
830 |
|
to find all possible matches. |
831 |
|
|
832 |
|
Arguments: |
833 |
|
matchptr the start of the subject |
834 |
|
length the length of the subject to match |
835 |
|
offsets the offsets vector to fill in |
836 |
|
mrc address of where to put the result of pcre_exec() |
837 |
|
|
838 |
|
Returns: TRUE if there was a match |
839 |
|
FALSE if there was no match |
840 |
|
invert if there was a non-fatal error |
841 |
|
*/ |
842 |
|
|
843 |
|
static BOOL |
844 |
|
match_patterns(char *matchptr, size_t length, int *offsets, int *mrc) |
845 |
|
{ |
846 |
|
int i; |
847 |
|
for (i = 0; i < pattern_count; i++) |
848 |
|
{ |
849 |
|
*mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, |
850 |
|
PCRE_NOTEMPTY, offsets, OFFSET_SIZE); |
851 |
|
if (*mrc >= 0) return TRUE; |
852 |
|
if (*mrc == PCRE_ERROR_NOMATCH) continue; |
853 |
|
fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc); |
854 |
|
if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1); |
855 |
|
fprintf(stderr, "this text:\n"); |
856 |
|
fwrite(matchptr, 1, length, stderr); /* In case binary zero included */ |
857 |
|
fprintf(stderr, "\n"); |
858 |
|
if (error_count == 0 && |
859 |
|
(*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)) |
860 |
|
{ |
861 |
|
fprintf(stderr, "pcregrep: error %d means that a resource limit " |
862 |
|
"was exceeded\n", *mrc); |
863 |
|
fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n"); |
864 |
|
} |
865 |
|
if (error_count++ > 20) |
866 |
|
{ |
867 |
|
fprintf(stderr, "pcregrep: too many errors - abandoned\n"); |
868 |
|
exit(2); |
869 |
|
} |
870 |
|
return invert; /* No more matching; don't show the line again */ |
871 |
|
} |
872 |
|
|
873 |
|
return FALSE; /* No match, no errors */ |
874 |
|
} |
875 |
|
|
876 |
|
|
877 |
|
|
878 |
|
/************************************************* |
879 |
* Grep an individual file * |
* Grep an individual file * |
880 |
*************************************************/ |
*************************************************/ |
881 |
|
|
887 |
"before" context printing. |
"before" context printing. |
888 |
|
|
889 |
Arguments: |
Arguments: |
890 |
in the fopened FILE stream |
handle the fopened FILE stream for a normal file |
891 |
|
the gzFile pointer when reading is via libz |
892 |
|
the BZFILE pointer when reading is via libbz2 |
893 |
|
frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2 |
894 |
printname the file name if it is to be printed for each match |
printname the file name if it is to be printed for each match |
895 |
or NULL if the file name is not to be printed |
or NULL if the file name is not to be printed |
896 |
it cannot be NULL if filenames[_nomatch]_only is set |
it cannot be NULL if filenames[_nomatch]_only is set |
897 |
|
|
898 |
Returns: 0 if there was at least one match |
Returns: 0 if there was at least one match |
899 |
1 otherwise (no matches) |
1 otherwise (no matches) |
900 |
|
2 if there is a read error on a .bz2 file |
901 |
*/ |
*/ |
902 |
|
|
903 |
static int |
static int |
904 |
pcregrep(FILE *in, char *printname) |
pcregrep(void *handle, int frtype, char *printname) |
905 |
{ |
{ |
906 |
int rc = 1; |
int rc = 1; |
907 |
int linenumber = 1; |
int linenumber = 1; |
908 |
int lastmatchnumber = 0; |
int lastmatchnumber = 0; |
909 |
int count = 0; |
int count = 0; |
910 |
int offsets[99]; |
int filepos = 0; |
911 |
|
int offsets[OFFSET_SIZE]; |
912 |
char *lastmatchrestart = NULL; |
char *lastmatchrestart = NULL; |
913 |
char buffer[3*MBUFTHIRD]; |
char buffer[3*MBUFTHIRD]; |
914 |
char *ptr = buffer; |
char *ptr = buffer; |
915 |
char *endptr; |
char *endptr; |
916 |
size_t bufflength; |
size_t bufflength; |
917 |
BOOL endhyphenpending = FALSE; |
BOOL endhyphenpending = FALSE; |
918 |
|
FILE *in = NULL; /* Ensure initialized */ |
919 |
|
|
920 |
|
#ifdef SUPPORT_LIBZ |
921 |
|
gzFile ingz = NULL; |
922 |
|
#endif |
923 |
|
|
924 |
|
#ifdef SUPPORT_LIBBZ2 |
925 |
|
BZFILE *inbz2 = NULL; |
926 |
|
#endif |
927 |
|
|
928 |
|
|
929 |
|
/* Do the first read into the start of the buffer and set up the pointer to end |
930 |
|
of what we have. In the case of libz, a non-zipped .gz file will be read as a |
931 |
|
plain file. However, if a .bz2 file isn't actually bzipped, the first read will |
932 |
|
fail. */ |
933 |
|
|
934 |
|
#ifdef SUPPORT_LIBZ |
935 |
|
if (frtype == FR_LIBZ) |
936 |
|
{ |
937 |
|
ingz = (gzFile)handle; |
938 |
|
bufflength = gzread (ingz, buffer, 3*MBUFTHIRD); |
939 |
|
} |
940 |
|
else |
941 |
|
#endif |
942 |
|
|
943 |
/* Do the first read into the start of the buffer and set up the pointer to |
#ifdef SUPPORT_LIBBZ2 |
944 |
end of what we have. */ |
if (frtype == FR_LIBBZ2) |
945 |
|
{ |
946 |
|
inbz2 = (BZFILE *)handle; |
947 |
|
bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD); |
948 |
|
if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */ |
949 |
|
} /* without the cast it is unsigned. */ |
950 |
|
else |
951 |
|
#endif |
952 |
|
|
953 |
|
{ |
954 |
|
in = (FILE *)handle; |
955 |
|
bufflength = fread(buffer, 1, 3*MBUFTHIRD, in); |
956 |
|
} |
957 |
|
|
|
bufflength = fread(buffer, 1, 3*MBUFTHIRD, in); |
|
958 |
endptr = buffer + bufflength; |
endptr = buffer + bufflength; |
959 |
|
|
960 |
/* Loop while the current pointer is not at the end of the file. For large |
/* Loop while the current pointer is not at the end of the file. For large |
964 |
|
|
965 |
while (ptr < endptr) |
while (ptr < endptr) |
966 |
{ |
{ |
967 |
int i, endlinelength; |
int endlinelength; |
968 |
int mrc = 0; |
int mrc = 0; |
969 |
BOOL match = FALSE; |
BOOL match; |
970 |
|
char *matchptr = ptr; |
971 |
char *t = ptr; |
char *t = ptr; |
972 |
size_t length, linelength; |
size_t length, linelength; |
973 |
|
|
974 |
/* At this point, ptr is at the start of a line. We need to find the length |
/* At this point, ptr is at the start of a line. We need to find the length |
975 |
of the subject string to pass to pcre_exec(). In multiline mode, it is the |
of the subject string to pass to pcre_exec(). In multiline mode, it is the |
976 |
length of the remainder of the data in the buffer. Otherwise, it is the length of |
length of the remainder of the data in the buffer. Otherwise, it is the length of |
977 |
the next line. After matching, we always advance by the length of the next |
the next line, excluding the terminating newline. After matching, we always |
978 |
line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so |
advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE |
979 |
that any match is constrained to be in the first line. */ |
option is used for compiling, so that any match is constrained to be in the |
980 |
|
first line. */ |
981 |
|
|
982 |
t = end_of_line(t, endptr, &endlinelength); |
t = end_of_line(t, endptr, &endlinelength); |
983 |
linelength = t - ptr - endlinelength; |
linelength = t - ptr - endlinelength; |
992 |
#include <time.h> |
#include <time.h> |
993 |
struct timeval start_time, end_time; |
struct timeval start_time, end_time; |
994 |
struct timezone dummy; |
struct timezone dummy; |
995 |
|
int i; |
996 |
|
|
997 |
if (jfriedl_XT) |
if (jfriedl_XT) |
998 |
{ |
{ |
1018 |
|
|
1019 |
|
|
1020 |
for (i = 0; i < jfriedl_XR; i++) |
for (i = 0; i < jfriedl_XR; i++) |
1021 |
match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0); |
match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, |
1022 |
|
PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0); |
1023 |
|
|
1024 |
if (gettimeofday(&end_time, &dummy) != 0) |
if (gettimeofday(&end_time, &dummy) != 0) |
1025 |
perror("bad gettimeofday"); |
perror("bad gettimeofday"); |
1033 |
} |
} |
1034 |
#endif |
#endif |
1035 |
|
|
1036 |
|
/* We come back here after a match when the -o option (only_matching) is set, |
1037 |
|
in order to find any further matches in the same line. */ |
1038 |
|
|
1039 |
/* Run through all the patterns until one matches. Note that we don't include |
ONLY_MATCHING_RESTART: |
|
the final newline in the subject string. */ |
|
1040 |
|
|
1041 |
for (i = 0; i < pattern_count; i++) |
/* Run through all the patterns until one matches or there is an error other |
1042 |
{ |
than NOMATCH. This code is in a subroutine so that it can be re-used for |
1043 |
mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0, |
finding subsequent matches when colouring matched lines. */ |
1044 |
offsets, 99); |
|
1045 |
if (mrc >= 0) { match = TRUE; break; } |
match = match_patterns(matchptr, length, offsets, &mrc); |
|
if (mrc != PCRE_ERROR_NOMATCH) |
|
|
{ |
|
|
fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc); |
|
|
if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1); |
|
|
fprintf(stderr, "this line:\n"); |
|
|
fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */ |
|
|
fprintf(stderr, "\n"); |
|
|
if (error_count == 0 && |
|
|
(mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT)) |
|
|
{ |
|
|
fprintf(stderr, "pcregrep: error %d means that a resource limit " |
|
|
"was exceeded\n", mrc); |
|
|
fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n"); |
|
|
} |
|
|
if (error_count++ > 20) |
|
|
{ |
|
|
fprintf(stderr, "pcregrep: too many errors - abandoned\n"); |
|
|
exit(2); |
|
|
} |
|
|
match = invert; /* No more matching; don't show the line again */ |
|
|
break; |
|
|
} |
|
|
} |
|
1046 |
|
|
1047 |
/* If it's a match or a not-match (as required), do what's wanted. */ |
/* If it's a match or a not-match (as required), do what's wanted. */ |
1048 |
|
|
1072 |
else if (quiet) return 0; |
else if (quiet) return 0; |
1073 |
|
|
1074 |
/* The --only-matching option prints just the substring that matched, and |
/* The --only-matching option prints just the substring that matched, and |
1075 |
does not pring any context. */ |
the --file-offsets and --line-offsets options output offsets for the |
1076 |
|
matching substring (they both force --only-matching). None of these options |
1077 |
|
prints any context. Afterwards, adjust the start and length, and then jump |
1078 |
|
back to look for further matches in the same line. If we are in invert |
1079 |
|
mode, however, nothing is printed - this could be still useful because the |
1080 |
|
return code is set. */ |
1081 |
|
|
1082 |
else if (only_matching) |
else if (only_matching) |
1083 |
{ |
{ |
1084 |
if (printname != NULL) fprintf(stdout, "%s:", printname); |
if (!invert) |
1085 |
if (number) fprintf(stdout, "%d:", linenumber); |
{ |
1086 |
fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout); |
if (printname != NULL) fprintf(stdout, "%s:", printname); |
1087 |
fprintf(stdout, "\n"); |
if (number) fprintf(stdout, "%d:", linenumber); |
1088 |
|
if (line_offsets) |
1089 |
|
fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr), |
1090 |
|
offsets[1] - offsets[0]); |
1091 |
|
else if (file_offsets) |
1092 |
|
fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr), |
1093 |
|
offsets[1] - offsets[0]); |
1094 |
|
else |
1095 |
|
{ |
1096 |
|
if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string); |
1097 |
|
fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout); |
1098 |
|
if (do_colour) fprintf(stdout, "%c[00m", 0x1b); |
1099 |
|
} |
1100 |
|
fprintf(stdout, "\n"); |
1101 |
|
matchptr += offsets[1]; |
1102 |
|
length -= offsets[1]; |
1103 |
|
match = FALSE; |
1104 |
|
goto ONLY_MATCHING_RESTART; |
1105 |
|
} |
1106 |
} |
} |
1107 |
|
|
1108 |
/* This is the default case when none of the above options is set. We print |
/* This is the default case when none of the above options is set. We print |
1233 |
else |
else |
1234 |
#endif |
#endif |
1235 |
|
|
1236 |
/* We have to split the line(s) up if colouring. */ |
/* We have to split the line(s) up if colouring, and search for further |
1237 |
|
matches. */ |
1238 |
|
|
1239 |
if (do_colour) |
if (do_colour) |
1240 |
{ |
{ |
1241 |
|
int last_offset = 0; |
1242 |
fwrite(ptr, 1, offsets[0], stdout); |
fwrite(ptr, 1, offsets[0], stdout); |
1243 |
fprintf(stdout, "%c[%sm", 0x1b, colour_string); |
fprintf(stdout, "%c[%sm", 0x1b, colour_string); |
1244 |
fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout); |
fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout); |
1245 |
fprintf(stdout, "%c[00m", 0x1b); |
fprintf(stdout, "%c[00m", 0x1b); |
1246 |
fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout); |
for (;;) |
1247 |
|
{ |
1248 |
|
last_offset += offsets[1]; |
1249 |
|
matchptr += offsets[1]; |
1250 |
|
length -= offsets[1]; |
1251 |
|
if (!match_patterns(matchptr, length, offsets, &mrc)) break; |
1252 |
|
fwrite(matchptr, 1, offsets[0], stdout); |
1253 |
|
fprintf(stdout, "%c[%sm", 0x1b, colour_string); |
1254 |
|
fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout); |
1255 |
|
fprintf(stdout, "%c[00m", 0x1b); |
1256 |
|
} |
1257 |
|
fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset, |
1258 |
|
stdout); |
1259 |
} |
} |
1260 |
|
|
1261 |
|
/* Not colouring; no need to search for further matches */ |
1262 |
|
|
1263 |
else fwrite(ptr, 1, linelength + endlinelength, stdout); |
else fwrite(ptr, 1, linelength + endlinelength, stdout); |
1264 |
} |
} |
1265 |
|
|
1292 |
linelength = endmatch - ptr - ellength; |
linelength = endmatch - ptr - ellength; |
1293 |
} |
} |
1294 |
|
|
1295 |
/* Advance to after the newline and increment the line number. */ |
/* Advance to after the newline and increment the line number. The file |
1296 |
|
offset to the current line is maintained in filepos. */ |
1297 |
|
|
1298 |
ptr += linelength + endlinelength; |
ptr += linelength + endlinelength; |
1299 |
|
filepos += linelength + endlinelength; |
1300 |
linenumber++; |
linenumber++; |
1301 |
|
|
1302 |
/* If we haven't yet reached the end of the file (the buffer is full), and |
/* If we haven't yet reached the end of the file (the buffer is full), and |
1318 |
|
|
1319 |
memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD); |
memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD); |
1320 |
ptr -= MBUFTHIRD; |
ptr -= MBUFTHIRD; |
1321 |
|
|
1322 |
|
#ifdef SUPPORT_LIBZ |
1323 |
|
if (frtype == FR_LIBZ) |
1324 |
|
bufflength = 2*MBUFTHIRD + |
1325 |
|
gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD); |
1326 |
|
else |
1327 |
|
#endif |
1328 |
|
|
1329 |
|
#ifdef SUPPORT_LIBBZ2 |
1330 |
|
if (frtype == FR_LIBBZ2) |
1331 |
|
bufflength = 2*MBUFTHIRD + |
1332 |
|
BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD); |
1333 |
|
else |
1334 |
|
#endif |
1335 |
|
|
1336 |
bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in); |
bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in); |
1337 |
|
|
1338 |
endptr = buffer + bufflength; |
endptr = buffer + bufflength; |
1339 |
|
|
1340 |
/* Adjust any last match point */ |
/* Adjust any last match point */ |
1398 |
{ |
{ |
1399 |
int rc = 1; |
int rc = 1; |
1400 |
int sep; |
int sep; |
1401 |
FILE *in; |
int frtype; |
1402 |
|
int pathlen; |
1403 |
|
void *handle; |
1404 |
|
FILE *in = NULL; /* Ensure initialized */ |
1405 |
|
|
1406 |
|
#ifdef SUPPORT_LIBZ |
1407 |
|
gzFile ingz = NULL; |
1408 |
|
#endif |
1409 |
|
|
1410 |
|
#ifdef SUPPORT_LIBBZ2 |
1411 |
|
BZFILE *inbz2 = NULL; |
1412 |
|
#endif |
1413 |
|
|
1414 |
/* If the file name is "-" we scan stdin */ |
/* If the file name is "-" we scan stdin */ |
1415 |
|
|
1416 |
if (strcmp(pathname, "-") == 0) |
if (strcmp(pathname, "-") == 0) |
1417 |
{ |
{ |
1418 |
return pcregrep(stdin, |
return pcregrep(stdin, FR_PLAIN, |
1419 |
(filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))? |
(filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))? |
1420 |
stdin_name : NULL); |
stdin_name : NULL); |
1421 |
} |
} |
1422 |
|
|
|
|
|
1423 |
/* If the file is a directory, skip if skipping or if we are recursing, scan |
/* If the file is a directory, skip if skipping or if we are recursing, scan |
1424 |
each file within it, subject to any include or exclude patterns that were set. |
each file and directory within it, subject to any include or exclude patterns |
1425 |
The scanning code is localized so it can be made system-specific. */ |
that were set. The scanning code is localized so it can be made |
1426 |
|
system-specific. */ |
1427 |
|
|
1428 |
if ((sep = isdirectory(pathname)) != 0) |
if ((sep = isdirectory(pathname)) != 0) |
1429 |
{ |
{ |
1444 |
|
|
1445 |
while ((nextfile = readdirectory(dir)) != NULL) |
while ((nextfile = readdirectory(dir)) != NULL) |
1446 |
{ |
{ |
1447 |
int frc, blen; |
int frc, nflen; |
1448 |
sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile); |
sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile); |
1449 |
blen = strlen(buffer); |
nflen = strlen(nextfile); |
1450 |
|
|
1451 |
if (exclude_compiled != NULL && |
if (isdirectory(buffer)) |
1452 |
pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0) |
{ |
1453 |
continue; |
if (exclude_dir_compiled != NULL && |
1454 |
|
pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0) |
1455 |
if (include_compiled != NULL && |
continue; |
1456 |
pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0) |
|
1457 |
continue; |
if (include_dir_compiled != NULL && |
1458 |
|
pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0) |
1459 |
|
continue; |
1460 |
|
} |
1461 |
|
else |
1462 |
|
{ |
1463 |
|
if (exclude_compiled != NULL && |
1464 |
|
pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0) |
1465 |
|
continue; |
1466 |
|
|
1467 |
|
if (include_compiled != NULL && |
1468 |
|
pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0) |
1469 |
|
continue; |
1470 |
|
} |
1471 |
|
|
1472 |
frc = grep_or_recurse(buffer, dir_recurse, FALSE); |
frc = grep_or_recurse(buffer, dir_recurse, FALSE); |
1473 |
if (frc > 1) rc = frc; |
if (frc > 1) rc = frc; |
1490 |
argument at top level, we don't show the file name, unless we are only showing |
argument at top level, we don't show the file name, unless we are only showing |
1491 |
the file name, or the filename was forced (-H). */ |
the file name, or the filename was forced (-H). */ |
1492 |
|
|
1493 |
in = fopen(pathname, "r"); |
pathlen = strlen(pathname); |
1494 |
if (in == NULL) |
|
1495 |
|
/* Open using zlib if it is supported and the file name ends with .gz. */ |
1496 |
|
|
1497 |
|
#ifdef SUPPORT_LIBZ |
1498 |
|
if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0) |
1499 |
|
{ |
1500 |
|
ingz = gzopen(pathname, "rb"); |
1501 |
|
if (ingz == NULL) |
1502 |
|
{ |
1503 |
|
if (!silent) |
1504 |
|
fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname, |
1505 |
|
strerror(errno)); |
1506 |
|
return 2; |
1507 |
|
} |
1508 |
|
handle = (void *)ingz; |
1509 |
|
frtype = FR_LIBZ; |
1510 |
|
} |
1511 |
|
else |
1512 |
|
#endif |
1513 |
|
|
1514 |
|
/* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */ |
1515 |
|
|
1516 |
|
#ifdef SUPPORT_LIBBZ2 |
1517 |
|
if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0) |
1518 |
|
{ |
1519 |
|
inbz2 = BZ2_bzopen(pathname, "rb"); |
1520 |
|
handle = (void *)inbz2; |
1521 |
|
frtype = FR_LIBBZ2; |
1522 |
|
} |
1523 |
|
else |
1524 |
|
#endif |
1525 |
|
|
1526 |
|
/* Otherwise use plain fopen(). The label is so that we can come back here if |
1527 |
|
an attempt to read a .bz2 file indicates that it really is a plain file. */ |
1528 |
|
|
1529 |
|
#ifdef SUPPORT_LIBBZ2 |
1530 |
|
PLAIN_FILE: |
1531 |
|
#endif |
1532 |
|
{ |
1533 |
|
in = fopen(pathname, "rb"); |
1534 |
|
handle = (void *)in; |
1535 |
|
frtype = FR_PLAIN; |
1536 |
|
} |
1537 |
|
|
1538 |
|
/* All the opening methods set errno when they fail. */ |
1539 |
|
|
1540 |
|
if (handle == NULL) |
1541 |
{ |
{ |
1542 |
if (!silent) |
if (!silent) |
1543 |
fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname, |
fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname, |
1545 |
return 2; |
return 2; |
1546 |
} |
} |
1547 |
|
|
1548 |
rc = pcregrep(in, (filenames > FN_DEFAULT || |
/* Now grep the file */ |
1549 |
|
|
1550 |
|
rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT || |
1551 |
(filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL); |
(filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL); |
1552 |
|
|
1553 |
|
/* Close in an appropriate manner. */ |
1554 |
|
|
1555 |
|
#ifdef SUPPORT_LIBZ |
1556 |
|
if (frtype == FR_LIBZ) |
1557 |
|
gzclose(ingz); |
1558 |
|
else |
1559 |
|
#endif |
1560 |
|
|
1561 |
|
/* If it is a .bz2 file and the result is 2, it means that the first attempt to |
1562 |
|
read failed. If the error indicates that the file isn't in fact bzipped, try |
1563 |
|
again as a normal file. */ |
1564 |
|
|
1565 |
|
#ifdef SUPPORT_LIBBZ2 |
1566 |
|
if (frtype == FR_LIBBZ2) |
1567 |
|
{ |
1568 |
|
if (rc == 2) |
1569 |
|
{ |
1570 |
|
int errnum; |
1571 |
|
const char *err = BZ2_bzerror(inbz2, &errnum); |
1572 |
|
if (errnum == BZ_DATA_ERROR_MAGIC) |
1573 |
|
{ |
1574 |
|
BZ2_bzclose(inbz2); |
1575 |
|
goto PLAIN_FILE; |
1576 |
|
} |
1577 |
|
else if (!silent) |
1578 |
|
fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n", |
1579 |
|
pathname, err); |
1580 |
|
} |
1581 |
|
BZ2_bzclose(inbz2); |
1582 |
|
} |
1583 |
|
else |
1584 |
|
#endif |
1585 |
|
|
1586 |
|
/* Normal file close */ |
1587 |
|
|
1588 |
fclose(in); |
fclose(in); |
1589 |
|
|
1590 |
|
/* Pass back the yield from pcregrep(). */ |
1591 |
|
|
1592 |
return rc; |
return rc; |
1593 |
} |
} |
1594 |
|
|
1609 |
if (op->one_char > 0) fprintf(stderr, "%c", op->one_char); |
if (op->one_char > 0) fprintf(stderr, "%c", op->one_char); |
1610 |
} |
} |
1611 |
fprintf(stderr, "] [long options] [pattern] [files]\n"); |
fprintf(stderr, "] [long options] [pattern] [files]\n"); |
1612 |
fprintf(stderr, "Type `pcregrep --help' for more information.\n"); |
fprintf(stderr, "Type `pcregrep --help' for more information and the long " |
1613 |
|
"options.\n"); |
1614 |
return rc; |
return rc; |
1615 |
} |
} |
1616 |
|
|
1629 |
printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n"); |
printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n"); |
1630 |
printf("Search for PATTERN in each FILE or standard input.\n"); |
printf("Search for PATTERN in each FILE or standard input.\n"); |
1631 |
printf("PATTERN must be present if neither -e nor -f is used.\n"); |
printf("PATTERN must be present if neither -e nor -f is used.\n"); |
1632 |
printf("\"-\" can be used as a file name to mean STDIN.\n\n"); |
printf("\"-\" can be used as a file name to mean STDIN.\n"); |
|
printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n"); |
|
1633 |
|
|
1634 |
|
#ifdef SUPPORT_LIBZ |
1635 |
|
printf("Files whose names end in .gz are read using zlib.\n"); |
1636 |
|
#endif |
1637 |
|
|
1638 |
|
#ifdef SUPPORT_LIBBZ2 |
1639 |
|
printf("Files whose names end in .bz2 are read using bzlib2.\n"); |
1640 |
|
#endif |
1641 |
|
|
1642 |
|
#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2 |
1643 |
|
printf("Other files and the standard input are read as plain files.\n\n"); |
1644 |
|
#else |
1645 |
|
printf("All files are read as plain files, without any interpretation.\n\n"); |
1646 |
|
#endif |
1647 |
|
|
1648 |
|
printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n"); |
1649 |
printf("Options:\n"); |
printf("Options:\n"); |
1650 |
|
|
1651 |
for (op = optionlist; op->one_char != 0; op++) |
for (op = optionlist; op->one_char != 0; op++) |
1653 |
int n; |
int n; |
1654 |
char s[4]; |
char s[4]; |
1655 |
if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " "); |
if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " "); |
1656 |
printf(" %s --%s%n", s, op->long_name, &n); |
n = 30 - printf(" %s --%s", s, op->long_name); |
|
n = 30 - n; |
|
1657 |
if (n < 1) n = 1; |
if (n < 1) n = 1; |
1658 |
printf("%.*s%s\n", n, " ", op->help_text); |
printf("%.*s%s\n", n, " ", op->help_text); |
1659 |
} |
} |
1678 |
{ |
{ |
1679 |
switch(letter) |
switch(letter) |
1680 |
{ |
{ |
1681 |
|
case N_FOFFSETS: file_offsets = TRUE; break; |
1682 |
case N_HELP: help(); exit(0); |
case N_HELP: help(); exit(0); |
1683 |
|
case N_LOFFSETS: line_offsets = number = TRUE; break; |
1684 |
case 'c': count_only = TRUE; break; |
case 'c': count_only = TRUE; break; |
1685 |
case 'F': process_options |= PO_FIXED_STRINGS; break; |
case 'F': process_options |= PO_FIXED_STRINGS; break; |
1686 |
case 'H': filenames = FN_FORCE; break; |
case 'H': filenames = FN_FORCE; break; |
1871 |
|
|
1872 |
/* Set the default line ending value from the default in the PCRE library; |
/* Set the default line ending value from the default in the PCRE library; |
1873 |
"lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf". |
"lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf". |
1874 |
*/ |
Note that the return values from pcre_config(), though derived from the ASCII |
1875 |
|
codes, are the same in EBCDIC environments, so we must use the actual values |
1876 |
|
rather than escapes such as '\r'. */ |
1877 |
|
|
1878 |
(void)pcre_config(PCRE_CONFIG_NEWLINE, &i); |
(void)pcre_config(PCRE_CONFIG_NEWLINE, &i); |
1879 |
switch(i) |
switch(i) |
1880 |
{ |
{ |
1881 |
default: newline = (char *)"lf"; break; |
default: newline = (char *)"lf"; break; |
1882 |
case '\r': newline = (char *)"cr"; break; |
case 13: newline = (char *)"cr"; break; |
1883 |
case ('\r' << 8) | '\n': newline = (char *)"crlf"; break; |
case (13 << 8) | 10: newline = (char *)"crlf"; break; |
1884 |
case -1: newline = (char *)"any"; break; |
case -1: newline = (char *)"any"; break; |
1885 |
case -2: newline = (char *)"anycrlf"; break; |
case -2: newline = (char *)"anycrlf"; break; |
1886 |
} |
} |
1887 |
|
|
1888 |
/* Process the options */ |
/* Process the options */ |
2119 |
if (before_context == 0) before_context = both_context; |
if (before_context == 0) before_context = both_context; |
2120 |
} |
} |
2121 |
|
|
2122 |
|
/* Only one of --only-matching, --file-offsets, or --line-offsets is permitted. |
2123 |
|
However, the latter two set the only_matching flag. */ |
2124 |
|
|
2125 |
|
if ((only_matching && (file_offsets || line_offsets)) || |
2126 |
|
(file_offsets && line_offsets)) |
2127 |
|
{ |
2128 |
|
fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets " |
2129 |
|
"and/or --line-offsets\n"); |
2130 |
|
exit(usage(2)); |
2131 |
|
} |
2132 |
|
|
2133 |
|
if (file_offsets || line_offsets) only_matching = TRUE; |
2134 |
|
|
2135 |
/* If a locale has not been provided as an option, see if the LC_CTYPE or |
/* If a locale has not been provided as an option, see if the LC_CTYPE or |
2136 |
LC_ALL environment variable is set, and if so, use it. */ |
LC_ALL environment variable is set, and if so, use it. */ |
2137 |
|
|
2365 |
} |
} |
2366 |
} |
} |
2367 |
|
|
2368 |
|
if (exclude_dir_pattern != NULL) |
2369 |
|
{ |
2370 |
|
exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr, |
2371 |
|
pcretables); |
2372 |
|
if (exclude_dir_compiled == NULL) |
2373 |
|
{ |
2374 |
|
fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n", |
2375 |
|
errptr, error); |
2376 |
|
goto EXIT2; |
2377 |
|
} |
2378 |
|
} |
2379 |
|
|
2380 |
|
if (include_dir_pattern != NULL) |
2381 |
|
{ |
2382 |
|
include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr, |
2383 |
|
pcretables); |
2384 |
|
if (include_dir_compiled == NULL) |
2385 |
|
{ |
2386 |
|
fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n", |
2387 |
|
errptr, error); |
2388 |
|
goto EXIT2; |
2389 |
|
} |
2390 |
|
} |
2391 |
|
|
2392 |
/* If there are no further arguments, do the business on stdin and exit. */ |
/* If there are no further arguments, do the business on stdin and exit. */ |
2393 |
|
|
2394 |
if (i >= argc) |
if (i >= argc) |
2395 |
{ |
{ |
2396 |
rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL); |
rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL); |
2397 |
goto EXIT; |
goto EXIT; |
2398 |
} |
} |
2399 |
|
|