3 |
*************************************************/ |
*************************************************/ |
4 |
|
|
5 |
/* This is a grep program that uses the PCRE regular expression library to do |
/* This is a grep program that uses the PCRE regular expression library to do |
6 |
its pattern matching. On a Unix system it can recurse into directories. */ |
its pattern matching. On a Unix or Win32 system it can recurse into |
7 |
|
directories. |
8 |
|
|
9 |
|
Copyright (c) 1997-2011 University of Cambridge |
10 |
|
|
11 |
|
----------------------------------------------------------------------------- |
12 |
|
Redistribution and use in source and binary forms, with or without |
13 |
|
modification, are permitted provided that the following conditions are met: |
14 |
|
|
15 |
|
* Redistributions of source code must retain the above copyright notice, |
16 |
|
this list of conditions and the following disclaimer. |
17 |
|
|
18 |
|
* Redistributions in binary form must reproduce the above copyright |
19 |
|
notice, this list of conditions and the following disclaimer in the |
20 |
|
documentation and/or other materials provided with the distribution. |
21 |
|
|
22 |
|
* Neither the name of the University of Cambridge nor the names of its |
23 |
|
contributors may be used to endorse or promote products derived from |
24 |
|
this software without specific prior written permission. |
25 |
|
|
26 |
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
27 |
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
28 |
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
29 |
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
30 |
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
31 |
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
32 |
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
33 |
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
34 |
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
35 |
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
36 |
|
POSSIBILITY OF SUCH DAMAGE. |
37 |
|
----------------------------------------------------------------------------- |
38 |
|
*/ |
39 |
|
|
40 |
|
#ifdef HAVE_CONFIG_H |
41 |
|
#include "config.h" |
42 |
|
#endif |
43 |
|
|
44 |
#include <ctype.h> |
#include <ctype.h> |
45 |
|
#include <locale.h> |
46 |
#include <stdio.h> |
#include <stdio.h> |
47 |
#include <string.h> |
#include <string.h> |
48 |
#include <stdlib.h> |
#include <stdlib.h> |
49 |
#include <errno.h> |
#include <errno.h> |
50 |
#include "config.h" |
|
51 |
|
#include <sys/types.h> |
52 |
|
#include <sys/stat.h> |
53 |
|
|
54 |
|
#ifdef HAVE_UNISTD_H |
55 |
|
#include <unistd.h> |
56 |
|
#endif |
57 |
|
|
58 |
|
#ifdef SUPPORT_LIBZ |
59 |
|
#include <zlib.h> |
60 |
|
#endif |
61 |
|
|
62 |
|
#ifdef SUPPORT_LIBBZ2 |
63 |
|
#include <bzlib.h> |
64 |
|
#endif |
65 |
|
|
66 |
#include "pcre.h" |
#include "pcre.h" |
67 |
|
|
68 |
#define FALSE 0 |
#define FALSE 0 |
70 |
|
|
71 |
typedef int BOOL; |
typedef int BOOL; |
72 |
|
|
|
#define VERSION "2.0 01-Aug-2001" |
|
73 |
#define MAX_PATTERN_COUNT 100 |
#define MAX_PATTERN_COUNT 100 |
74 |
|
#define OFFSET_SIZE 99 |
75 |
|
|
76 |
|
#if BUFSIZ > 8192 |
77 |
|
#define PATBUFSIZE BUFSIZ |
78 |
|
#else |
79 |
|
#define PATBUFSIZE 8192 |
80 |
|
#endif |
81 |
|
|
82 |
|
/* Values for the "filenames" variable, which specifies options for file name |
83 |
|
output. The order is important; it is assumed that a file name is wanted for |
84 |
|
all values greater than FN_DEFAULT. */ |
85 |
|
|
86 |
|
enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE }; |
87 |
|
|
88 |
|
/* File reading styles */ |
89 |
|
|
90 |
|
enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 }; |
91 |
|
|
92 |
|
/* Actions for the -d and -D options */ |
93 |
|
|
94 |
|
enum { dee_READ, dee_SKIP, dee_RECURSE }; |
95 |
|
enum { DEE_READ, DEE_SKIP }; |
96 |
|
|
97 |
|
/* Actions for special processing options (flag bits) */ |
98 |
|
|
99 |
|
#define PO_WORD_MATCH 0x0001 |
100 |
|
#define PO_LINE_MATCH 0x0002 |
101 |
|
#define PO_FIXED_STRINGS 0x0004 |
102 |
|
|
103 |
|
/* Line ending types */ |
104 |
|
|
105 |
|
enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF }; |
106 |
|
|
107 |
|
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some |
108 |
|
environments), a warning is issued if the value of fwrite() is ignored. |
109 |
|
Unfortunately, casting to (void) does not suppress the warning. To get round |
110 |
|
this, we use a macro that compiles a fudge. Oddly, this does not also seem to |
111 |
|
apply to fprintf(). */ |
112 |
|
|
113 |
|
#define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {} |
114 |
|
|
115 |
|
|
116 |
|
|
117 |
/************************************************* |
/************************************************* |
118 |
* Global variables * |
* Global variables * |
119 |
*************************************************/ |
*************************************************/ |
120 |
|
|
121 |
|
/* Jeffrey Friedl has some debugging requirements that are not part of the |
122 |
|
regular code. */ |
123 |
|
|
124 |
|
#ifdef JFRIEDL_DEBUG |
125 |
|
static int S_arg = -1; |
126 |
|
static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */ |
127 |
|
static unsigned int jfriedl_XT = 0; /* replicate text this many times */ |
128 |
|
static const char *jfriedl_prefix = ""; |
129 |
|
static const char *jfriedl_postfix = ""; |
130 |
|
#endif |
131 |
|
|
132 |
|
static int endlinetype; |
133 |
|
|
134 |
|
static char *colour_string = (char *)"1;31"; |
135 |
|
static char *colour_option = NULL; |
136 |
|
static char *dee_option = NULL; |
137 |
|
static char *DEE_option = NULL; |
138 |
|
static char *main_buffer = NULL; |
139 |
|
static char *newline = NULL; |
140 |
static char *pattern_filename = NULL; |
static char *pattern_filename = NULL; |
141 |
|
static char *stdin_name = (char *)"(standard input)"; |
142 |
|
static char *locale = NULL; |
143 |
|
|
144 |
|
static const unsigned char *pcretables = NULL; |
145 |
|
|
146 |
static int pattern_count = 0; |
static int pattern_count = 0; |
147 |
static pcre **pattern_list; |
static pcre **pattern_list = NULL; |
148 |
static pcre_extra **hints_list; |
static pcre_extra **hints_list = NULL; |
149 |
|
|
150 |
|
static char *include_pattern = NULL; |
151 |
|
static char *exclude_pattern = NULL; |
152 |
|
static char *include_dir_pattern = NULL; |
153 |
|
static char *exclude_dir_pattern = NULL; |
154 |
|
|
155 |
|
static pcre *include_compiled = NULL; |
156 |
|
static pcre *exclude_compiled = NULL; |
157 |
|
static pcre *include_dir_compiled = NULL; |
158 |
|
static pcre *exclude_dir_compiled = NULL; |
159 |
|
|
160 |
|
static int after_context = 0; |
161 |
|
static int before_context = 0; |
162 |
|
static int both_context = 0; |
163 |
|
static int bufthird = PCREGREP_BUFSIZE; |
164 |
|
static int bufsize = 3*PCREGREP_BUFSIZE; |
165 |
|
static int dee_action = dee_READ; |
166 |
|
static int DEE_action = DEE_READ; |
167 |
|
static int error_count = 0; |
168 |
|
static int filenames = FN_DEFAULT; |
169 |
|
static int only_matching = -1; |
170 |
|
static int process_options = 0; |
171 |
|
static int study_options = 0; |
172 |
|
|
173 |
|
static unsigned long int match_limit = 0; |
174 |
|
static unsigned long int match_limit_recursion = 0; |
175 |
|
|
176 |
static BOOL count_only = FALSE; |
static BOOL count_only = FALSE; |
177 |
static BOOL filenames = TRUE; |
static BOOL do_colour = FALSE; |
178 |
static BOOL filenames_only = FALSE; |
static BOOL file_offsets = FALSE; |
179 |
|
static BOOL hyphenpending = FALSE; |
180 |
static BOOL invert = FALSE; |
static BOOL invert = FALSE; |
181 |
|
static BOOL line_buffered = FALSE; |
182 |
|
static BOOL line_offsets = FALSE; |
183 |
|
static BOOL multiline = FALSE; |
184 |
static BOOL number = FALSE; |
static BOOL number = FALSE; |
185 |
static BOOL recurse = FALSE; |
static BOOL omit_zero_count = FALSE; |
186 |
|
static BOOL resource_error = FALSE; |
187 |
|
static BOOL quiet = FALSE; |
188 |
static BOOL silent = FALSE; |
static BOOL silent = FALSE; |
189 |
static BOOL whole_lines = FALSE; |
static BOOL utf8 = FALSE; |
190 |
|
|
191 |
/* Structure for options and list of them */ |
/* Structure for options and list of them */ |
192 |
|
|
193 |
|
enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER, |
194 |
|
OP_OP_NUMBER, OP_PATLIST }; |
195 |
|
|
196 |
typedef struct option_item { |
typedef struct option_item { |
197 |
|
int type; |
198 |
int one_char; |
int one_char; |
199 |
char *long_name; |
void *dataptr; |
200 |
char *help_text; |
const char *long_name; |
201 |
|
const char *help_text; |
202 |
} option_item; |
} option_item; |
203 |
|
|
204 |
|
/* Options without a single-letter equivalent get a negative value. This can be |
205 |
|
used to identify them. */ |
206 |
|
|
207 |
|
#define N_COLOUR (-1) |
208 |
|
#define N_EXCLUDE (-2) |
209 |
|
#define N_EXCLUDE_DIR (-3) |
210 |
|
#define N_HELP (-4) |
211 |
|
#define N_INCLUDE (-5) |
212 |
|
#define N_INCLUDE_DIR (-6) |
213 |
|
#define N_LABEL (-7) |
214 |
|
#define N_LOCALE (-8) |
215 |
|
#define N_NULL (-9) |
216 |
|
#define N_LOFFSETS (-10) |
217 |
|
#define N_FOFFSETS (-11) |
218 |
|
#define N_LBUFFER (-12) |
219 |
|
#define N_M_LIMIT (-13) |
220 |
|
#define N_M_LIMIT_REC (-14) |
221 |
|
#define N_BUFSIZE (-15) |
222 |
|
|
223 |
static option_item optionlist[] = { |
static option_item optionlist[] = { |
224 |
{ -1, "help", "display this help and exit" }, |
{ OP_NODATA, N_NULL, NULL, "", " terminate options" }, |
225 |
{ 'c', "count", "print only a count of matching lines per FILE" }, |
{ OP_NODATA, N_HELP, NULL, "help", "display this help and exit" }, |
226 |
{ 'h', "no-filename", "suppress the prefixing filename on output" }, |
{ OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" }, |
227 |
{ 'i', "ignore-case", "ignore case distinctions" }, |
{ OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" }, |
228 |
{ 'l', "files-with-matches", "print only FILE names containing matches" }, |
{ OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" }, |
229 |
{ 'n', "line-number", "print line number with output lines" }, |
{ OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" }, |
230 |
{ 'r', "recursive", "recursively scan sub-directories" }, |
{ OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" }, |
231 |
{ 's', "no-messages", "suppress error messages" }, |
{ OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" }, |
232 |
{ 'V', "version", "print version information and exit" }, |
{ OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" }, |
233 |
{ 'v', "invert-match", "select non-matching lines" }, |
{ OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" }, |
234 |
{ 'x', "line-regex", "force PATTERN to match only whole lines" }, |
{ OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" }, |
235 |
{ 'x', "line-regexp", "force PATTERN to match only whole lines" }, |
{ OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" }, |
236 |
{ 0, NULL, NULL } |
{ OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" }, |
237 |
|
{ OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" }, |
238 |
|
{ OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" }, |
239 |
|
{ OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" }, |
240 |
|
{ OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" }, |
241 |
|
{ OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" }, |
242 |
|
{ OP_NODATA, 'j', NULL, "jit", "use JIT compiler if available" }, |
243 |
|
{ OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" }, |
244 |
|
{ OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" }, |
245 |
|
{ OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" }, |
246 |
|
{ OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" }, |
247 |
|
{ OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" }, |
248 |
|
{ OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" }, |
249 |
|
{ OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" }, |
250 |
|
{ OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" }, |
251 |
|
{ OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" }, |
252 |
|
{ OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" }, |
253 |
|
{ OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" }, |
254 |
|
{ OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" }, |
255 |
|
{ OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" }, |
256 |
|
{ OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" }, |
257 |
|
{ OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" }, |
258 |
|
{ OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" }, |
259 |
|
{ OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" }, |
260 |
|
{ OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" }, |
261 |
|
|
262 |
|
/* These two were accidentally implemented with underscores instead of |
263 |
|
hyphens in the option names. As this was not discovered for several releases, |
264 |
|
the incorrect versions are left in the table for compatibility. However, the |
265 |
|
--help function misses out any option that has an underscore in its name. */ |
266 |
|
|
267 |
|
{ OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" }, |
268 |
|
{ OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" }, |
269 |
|
|
270 |
|
#ifdef JFRIEDL_DEBUG |
271 |
|
{ OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" }, |
272 |
|
#endif |
273 |
|
{ OP_NODATA, 's', NULL, "no-messages", "suppress error messages" }, |
274 |
|
{ OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" }, |
275 |
|
{ OP_NODATA, 'V', NULL, "version", "print version information and exit" }, |
276 |
|
{ OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" }, |
277 |
|
{ OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" }, |
278 |
|
{ OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" }, |
279 |
|
{ OP_NODATA, 0, NULL, NULL, NULL } |
280 |
}; |
}; |
281 |
|
|
282 |
|
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F |
283 |
|
options. These set the 1, 2, and 4 bits in process_options, respectively. Note |
284 |
|
that the combination of -w and -x has the same effect as -x on its own, so we |
285 |
|
can treat them as the same. */ |
286 |
|
|
287 |
|
static const char *prefix[] = { |
288 |
|
"", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" }; |
289 |
|
|
290 |
|
static const char *suffix[] = { |
291 |
|
"", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" }; |
292 |
|
|
293 |
|
/* UTF-8 tables - used only when the newline setting is "any". */ |
294 |
|
|
295 |
|
const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; |
296 |
|
|
297 |
|
const char utf8_table4[] = { |
298 |
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
299 |
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
300 |
|
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, |
301 |
|
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; |
302 |
|
|
303 |
|
|
304 |
|
|
305 |
/************************************************* |
/************************************************* |
306 |
* Functions for directory scanning * |
* Exit from the program * |
307 |
|
*************************************************/ |
308 |
|
|
309 |
|
/* If there has been a resource error, give a suitable message. |
310 |
|
|
311 |
|
Argument: the return code |
312 |
|
Returns: does not return |
313 |
|
*/ |
314 |
|
|
315 |
|
static void |
316 |
|
pcregrep_exit(int rc) |
317 |
|
{ |
318 |
|
if (resource_error) |
319 |
|
{ |
320 |
|
fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit " |
321 |
|
"was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT); |
322 |
|
fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n"); |
323 |
|
} |
324 |
|
|
325 |
|
exit(rc); |
326 |
|
} |
327 |
|
|
328 |
|
|
329 |
|
/************************************************* |
330 |
|
* OS-specific functions * |
331 |
*************************************************/ |
*************************************************/ |
332 |
|
|
333 |
/* These functions are defined so that they can be made system specific, |
/* These functions are defined so that they can be made system specific, |
334 |
although at present the only ones are for Unix, and for "no directory recursion |
although at present the only ones are for Unix, Win32, and for "no support". */ |
|
support". */ |
|
335 |
|
|
336 |
|
|
337 |
/************* Directory scanning in Unix ***********/ |
/************* Directory scanning in Unix ***********/ |
338 |
|
|
339 |
#if IS_UNIX |
#if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H |
340 |
#include <sys/types.h> |
#include <sys/types.h> |
341 |
#include <sys/stat.h> |
#include <sys/stat.h> |
342 |
#include <dirent.h> |
#include <dirent.h> |
343 |
|
|
344 |
typedef DIR directory_type; |
typedef DIR directory_type; |
345 |
|
|
346 |
int |
static int |
347 |
isdirectory(char *filename) |
isdirectory(char *filename) |
348 |
{ |
{ |
349 |
struct stat statbuf; |
struct stat statbuf; |
352 |
return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0; |
return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0; |
353 |
} |
} |
354 |
|
|
355 |
directory_type * |
static directory_type * |
356 |
opendirectory(char *filename) |
opendirectory(char *filename) |
357 |
{ |
{ |
358 |
return opendir(filename); |
return opendir(filename); |
359 |
} |
} |
360 |
|
|
361 |
char * |
static char * |
362 |
readdirectory(directory_type *dir) |
readdirectory(directory_type *dir) |
363 |
{ |
{ |
364 |
for (;;) |
for (;;) |
368 |
if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0) |
if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0) |
369 |
return dent->d_name; |
return dent->d_name; |
370 |
} |
} |
371 |
|
/* Control never reaches here */ |
372 |
|
} |
373 |
|
|
374 |
|
static void |
375 |
|
closedirectory(directory_type *dir) |
376 |
|
{ |
377 |
|
closedir(dir); |
378 |
|
} |
379 |
|
|
380 |
|
|
381 |
|
/************* Test for regular file in Unix **********/ |
382 |
|
|
383 |
|
static int |
384 |
|
isregfile(char *filename) |
385 |
|
{ |
386 |
|
struct stat statbuf; |
387 |
|
if (stat(filename, &statbuf) < 0) |
388 |
|
return 1; /* In the expectation that opening as a file will fail */ |
389 |
|
return (statbuf.st_mode & S_IFMT) == S_IFREG; |
390 |
|
} |
391 |
|
|
392 |
|
|
393 |
|
/************* Test for a terminal in Unix **********/ |
394 |
|
|
395 |
|
static BOOL |
396 |
|
is_stdout_tty(void) |
397 |
|
{ |
398 |
|
return isatty(fileno(stdout)); |
399 |
|
} |
400 |
|
|
401 |
|
static BOOL |
402 |
|
is_file_tty(FILE *f) |
403 |
|
{ |
404 |
|
return isatty(fileno(f)); |
405 |
|
} |
406 |
|
|
407 |
|
|
408 |
|
/************* Directory scanning in Win32 ***********/ |
409 |
|
|
410 |
|
/* I (Philip Hazel) have no means of testing this code. It was contributed by |
411 |
|
Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES |
412 |
|
when it did not exist. David Byron added a patch that moved the #include of |
413 |
|
<windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after. |
414 |
|
The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is |
415 |
|
undefined when it is indeed undefined. */ |
416 |
|
|
417 |
|
#elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H |
418 |
|
|
419 |
|
#ifndef STRICT |
420 |
|
# define STRICT |
421 |
|
#endif |
422 |
|
#ifndef WIN32_LEAN_AND_MEAN |
423 |
|
# define WIN32_LEAN_AND_MEAN |
424 |
|
#endif |
425 |
|
|
426 |
|
#include <windows.h> |
427 |
|
|
428 |
|
#ifndef INVALID_FILE_ATTRIBUTES |
429 |
|
#define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF |
430 |
|
#endif |
431 |
|
|
432 |
|
typedef struct directory_type |
433 |
|
{ |
434 |
|
HANDLE handle; |
435 |
|
BOOL first; |
436 |
|
WIN32_FIND_DATA data; |
437 |
|
} directory_type; |
438 |
|
|
439 |
|
int |
440 |
|
isdirectory(char *filename) |
441 |
|
{ |
442 |
|
DWORD attr = GetFileAttributes(filename); |
443 |
|
if (attr == INVALID_FILE_ATTRIBUTES) |
444 |
|
return 0; |
445 |
|
return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0; |
446 |
|
} |
447 |
|
|
448 |
|
directory_type * |
449 |
|
opendirectory(char *filename) |
450 |
|
{ |
451 |
|
size_t len; |
452 |
|
char *pattern; |
453 |
|
directory_type *dir; |
454 |
|
DWORD err; |
455 |
|
len = strlen(filename); |
456 |
|
pattern = (char *) malloc(len + 3); |
457 |
|
dir = (directory_type *) malloc(sizeof(*dir)); |
458 |
|
if ((pattern == NULL) || (dir == NULL)) |
459 |
|
{ |
460 |
|
fprintf(stderr, "pcregrep: malloc failed\n"); |
461 |
|
pcregrep_exit(2); |
462 |
|
} |
463 |
|
memcpy(pattern, filename, len); |
464 |
|
memcpy(&(pattern[len]), "\\*", 3); |
465 |
|
dir->handle = FindFirstFile(pattern, &(dir->data)); |
466 |
|
if (dir->handle != INVALID_HANDLE_VALUE) |
467 |
|
{ |
468 |
|
free(pattern); |
469 |
|
dir->first = TRUE; |
470 |
|
return dir; |
471 |
|
} |
472 |
|
err = GetLastError(); |
473 |
|
free(pattern); |
474 |
|
free(dir); |
475 |
|
errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT; |
476 |
|
return NULL; |
477 |
|
} |
478 |
|
|
479 |
|
char * |
480 |
|
readdirectory(directory_type *dir) |
481 |
|
{ |
482 |
|
for (;;) |
483 |
|
{ |
484 |
|
if (!dir->first) |
485 |
|
{ |
486 |
|
if (!FindNextFile(dir->handle, &(dir->data))) |
487 |
|
return NULL; |
488 |
|
} |
489 |
|
else |
490 |
|
{ |
491 |
|
dir->first = FALSE; |
492 |
|
} |
493 |
|
if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0) |
494 |
|
return dir->data.cFileName; |
495 |
|
} |
496 |
|
#ifndef _MSC_VER |
497 |
return NULL; /* Keep compiler happy; never executed */ |
return NULL; /* Keep compiler happy; never executed */ |
498 |
|
#endif |
499 |
} |
} |
500 |
|
|
501 |
void |
void |
502 |
closedirectory(directory_type *dir) |
closedirectory(directory_type *dir) |
503 |
{ |
{ |
504 |
closedir(dir); |
FindClose(dir->handle); |
505 |
|
free(dir); |
506 |
} |
} |
507 |
|
|
508 |
|
|
509 |
#else |
/************* Test for regular file in Win32 **********/ |
510 |
|
|
511 |
|
/* I don't know how to do this, or if it can be done; assume all paths are |
512 |
|
regular if they are not directories. */ |
513 |
|
|
514 |
|
/* Every path that is not a directory is assumed to be a regular file. */
int isregfile(char *filename)
{
  return isdirectory(filename) == 0;
}
518 |
|
|
519 |
|
|
520 |
|
/************* Test for a terminal in Win32 **********/ |
521 |
|
|
522 |
|
/* I don't know how to do this; assume never */ |
523 |
|
|
524 |
|
static BOOL |
525 |
|
is_stdout_tty(void) |
526 |
|
{ |
527 |
|
return FALSE; |
528 |
|
} |
529 |
|
|
530 |
|
static BOOL |
531 |
|
is_file_tty(FILE *f) |
532 |
|
{ |
533 |
|
return FALSE; |
534 |
|
} |
535 |
|
|
536 |
|
|
537 |
/************* Directory scanning when we can't do it ***********/ |
/************* Directory scanning when we can't do it ***********/ |
538 |
|
|
539 |
/* The type is void, and apart from isdirectory(), the functions do nothing. */ |
/* The type is void, and apart from isdirectory(), the functions do nothing. */ |
540 |
|
|
541 |
|
#else |
542 |
|
|
543 |
typedef void directory_type; |
typedef void directory_type; |
544 |
|
|
545 |
/* Without directory support nothing is ever reported as a directory. */
int isdirectory(char *filename)
{
  (void)filename;   /* the path is not examined */
  return 0;
}
546 |
directory_type * opendirectory(char *filename) {} |
directory_type * opendirectory(char *filename) { return (directory_type*)0;} |
547 |
/* Without directory support there are never any entries: always NULL. */
char *readdirectory(directory_type *dir)
{
  (void)dir;   /* there is no scan state to consult */
  return (char *)0;
}
548 |
/* Without directory support there is nothing to release. */
void closedirectory(directory_type *dir)
{
  (void)dir;
}
549 |
|
|
550 |
|
|
551 |
|
/************* Test for regular when we can't do it **********/ |
552 |
|
|
553 |
|
/* Assume all files are regular. */ |
554 |
|
|
555 |
|
/* With no means of testing, every file is taken to be regular. */
int isregfile(char *filename)
{
  (void)filename;   /* the path is not examined */
  return 1;
}
556 |
|
|
557 |
|
|
558 |
|
/************* Test for a terminal when we can't do it **********/ |
559 |
|
|
560 |
|
static BOOL |
561 |
|
is_stdout_tty(void) |
562 |
|
{ |
563 |
|
return FALSE; |
564 |
|
} |
565 |
|
|
566 |
|
static BOOL |
567 |
|
is_file_tty(FILE *f) |
568 |
|
{ |
569 |
|
return FALSE; |
570 |
|
} |
571 |
|
|
572 |
#endif |
#endif |
573 |
|
|
574 |
|
|
575 |
|
|
576 |
#if ! HAVE_STRERROR |
#ifndef HAVE_STRERROR |
577 |
/************************************************* |
/************************************************* |
578 |
* Provide strerror() for non-ANSI libraries * |
* Provide strerror() for non-ANSI libraries * |
579 |
*************************************************/ |
*************************************************/ |
596 |
|
|
597 |
|
|
598 |
/************************************************* |
/************************************************* |
599 |
* Grep an individual file * |
* Read one line of input * |
600 |
*************************************************/ |
*************************************************/ |
601 |
|
|
602 |
|
/* Normally, input is read using fread() into a large buffer, so many lines may |
603 |
|
be read at once. However, doing this for tty input means that no output appears |
604 |
|
until a lot of input has been typed. Instead, tty input is handled line by |
605 |
|
line. We cannot use fgets() for this, because it does not stop at a binary |
606 |
|
zero, and therefore there is no way of telling how many characters it has read, |
607 |
|
because there may be binary zeros embedded in the data. |
608 |
|
|
609 |
|
Arguments: |
610 |
|
buffer the buffer to read into |
611 |
|
length the maximum number of characters to read |
612 |
|
f the file |
613 |
|
|
614 |
|
Returns: the number of characters read, zero at end of file |
615 |
|
*/ |
616 |
|
|
617 |
static int
read_one_line(char *buffer, int length, FILE *f)
{
  int c;
  int yield = 0;   /* number of bytes stored so far */

  /* The space check comes before the read, so a length of zero (or less)
  stores nothing and consumes nothing. Bytes are copied verbatim, including
  binary zeros, which is why the count is returned instead of relying on a
  terminator. Reading stops after a '\n' has been stored, when the buffer is
  full, or at end of file. */
  while (yield < length && (c = fgetc(f)) != EOF)
    {
    buffer[yield++] = (char)c;
    if (c == '\n') break;
    }

  return yield;
}
629 |
|
|
630 |
|
|
631 |
|
|
632 |
|
/************************************************* |
633 |
|
* Find end of line * |
634 |
|
*************************************************/ |
635 |
|
|
636 |
|
/* The length of the endline sequence that is found is set via lenptr. This may |
637 |
|
be zero at the very end of the file if there is no line-ending sequence there. |
638 |
|
|
639 |
while (fgets(buffer, sizeof(buffer), in) != NULL) |
Arguments: |
640 |
|
p current position in line |
641 |
|
endptr end of available data |
642 |
|
lenptr where to put the length of the eol sequence |
643 |
|
|
644 |
|
Returns: pointer after the last byte of the line, |
645 |
|
including the newline byte(s) |
646 |
|
*/ |
647 |
|
|
648 |
|
static char * |
649 |
|
end_of_line(char *p, char *endptr, int *lenptr) |
650 |
|
{ |
651 |
|
switch(endlinetype) |
652 |
{ |
{ |
653 |
BOOL match = FALSE; |
default: /* Just in case */ |
654 |
int i; |
case EL_LF: |
655 |
int length = (int)strlen(buffer); |
while (p < endptr && *p != '\n') p++; |
656 |
if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0; |
if (p < endptr) |
657 |
linenumber++; |
{ |
658 |
|
*lenptr = 1; |
659 |
|
return p + 1; |
660 |
|
} |
661 |
|
*lenptr = 0; |
662 |
|
return endptr; |
663 |
|
|
664 |
for (i = 0; !match && i < pattern_count; i++) |
case EL_CR: |
665 |
|
while (p < endptr && *p != '\r') p++; |
666 |
|
if (p < endptr) |
667 |
{ |
{ |
668 |
match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0, |
*lenptr = 1; |
669 |
offsets, 99) >= 0; |
return p + 1; |
|
if (match && whole_lines && offsets[1] != length) match = FALSE; |
|
670 |
} |
} |
671 |
|
*lenptr = 0; |
672 |
|
return endptr; |
673 |
|
|
674 |
if (match != invert) |
case EL_CRLF: |
675 |
|
for (;;) |
676 |
{ |
{ |
677 |
if (count_only) count++; |
while (p < endptr && *p != '\r') p++; |
678 |
|
if (++p >= endptr) |
679 |
|
{ |
680 |
|
*lenptr = 0; |
681 |
|
return endptr; |
682 |
|
} |
683 |
|
if (*p == '\n') |
684 |
|
{ |
685 |
|
*lenptr = 2; |
686 |
|
return p + 1; |
687 |
|
} |
688 |
|
} |
689 |
|
break; |
690 |
|
|
691 |
|
case EL_ANYCRLF: |
692 |
|
while (p < endptr) |
693 |
|
{ |
694 |
|
int extra = 0; |
695 |
|
register int c = *((unsigned char *)p); |
696 |
|
|
697 |
else if (filenames_only) |
if (utf8 && c >= 0xc0) |
698 |
{ |
{ |
699 |
fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name); |
int gcii, gcss; |
700 |
return 0; |
extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ |
701 |
|
gcss = 6*extra; |
702 |
|
c = (c & utf8_table3[extra]) << gcss; |
703 |
|
for (gcii = 1; gcii <= extra; gcii++) |
704 |
|
{ |
705 |
|
gcss -= 6; |
706 |
|
c |= (p[gcii] & 0x3f) << gcss; |
707 |
|
} |
708 |
} |
} |
709 |
|
|
710 |
else if (silent) return 0; |
p += 1 + extra; |
711 |
|
|
712 |
else |
switch (c) |
713 |
{ |
{ |
714 |
if (name != NULL) fprintf(stdout, "%s:", name); |
case 0x0a: /* LF */ |
715 |
if (number) fprintf(stdout, "%d:", linenumber); |
*lenptr = 1; |
716 |
fprintf(stdout, "%s\n", buffer); |
return p; |
717 |
|
|
718 |
|
case 0x0d: /* CR */ |
719 |
|
if (p < endptr && *p == 0x0a) |
720 |
|
{ |
721 |
|
*lenptr = 2; |
722 |
|
p++; |
723 |
|
} |
724 |
|
else *lenptr = 1; |
725 |
|
return p; |
726 |
|
|
727 |
|
default: |
728 |
|
break; |
729 |
} |
} |
730 |
|
} /* End of loop for ANYCRLF case */ |
731 |
|
|
732 |
rc = 0; |
*lenptr = 0; /* Must have hit the end */ |
733 |
} |
return endptr; |
|
} |
|
734 |
|
|
735 |
if (count_only) |
case EL_ANY: |
736 |
{ |
while (p < endptr) |
737 |
if (name != NULL) fprintf(stdout, "%s:", name); |
{ |
738 |
fprintf(stdout, "%d\n", count); |
int extra = 0; |
739 |
} |
register int c = *((unsigned char *)p); |
740 |
|
|
741 |
return rc; |
if (utf8 && c >= 0xc0) |
742 |
} |
{ |
743 |
|
int gcii, gcss; |
744 |
|
extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ |
745 |
|
gcss = 6*extra; |
746 |
|
c = (c & utf8_table3[extra]) << gcss; |
747 |
|
for (gcii = 1; gcii <= extra; gcii++) |
748 |
|
{ |
749 |
|
gcss -= 6; |
750 |
|
c |= (p[gcii] & 0x3f) << gcss; |
751 |
|
} |
752 |
|
} |
753 |
|
|
754 |
|
p += 1 + extra; |
755 |
|
|
756 |
|
switch (c) |
757 |
|
{ |
758 |
|
case 0x0a: /* LF */ |
759 |
|
case 0x0b: /* VT */ |
760 |
|
case 0x0c: /* FF */ |
761 |
|
*lenptr = 1; |
762 |
|
return p; |
763 |
|
|
764 |
|
case 0x0d: /* CR */ |
765 |
|
if (p < endptr && *p == 0x0a) |
766 |
|
{ |
767 |
|
*lenptr = 2; |
768 |
|
p++; |
769 |
|
} |
770 |
|
else *lenptr = 1; |
771 |
|
return p; |
772 |
|
|
773 |
|
case 0x85: /* NEL */ |
774 |
|
*lenptr = utf8? 2 : 1; |
775 |
|
return p; |
776 |
|
|
777 |
|
case 0x2028: /* LS */ |
778 |
|
case 0x2029: /* PS */ |
779 |
|
*lenptr = 3; |
780 |
|
return p; |
781 |
|
|
782 |
|
default: |
783 |
|
break; |
784 |
|
} |
785 |
|
} /* End of loop for ANY case */ |
786 |
|
|
787 |
|
*lenptr = 0; /* Must have hit the end */ |
788 |
|
return endptr; |
789 |
|
} /* End of overall switch */ |
790 |
|
} |
791 |
|
|
792 |
|
|
793 |
|
|
794 |
/************************************************* |
/************************************************* |
795 |
* Grep a file or recurse into a directory * |
* Find start of previous line * |
796 |
*************************************************/ |
*************************************************/ |
797 |
|
|
798 |
static int |
/* This is called when looking back for before lines to print. |
|
grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames, |
|
|
BOOL only_one_at_top) |
|
|
{ |
|
|
int rc = 1; |
|
|
int sep; |
|
|
FILE *in; |
|
799 |
|
|
800 |
/* If the file is a directory and we are recursing, scan each file within it. |
Arguments: |
801 |
The scanning code is localized so it can be made system-specific. */ |
p start of the subsequent line |
802 |
|
startptr start of available data |
803 |
|
|
804 |
if ((sep = isdirectory(filename)) != 0 && recurse) |
Returns: pointer to the start of the previous line |
805 |
{ |
*/ |
|
char buffer[1024]; |
|
|
char *nextfile; |
|
|
directory_type *dir = opendirectory(filename); |
|
806 |
|
|
807 |
if (dir == NULL) |
static char * |
808 |
{ |
previous_line(char *p, char *startptr) |
809 |
fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename, |
{ |
810 |
strerror(errno)); |
switch(endlinetype) |
811 |
return 2; |
{ |
812 |
} |
default: /* Just in case */ |
813 |
|
case EL_LF: |
814 |
|
p--; |
815 |
|
while (p > startptr && p[-1] != '\n') p--; |
816 |
|
return p; |
817 |
|
|
818 |
|
case EL_CR: |
819 |
|
p--; |
820 |
|
while (p > startptr && p[-1] != '\n') p--; |
821 |
|
return p; |
822 |
|
|
823 |
while ((nextfile = readdirectory(dir)) != NULL) |
case EL_CRLF: |
824 |
|
for (;;) |
825 |
{ |
{ |
826 |
int frc; |
p -= 2; |
827 |
sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile); |
while (p > startptr && p[-1] != '\n') p--; |
828 |
frc = grep_or_recurse(buffer, recurse, TRUE, FALSE); |
if (p <= startptr + 1 || p[-2] == '\r') return p; |
|
if (frc == 0 && rc == 1) rc = 0; |
|
829 |
} |
} |
830 |
|
return p; /* But control should never get here */ |
831 |
|
|
832 |
closedirectory(dir); |
case EL_ANY: |
833 |
return rc; |
case EL_ANYCRLF: |
834 |
} |
if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--; |
835 |
|
if (utf8) while ((*p & 0xc0) == 0x80) p--; |
836 |
|
|
837 |
/* If the file is not a directory, or we are not recursing, scan it. If this is |
while (p > startptr) |
838 |
the first and only argument at top level, we don't show the file name. |
{ |
839 |
Otherwise, control is via the show_filenames variable. */ |
register int c; |
840 |
|
char *pp = p - 1; |
841 |
|
|
842 |
in = fopen(filename, "r"); |
if (utf8) |
843 |
if (in == NULL) |
{ |
844 |
{ |
int extra = 0; |
845 |
fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno)); |
while ((*pp & 0xc0) == 0x80) pp--; |
846 |
return 2; |
c = *((unsigned char *)pp); |
847 |
} |
if (c >= 0xc0) |
848 |
|
{ |
849 |
|
int gcii, gcss; |
850 |
|
extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */ |
851 |
|
gcss = 6*extra; |
852 |
|
c = (c & utf8_table3[extra]) << gcss; |
853 |
|
for (gcii = 1; gcii <= extra; gcii++) |
854 |
|
{ |
855 |
|
gcss -= 6; |
856 |
|
c |= (pp[gcii] & 0x3f) << gcss; |
857 |
|
} |
858 |
|
} |
859 |
|
} |
860 |
|
else c = *((unsigned char *)pp); |
861 |
|
|
862 |
rc = pcregrep(in, (show_filenames && !only_one_at_top)? filename : NULL); |
if (endlinetype == EL_ANYCRLF) switch (c) |
863 |
fclose(in); |
{ |
864 |
return rc; |
case 0x0a: /* LF */ |
865 |
} |
case 0x0d: /* CR */ |
866 |
|
return p; |
867 |
|
|
868 |
|
default: |
869 |
|
break; |
870 |
|
} |
871 |
|
|
872 |
|
else switch (c) |
873 |
|
{ |
874 |
|
case 0x0a: /* LF */ |
875 |
|
case 0x0b: /* VT */ |
876 |
|
case 0x0c: /* FF */ |
877 |
|
case 0x0d: /* CR */ |
878 |
|
case 0x85: /* NEL */ |
879 |
|
case 0x2028: /* LS */ |
880 |
|
case 0x2029: /* PS */ |
881 |
|
return p; |
882 |
|
|
883 |
|
default: |
884 |
|
break; |
885 |
|
} |
886 |
|
|
887 |
/************************************************* |
p = pp; /* Back one character */ |
888 |
* Usage function * |
} /* End of loop for ANY case */ |
|
*************************************************/ |
|
889 |
|
|
890 |
static int |
return startptr; /* Hit start of data */ |
891 |
usage(int rc) |
} /* End of overall switch */ |
|
{ |
|
|
fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n"); |
|
|
fprintf(stderr, "Type `pcregrep --help' for more information.\n"); |
|
|
return rc; |
|
892 |
} |
} |
893 |
|
|
894 |
|
|
895 |
|
|
896 |
|
|
897 |
|
|
898 |
/************************************************* |
/************************************************* |
899 |
* Help function * |
* Print the previous "after" lines * |
900 |
*************************************************/ |
*************************************************/ |
901 |
|
|
902 |
static void |
/* This is called if we are about to lose said lines because of buffer filling, |
903 |
|
and at the end of the file. The data in the line is written using fwrite() so |
904 |
|
that a binary zero does not terminate it. |
905 |
|
|
906 |
|
Arguments: |
907 |
|
lastmatchnumber the number of the last matching line, plus one |
908 |
|
lastmatchrestart where we restarted after the last match |
909 |
|
endptr end of available data |
910 |
|
printname filename for printing |
911 |
|
|
912 |
|
Returns: nothing |
913 |
|
*/ |
914 |
|
|
915 |
|
static void do_after_lines(int lastmatchnumber, char *lastmatchrestart, |
916 |
|
char *endptr, char *printname) |
917 |
|
{ |
918 |
|
if (after_context > 0 && lastmatchnumber > 0) |
919 |
|
{ |
920 |
|
int count = 0; |
921 |
|
while (lastmatchrestart < endptr && count++ < after_context) |
922 |
|
{ |
923 |
|
int ellength; |
924 |
|
char *pp = lastmatchrestart; |
925 |
|
if (printname != NULL) fprintf(stdout, "%s-", printname); |
926 |
|
if (number) fprintf(stdout, "%d-", lastmatchnumber++); |
927 |
|
pp = end_of_line(pp, endptr, &ellength); |
928 |
|
FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout); |
929 |
|
lastmatchrestart = pp; |
930 |
|
} |
931 |
|
hyphenpending = TRUE; |
932 |
|
} |
933 |
|
} |
934 |
|
|
935 |
|
|
936 |
|
|
937 |
|
/************************************************* |
938 |
|
* Apply patterns to subject till one matches * |
939 |
|
*************************************************/ |
940 |
|
|
941 |
|
/* This function is called to run through all patterns, looking for a match. It |
942 |
|
is used multiple times for the same subject when colouring is enabled, in order |
943 |
|
to find all possible matches. |
944 |
|
|
945 |
|
Arguments: |
946 |
|
matchptr the start of the subject |
947 |
|
length the length of the subject to match |
948 |
|
startoffset where to start matching |
949 |
|
offsets the offsets vector to fill in |
950 |
|
mrc address of where to put the result of pcre_exec() |
951 |
|
|
952 |
|
Returns: TRUE if there was a match |
953 |
|
FALSE if there was no match |
954 |
|
invert if there was a non-fatal error |
955 |
|
*/ |
956 |
|
|
957 |
|
static BOOL |
958 |
|
match_patterns(char *matchptr, size_t length, int startoffset, int *offsets, |
959 |
|
int *mrc) |
960 |
|
{ |
961 |
|
int i; |
962 |
|
size_t slen = length; |
963 |
|
const char *msg = "this text:\n\n"; |
964 |
|
if (slen > 200) |
965 |
|
{ |
966 |
|
slen = 200; |
967 |
|
msg = "text that starts:\n\n"; |
968 |
|
} |
969 |
|
for (i = 0; i < pattern_count; i++) |
970 |
|
{ |
971 |
|
*mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, |
972 |
|
startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE); |
973 |
|
if (*mrc >= 0) return TRUE; |
974 |
|
if (*mrc == PCRE_ERROR_NOMATCH) continue; |
975 |
|
fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc); |
976 |
|
if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1); |
977 |
|
fprintf(stderr, "%s", msg); |
978 |
|
FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */ |
979 |
|
fprintf(stderr, "\n\n"); |
980 |
|
if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT) |
981 |
|
resource_error = TRUE; |
982 |
|
if (error_count++ > 20) |
983 |
|
{ |
984 |
|
fprintf(stderr, "pcregrep: Too many errors - abandoned.\n"); |
985 |
|
pcregrep_exit(2); |
986 |
|
} |
987 |
|
return invert; /* No more matching; don't show the line again */ |
988 |
|
} |
989 |
|
|
990 |
|
return FALSE; /* No match, no errors */ |
991 |
|
} |
992 |
|
|
993 |
|
|
994 |
|
|
995 |
|
/************************************************* |
996 |
|
* Grep an individual file * |
997 |
|
*************************************************/ |
998 |
|
|
999 |
|
/* This is called from grep_or_recurse() below. It uses a buffer that is three |
1000 |
|
times the value of bufthird. The matching point is never allowed to stray into |
1001 |
|
the top third of the buffer, thus keeping more of the file available for |
1002 |
|
context printing or for multiline scanning. For large files, the pointer will |
1003 |
|
be in the middle third most of the time, so the bottom third is available for |
1004 |
|
"before" context printing. |
1005 |
|
|
1006 |
|
Arguments: |
1007 |
|
handle the fopened FILE stream for a normal file |
1008 |
|
the gzFile pointer when reading is via libz |
1009 |
|
the BZFILE pointer when reading is via libbz2 |
1010 |
|
frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2 |
1011 |
|
filename the file name or NULL (for errors) |
1012 |
|
printname the file name if it is to be printed for each match |
1013 |
|
or NULL if the file name is not to be printed |
1014 |
|
it cannot be NULL if filenames[_nomatch]_only is set |
1015 |
|
|
1016 |
|
Returns: 0 if there was at least one match |
1017 |
|
1 otherwise (no matches) |
1018 |
|
2 if an overlong line is encountered |
1019 |
|
3 if there is a read error on a .bz2 file |
1020 |
|
*/ |
1021 |
|
|
1022 |
|
static int |
1023 |
|
pcregrep(void *handle, int frtype, char *filename, char *printname) |
1024 |
|
{ |
1025 |
|
int rc = 1; |
1026 |
|
int linenumber = 1; |
1027 |
|
int lastmatchnumber = 0; |
1028 |
|
int count = 0; |
1029 |
|
int filepos = 0; |
1030 |
|
int offsets[OFFSET_SIZE]; |
1031 |
|
char *lastmatchrestart = NULL; |
1032 |
|
char *ptr = main_buffer; |
1033 |
|
char *endptr; |
1034 |
|
size_t bufflength; |
1035 |
|
BOOL endhyphenpending = FALSE; |
1036 |
|
BOOL input_line_buffered = line_buffered; |
1037 |
|
FILE *in = NULL; /* Ensure initialized */ |
1038 |
|
|
1039 |
|
#ifdef SUPPORT_LIBZ |
1040 |
|
gzFile ingz = NULL; |
1041 |
|
#endif |
1042 |
|
|
1043 |
|
#ifdef SUPPORT_LIBBZ2 |
1044 |
|
BZFILE *inbz2 = NULL; |
1045 |
|
#endif |
1046 |
|
|
1047 |
|
|
1048 |
|
/* Do the first read into the start of the buffer and set up the pointer to end |
1049 |
|
of what we have. In the case of libz, a non-zipped .gz file will be read as a |
1050 |
|
plain file. However, if a .bz2 file isn't actually bzipped, the first read will |
1051 |
|
fail. */ |
1052 |
|
|
1053 |
|
#ifdef SUPPORT_LIBZ |
1054 |
|
if (frtype == FR_LIBZ) |
1055 |
|
{ |
1056 |
|
ingz = (gzFile)handle; |
1057 |
|
bufflength = gzread (ingz, main_buffer, bufsize); |
1058 |
|
} |
1059 |
|
else |
1060 |
|
#endif |
1061 |
|
|
1062 |
|
#ifdef SUPPORT_LIBBZ2 |
1063 |
|
if (frtype == FR_LIBBZ2) |
1064 |
|
{ |
1065 |
|
inbz2 = (BZFILE *)handle; |
1066 |
|
bufflength = BZ2_bzread(inbz2, main_buffer, bufsize); |
1067 |
|
if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */ |
1068 |
|
} /* without the cast it is unsigned. */ |
1069 |
|
else |
1070 |
|
#endif |
1071 |
|
|
1072 |
|
{ |
1073 |
|
in = (FILE *)handle; |
1074 |
|
if (is_file_tty(in)) input_line_buffered = TRUE; |
1075 |
|
bufflength = input_line_buffered? |
1076 |
|
read_one_line(main_buffer, bufsize, in) : |
1077 |
|
fread(main_buffer, 1, bufsize, in); |
1078 |
|
} |
1079 |
|
|
1080 |
|
endptr = main_buffer + bufflength; |
1081 |
|
|
1082 |
|
/* Loop while the current pointer is not at the end of the file. For large |
1083 |
|
files, endptr will be at the end of the buffer when we are in the middle of the |
1084 |
|
file, but ptr will never get there, because as soon as it gets over 2/3 of the |
1085 |
|
way, the buffer is shifted left and re-filled. */ |
1086 |
|
|
1087 |
|
while (ptr < endptr) |
1088 |
|
{ |
1089 |
|
int endlinelength; |
1090 |
|
int mrc = 0; |
1091 |
|
int startoffset = 0; |
1092 |
|
BOOL match; |
1093 |
|
char *matchptr = ptr; |
1094 |
|
char *t = ptr; |
1095 |
|
size_t length, linelength; |
1096 |
|
|
1097 |
|
/* At this point, ptr is at the start of a line. We need to find the length |
1098 |
|
of the subject string to pass to pcre_exec(). In multiline mode, it is the |
1099 |
|
length remainder of the data in the buffer. Otherwise, it is the length of |
1100 |
|
the next line, excluding the terminating newline. After matching, we always |
1101 |
|
advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE |
1102 |
|
option is used for compiling, so that any match is constrained to be in the |
1103 |
|
first line. */ |
1104 |
|
|
1105 |
|
t = end_of_line(t, endptr, &endlinelength); |
1106 |
|
linelength = t - ptr - endlinelength; |
1107 |
|
length = multiline? (size_t)(endptr - ptr) : linelength; |
1108 |
|
|
1109 |
|
/* Check to see if the line we are looking at extends right to the very end |
1110 |
|
of the buffer without a line terminator. This means the line is too long to |
1111 |
|
handle. */ |
1112 |
|
|
1113 |
|
if (endlinelength == 0 && t == main_buffer + bufsize) |
1114 |
|
{ |
1115 |
|
fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n" |
1116 |
|
"pcregrep: check the --buffer-size option\n", |
1117 |
|
linenumber, |
1118 |
|
(filename == NULL)? "" : " of file ", |
1119 |
|
(filename == NULL)? "" : filename); |
1120 |
|
return 2; |
1121 |
|
} |
1122 |
|
|
1123 |
|
/* Extra processing for Jeffrey Friedl's debugging. */ |
1124 |
|
|
1125 |
|
#ifdef JFRIEDL_DEBUG |
1126 |
|
if (jfriedl_XT || jfriedl_XR) |
1127 |
|
{ |
1128 |
|
#include <sys/time.h> |
1129 |
|
#include <time.h> |
1130 |
|
struct timeval start_time, end_time; |
1131 |
|
struct timezone dummy; |
1132 |
|
int i; |
1133 |
|
|
1134 |
|
if (jfriedl_XT) |
1135 |
|
{ |
1136 |
|
unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix); |
1137 |
|
const char *orig = ptr; |
1138 |
|
ptr = malloc(newlen + 1); |
1139 |
|
if (!ptr) { |
1140 |
|
printf("out of memory"); |
1141 |
|
pcregrep_exit(2); |
1142 |
|
} |
1143 |
|
endptr = ptr; |
1144 |
|
strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix); |
1145 |
|
for (i = 0; i < jfriedl_XT; i++) { |
1146 |
|
strncpy(endptr, orig, length); |
1147 |
|
endptr += length; |
1148 |
|
} |
1149 |
|
strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix); |
1150 |
|
length = newlen; |
1151 |
|
} |
1152 |
|
|
1153 |
|
if (gettimeofday(&start_time, &dummy) != 0) |
1154 |
|
perror("bad gettimeofday"); |
1155 |
|
|
1156 |
|
|
1157 |
|
for (i = 0; i < jfriedl_XR; i++) |
1158 |
|
match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, |
1159 |
|
PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0); |
1160 |
|
|
1161 |
|
if (gettimeofday(&end_time, &dummy) != 0) |
1162 |
|
perror("bad gettimeofday"); |
1163 |
|
|
1164 |
|
double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0)) |
1165 |
|
- |
1166 |
|
(start_time.tv_sec + (start_time.tv_usec / 1000000.0))); |
1167 |
|
|
1168 |
|
printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta); |
1169 |
|
return 0; |
1170 |
|
} |
1171 |
|
#endif |
1172 |
|
|
1173 |
|
/* We come back here after a match when the -o option (only_matching) is set, |
1174 |
|
in order to find any further matches in the same line. */ |
1175 |
|
|
1176 |
|
ONLY_MATCHING_RESTART: |
1177 |
|
|
1178 |
|
/* Run through all the patterns until one matches or there is an error other |
1179 |
|
than NOMATCH. This code is in a subroutine so that it can be re-used for |
1180 |
|
finding subsequent matches when colouring matched lines. */ |
1181 |
|
|
1182 |
|
match = match_patterns(matchptr, length, startoffset, offsets, &mrc); |
1183 |
|
|
1184 |
|
/* If it's a match or a not-match (as required), do what's wanted. */ |
1185 |
|
|
1186 |
|
if (match != invert) |
1187 |
|
{ |
1188 |
|
BOOL hyphenprinted = FALSE; |
1189 |
|
|
1190 |
|
/* We've failed if we want a file that doesn't have any matches. */ |
1191 |
|
|
1192 |
|
if (filenames == FN_NOMATCH_ONLY) return 1; |
1193 |
|
|
1194 |
|
/* Just count if just counting is wanted. */ |
1195 |
|
|
1196 |
|
if (count_only) count++; |
1197 |
|
|
1198 |
|
/* If all we want is a file name, there is no need to scan any more lines |
1199 |
|
in the file. */ |
1200 |
|
|
1201 |
|
else if (filenames == FN_MATCH_ONLY) |
1202 |
|
{ |
1203 |
|
fprintf(stdout, "%s\n", printname); |
1204 |
|
return 0; |
1205 |
|
} |
1206 |
|
|
1207 |
|
/* Likewise, if all we want is a yes/no answer. */ |
1208 |
|
|
1209 |
|
else if (quiet) return 0; |
1210 |
|
|
1211 |
|
/* The --only-matching option prints just the substring that matched, or a |
1212 |
|
captured portion of it, as long as this string is not empty, and the |
1213 |
|
--file-offsets and --line-offsets options output offsets for the matching |
1214 |
|
substring (they both force --only-matching = 0). None of these options |
1215 |
|
prints any context. Afterwards, adjust the start and then jump back to look |
1216 |
|
for further matches in the same line. If we are in invert mode, however, |
1217 |
|
nothing is printed and we do not restart - this could still be useful |
1218 |
|
because the return code is set. */ |
1219 |
|
|
1220 |
|
else if (only_matching >= 0) |
1221 |
|
{ |
1222 |
|
if (!invert) |
1223 |
|
{ |
1224 |
|
if (printname != NULL) fprintf(stdout, "%s:", printname); |
1225 |
|
if (number) fprintf(stdout, "%d:", linenumber); |
1226 |
|
if (line_offsets) |
1227 |
|
fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr), |
1228 |
|
offsets[1] - offsets[0]); |
1229 |
|
else if (file_offsets) |
1230 |
|
fprintf(stdout, "%d,%d\n", |
1231 |
|
(int)(filepos + matchptr + offsets[0] - ptr), |
1232 |
|
offsets[1] - offsets[0]); |
1233 |
|
else if (only_matching < mrc) |
1234 |
|
{ |
1235 |
|
int plen = offsets[2*only_matching + 1] - offsets[2*only_matching]; |
1236 |
|
if (plen > 0) |
1237 |
|
{ |
1238 |
|
if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string); |
1239 |
|
FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout); |
1240 |
|
if (do_colour) fprintf(stdout, "%c[00m", 0x1b); |
1241 |
|
fprintf(stdout, "\n"); |
1242 |
|
} |
1243 |
|
} |
1244 |
|
else if (printname != NULL || number) fprintf(stdout, "\n"); |
1245 |
|
match = FALSE; |
1246 |
|
if (line_buffered) fflush(stdout); |
1247 |
|
rc = 0; /* Had some success */ |
1248 |
|
startoffset = offsets[1]; /* Restart after the match */ |
1249 |
|
goto ONLY_MATCHING_RESTART; |
1250 |
|
} |
1251 |
|
} |
1252 |
|
|
1253 |
|
/* This is the default case when none of the above options is set. We print |
1254 |
|
the matching lines(s), possibly preceded and/or followed by other lines of |
1255 |
|
context. */ |
1256 |
|
|
1257 |
|
else |
1258 |
|
{ |
1259 |
|
/* See if there is a requirement to print some "after" lines from a |
1260 |
|
previous match. We never print any overlaps. */ |
1261 |
|
|
1262 |
|
if (after_context > 0 && lastmatchnumber > 0) |
1263 |
|
{ |
1264 |
|
int ellength; |
1265 |
|
int linecount = 0; |
1266 |
|
char *p = lastmatchrestart; |
1267 |
|
|
1268 |
|
while (p < ptr && linecount < after_context) |
1269 |
|
{ |
1270 |
|
p = end_of_line(p, ptr, &ellength); |
1271 |
|
linecount++; |
1272 |
|
} |
1273 |
|
|
1274 |
|
/* It is important to advance lastmatchrestart during this printing so |
1275 |
|
that it interacts correctly with any "before" printing below. Print |
1276 |
|
each line's data using fwrite() in case there are binary zeroes. */ |
1277 |
|
|
1278 |
|
while (lastmatchrestart < p) |
1279 |
|
{ |
1280 |
|
char *pp = lastmatchrestart; |
1281 |
|
if (printname != NULL) fprintf(stdout, "%s-", printname); |
1282 |
|
if (number) fprintf(stdout, "%d-", lastmatchnumber++); |
1283 |
|
pp = end_of_line(pp, endptr, &ellength); |
1284 |
|
FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout); |
1285 |
|
lastmatchrestart = pp; |
1286 |
|
} |
1287 |
|
if (lastmatchrestart != ptr) hyphenpending = TRUE; |
1288 |
|
} |
1289 |
|
|
1290 |
|
/* If there were non-contiguous lines printed above, insert hyphens. */ |
1291 |
|
|
1292 |
|
if (hyphenpending) |
1293 |
|
{ |
1294 |
|
fprintf(stdout, "--\n"); |
1295 |
|
hyphenpending = FALSE; |
1296 |
|
hyphenprinted = TRUE; |
1297 |
|
} |
1298 |
|
|
1299 |
|
/* See if there is a requirement to print some "before" lines for this |
1300 |
|
match. Again, don't print overlaps. */ |
1301 |
|
|
1302 |
|
if (before_context > 0) |
1303 |
|
{ |
1304 |
|
int linecount = 0; |
1305 |
|
char *p = ptr; |
1306 |
|
|
1307 |
|
while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) && |
1308 |
|
linecount < before_context) |
1309 |
|
{ |
1310 |
|
linecount++; |
1311 |
|
p = previous_line(p, main_buffer); |
1312 |
|
} |
1313 |
|
|
1314 |
|
if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted) |
1315 |
|
fprintf(stdout, "--\n"); |
1316 |
|
|
1317 |
|
while (p < ptr) |
1318 |
|
{ |
1319 |
|
int ellength; |
1320 |
|
char *pp = p; |
1321 |
|
if (printname != NULL) fprintf(stdout, "%s-", printname); |
1322 |
|
if (number) fprintf(stdout, "%d-", linenumber - linecount--); |
1323 |
|
pp = end_of_line(pp, endptr, &ellength); |
1324 |
|
FWRITE(p, 1, pp - p, stdout); |
1325 |
|
p = pp; |
1326 |
|
} |
1327 |
|
} |
1328 |
|
|
1329 |
|
/* Now print the matching line(s); ensure we set hyphenpending at the end |
1330 |
|
of the file if any context lines are being output. */ |
1331 |
|
|
1332 |
|
if (after_context > 0 || before_context > 0) |
1333 |
|
endhyphenpending = TRUE; |
1334 |
|
|
1335 |
|
if (printname != NULL) fprintf(stdout, "%s:", printname); |
1336 |
|
if (number) fprintf(stdout, "%d:", linenumber); |
1337 |
|
|
1338 |
|
/* In multiline mode, we want to print to the end of the line in which |
1339 |
|
the end of the matched string is found, so we adjust linelength and the |
1340 |
|
line number appropriately, but only when there actually was a match |
1341 |
|
(invert not set). Because the PCRE_FIRSTLINE option is set, the start of |
1342 |
|
the match will always be before the first newline sequence. */ |
1343 |
|
|
1344 |
|
if (multiline & !invert) |
1345 |
|
{ |
1346 |
|
char *endmatch = ptr + offsets[1]; |
1347 |
|
t = ptr; |
1348 |
|
while (t < endmatch) |
1349 |
|
{ |
1350 |
|
t = end_of_line(t, endptr, &endlinelength); |
1351 |
|
if (t < endmatch) linenumber++; else break; |
1352 |
|
} |
1353 |
|
linelength = t - ptr - endlinelength; |
1354 |
|
} |
1355 |
|
|
1356 |
|
/*** NOTE: Use only fwrite() to output the data line, so that binary |
1357 |
|
zeroes are treated as just another data character. */ |
1358 |
|
|
1359 |
|
/* This extra option, for Jeffrey Friedl's debugging requirements, |
1360 |
|
replaces the matched string, or a specific captured string if it exists, |
1361 |
|
with X. When this happens, colouring is ignored. */ |
1362 |
|
|
1363 |
|
#ifdef JFRIEDL_DEBUG |
1364 |
|
if (S_arg >= 0 && S_arg < mrc) |
1365 |
|
{ |
1366 |
|
int first = S_arg * 2; |
1367 |
|
int last = first + 1; |
1368 |
|
FWRITE(ptr, 1, offsets[first], stdout); |
1369 |
|
fprintf(stdout, "X"); |
1370 |
|
FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout); |
1371 |
|
} |
1372 |
|
else |
1373 |
|
#endif |
1374 |
|
|
1375 |
|
/* We have to split the line(s) up if colouring, and search for further |
1376 |
|
matches, but not of course if the line is a non-match. */ |
1377 |
|
|
1378 |
|
if (do_colour && !invert) |
1379 |
|
{ |
1380 |
|
int plength; |
1381 |
|
FWRITE(ptr, 1, offsets[0], stdout); |
1382 |
|
fprintf(stdout, "%c[%sm", 0x1b, colour_string); |
1383 |
|
FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout); |
1384 |
|
fprintf(stdout, "%c[00m", 0x1b); |
1385 |
|
for (;;) |
1386 |
|
{ |
1387 |
|
startoffset = offsets[1]; |
1388 |
|
if (startoffset >= linelength + endlinelength || |
1389 |
|
!match_patterns(matchptr, length, startoffset, offsets, &mrc)) |
1390 |
|
break; |
1391 |
|
FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout); |
1392 |
|
fprintf(stdout, "%c[%sm", 0x1b, colour_string); |
1393 |
|
FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout); |
1394 |
|
fprintf(stdout, "%c[00m", 0x1b); |
1395 |
|
} |
1396 |
|
|
1397 |
|
/* In multiline mode, we may have already printed the complete line |
1398 |
|
and its line-ending characters (if they matched the pattern), so there |
1399 |
|
may be no more to print. */ |
1400 |
|
|
1401 |
|
plength = (linelength + endlinelength) - startoffset; |
1402 |
|
if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout); |
1403 |
|
} |
1404 |
|
|
1405 |
|
/* Not colouring; no need to search for further matches */ |
1406 |
|
|
1407 |
|
else FWRITE(ptr, 1, linelength + endlinelength, stdout); |
1408 |
|
} |
1409 |
|
|
1410 |
|
/* End of doing what has to be done for a match. If --line-buffered was |
1411 |
|
given, flush the output. */ |
1412 |
|
|
1413 |
|
if (line_buffered) fflush(stdout); |
1414 |
|
rc = 0; /* Had some success */ |
1415 |
|
|
1416 |
|
/* Remember where the last match happened for after_context. We remember |
1417 |
|
where we are about to restart, and that line's number. */ |
1418 |
|
|
1419 |
|
lastmatchrestart = ptr + linelength + endlinelength; |
1420 |
|
lastmatchnumber = linenumber + 1; |
1421 |
|
} |
1422 |
|
|
1423 |
|
/* For a match in multiline inverted mode (which of course did not cause |
1424 |
|
anything to be printed), we have to move on to the end of the match before |
1425 |
|
proceeding. */ |
1426 |
|
|
1427 |
|
if (multiline && invert && match) |
1428 |
|
{ |
1429 |
|
int ellength; |
1430 |
|
char *endmatch = ptr + offsets[1]; |
1431 |
|
t = ptr; |
1432 |
|
while (t < endmatch) |
1433 |
|
{ |
1434 |
|
t = end_of_line(t, endptr, &ellength); |
1435 |
|
if (t <= endmatch) linenumber++; else break; |
1436 |
|
} |
1437 |
|
endmatch = end_of_line(endmatch, endptr, &ellength); |
1438 |
|
linelength = endmatch - ptr - ellength; |
1439 |
|
} |
1440 |
|
|
1441 |
|
/* Advance to after the newline and increment the line number. The file |
1442 |
|
offset to the current line is maintained in filepos. */ |
1443 |
|
|
1444 |
|
ptr += linelength + endlinelength; |
1445 |
|
filepos += (int)(linelength + endlinelength); |
1446 |
|
linenumber++; |
1447 |
|
|
1448 |
|
/* If input is line buffered, and the buffer is not yet full, read another |
1449 |
|
line and add it into the buffer. */ |
1450 |
|
|
1451 |
|
if (input_line_buffered && bufflength < bufsize) |
1452 |
|
{ |
1453 |
|
int add = read_one_line(ptr, bufsize - (ptr - main_buffer), in); |
1454 |
|
bufflength += add; |
1455 |
|
endptr += add; |
1456 |
|
} |
1457 |
|
|
1458 |
|
/* If we haven't yet reached the end of the file (the buffer is full), and |
1459 |
|
the current point is in the top 1/3 of the buffer, slide the buffer down by |
1460 |
|
1/3 and refill it. Before we do this, if some unprinted "after" lines are |
1461 |
|
about to be lost, print them. */ |
1462 |
|
|
1463 |
|
if (bufflength >= bufsize && ptr > main_buffer + 2*bufthird) |
1464 |
|
{ |
1465 |
|
if (after_context > 0 && |
1466 |
|
lastmatchnumber > 0 && |
1467 |
|
lastmatchrestart < main_buffer + bufthird) |
1468 |
|
{ |
1469 |
|
do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname); |
1470 |
|
lastmatchnumber = 0; |
1471 |
|
} |
1472 |
|
|
1473 |
|
/* Now do the shuffle */ |
1474 |
|
|
1475 |
|
memmove(main_buffer, main_buffer + bufthird, 2*bufthird); |
1476 |
|
ptr -= bufthird; |
1477 |
|
|
1478 |
|
#ifdef SUPPORT_LIBZ |
1479 |
|
if (frtype == FR_LIBZ) |
1480 |
|
bufflength = 2*bufthird + |
1481 |
|
gzread (ingz, main_buffer + 2*bufthird, bufthird); |
1482 |
|
else |
1483 |
|
#endif |
1484 |
|
|
1485 |
|
#ifdef SUPPORT_LIBBZ2 |
1486 |
|
if (frtype == FR_LIBBZ2) |
1487 |
|
bufflength = 2*bufthird + |
1488 |
|
BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird); |
1489 |
|
else |
1490 |
|
#endif |
1491 |
|
|
1492 |
|
bufflength = 2*bufthird + |
1493 |
|
(input_line_buffered? |
1494 |
|
read_one_line(main_buffer + 2*bufthird, bufthird, in) : |
1495 |
|
fread(main_buffer + 2*bufthird, 1, bufthird, in)); |
1496 |
|
endptr = main_buffer + bufflength; |
1497 |
|
|
1498 |
|
/* Adjust any last match point */ |
1499 |
|
|
1500 |
|
if (lastmatchnumber > 0) lastmatchrestart -= bufthird; |
1501 |
|
} |
1502 |
|
} /* Loop through the whole file */ |
1503 |
|
|
1504 |
|
/* End of file; print final "after" lines if wanted; do_after_lines sets |
1505 |
|
hyphenpending if it prints something. */ |
1506 |
|
|
1507 |
|
if (only_matching < 0 && !count_only) |
1508 |
|
{ |
1509 |
|
do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname); |
1510 |
|
hyphenpending |= endhyphenpending; |
1511 |
|
} |
1512 |
|
|
1513 |
|
/* Print the file name if we are looking for those without matches and there |
1514 |
|
were none. If we found a match, we won't have got this far. */ |
1515 |
|
|
1516 |
|
if (filenames == FN_NOMATCH_ONLY) |
1517 |
|
{ |
1518 |
|
fprintf(stdout, "%s\n", printname); |
1519 |
|
return 0; |
1520 |
|
} |
1521 |
|
|
1522 |
|
/* Print the match count if wanted */ |
1523 |
|
|
1524 |
|
if (count_only) |
1525 |
|
{ |
1526 |
|
if (count > 0 || !omit_zero_count) |
1527 |
|
{ |
1528 |
|
if (printname != NULL && filenames != FN_NONE) |
1529 |
|
fprintf(stdout, "%s:", printname); |
1530 |
|
fprintf(stdout, "%d\n", count); |
1531 |
|
} |
1532 |
|
} |
1533 |
|
|
1534 |
|
return rc; |
1535 |
|
} |
1536 |
|
|
1537 |
|
|
1538 |
|
|
1539 |
|
/************************************************* |
1540 |
|
* Grep a file or recurse into a directory * |
1541 |
|
*************************************************/ |
1542 |
|
|
1543 |
|
/* Given a path name, if it's a directory, scan all the files if we are |
1544 |
|
recursing; if it's a file, grep it. |
1545 |
|
|
1546 |
|
Arguments: |
1547 |
|
pathname the path to investigate |
1548 |
|
dir_recurse TRUE if recursing is wanted (-r or -drecurse) |
1549 |
|
only_one_at_top TRUE if the path is the only one at toplevel |
1550 |
|
|
1551 |
|
Returns: 0 if there was at least one match |
1552 |
|
1 if there were no matches |
1553 |
|
2 there was some kind of error |
1554 |
|
|
1555 |
|
However, file opening failures are suppressed if "silent" is set. |
1556 |
|
*/ |
1557 |
|
|
1558 |
|
static int |
1559 |
|
grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top) |
1560 |
|
{ |
1561 |
|
int rc = 1; |
1562 |
|
int sep; |
1563 |
|
int frtype; |
1564 |
|
int pathlen; |
1565 |
|
void *handle; |
1566 |
|
FILE *in = NULL; /* Ensure initialized */ |
1567 |
|
|
1568 |
|
#ifdef SUPPORT_LIBZ |
1569 |
|
gzFile ingz = NULL; |
1570 |
|
#endif |
1571 |
|
|
1572 |
|
#ifdef SUPPORT_LIBBZ2 |
1573 |
|
BZFILE *inbz2 = NULL; |
1574 |
|
#endif |
1575 |
|
|
1576 |
|
/* If the file name is "-" we scan stdin */ |
1577 |
|
|
1578 |
|
if (strcmp(pathname, "-") == 0) |
1579 |
|
{ |
1580 |
|
return pcregrep(stdin, FR_PLAIN, stdin_name, |
1581 |
|
(filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))? |
1582 |
|
stdin_name : NULL); |
1583 |
|
} |
1584 |
|
|
1585 |
|
/* If the file is a directory, skip if skipping or if we are recursing, scan |
1586 |
|
each file and directory within it, subject to any include or exclude patterns |
1587 |
|
that were set. The scanning code is localized so it can be made |
1588 |
|
system-specific. */ |
1589 |
|
|
1590 |
|
if ((sep = isdirectory(pathname)) != 0) |
1591 |
|
{ |
1592 |
|
if (dee_action == dee_SKIP) return 1; |
1593 |
|
if (dee_action == dee_RECURSE) |
1594 |
|
{ |
1595 |
|
char buffer[1024]; |
1596 |
|
char *nextfile; |
1597 |
|
directory_type *dir = opendirectory(pathname); |
1598 |
|
|
1599 |
|
if (dir == NULL) |
1600 |
|
{ |
1601 |
|
if (!silent) |
1602 |
|
fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname, |
1603 |
|
strerror(errno)); |
1604 |
|
return 2; |
1605 |
|
} |
1606 |
|
|
1607 |
|
while ((nextfile = readdirectory(dir)) != NULL) |
1608 |
|
{ |
1609 |
|
int frc, nflen; |
1610 |
|
sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile); |
1611 |
|
nflen = (int)(strlen(nextfile)); |
1612 |
|
|
1613 |
|
if (isdirectory(buffer)) |
1614 |
|
{ |
1615 |
|
if (exclude_dir_compiled != NULL && |
1616 |
|
pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0) |
1617 |
|
continue; |
1618 |
|
|
1619 |
|
if (include_dir_compiled != NULL && |
1620 |
|
pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0) |
1621 |
|
continue; |
1622 |
|
} |
1623 |
|
else |
1624 |
|
{ |
1625 |
|
if (exclude_compiled != NULL && |
1626 |
|
pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0) |
1627 |
|
continue; |
1628 |
|
|
1629 |
|
if (include_compiled != NULL && |
1630 |
|
pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0) |
1631 |
|
continue; |
1632 |
|
} |
1633 |
|
|
1634 |
|
frc = grep_or_recurse(buffer, dir_recurse, FALSE); |
1635 |
|
if (frc > 1) rc = frc; |
1636 |
|
else if (frc == 0 && rc == 1) rc = 0; |
1637 |
|
} |
1638 |
|
|
1639 |
|
closedirectory(dir); |
1640 |
|
return rc; |
1641 |
|
} |
1642 |
|
} |
1643 |
|
|
1644 |
|
/* If the file is not a directory and not a regular file, skip it if that's |
1645 |
|
been requested. */ |
1646 |
|
|
1647 |
|
else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1; |
1648 |
|
|
1649 |
|
/* Control reaches here if we have a regular file, or if we have a directory |
1650 |
|
and recursion or skipping was not requested, or if we have anything else and |
1651 |
|
skipping was not requested. The scan proceeds. If this is the first and only |
1652 |
|
argument at top level, we don't show the file name, unless we are only showing |
1653 |
|
the file name, or the filename was forced (-H). */ |
1654 |
|
|
1655 |
|
pathlen = (int)(strlen(pathname)); |
1656 |
|
|
1657 |
|
/* Open using zlib if it is supported and the file name ends with .gz. */ |
1658 |
|
|
1659 |
|
#ifdef SUPPORT_LIBZ |
1660 |
|
if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0) |
1661 |
|
{ |
1662 |
|
ingz = gzopen(pathname, "rb"); |
1663 |
|
if (ingz == NULL) |
1664 |
|
{ |
1665 |
|
if (!silent) |
1666 |
|
fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname, |
1667 |
|
strerror(errno)); |
1668 |
|
return 2; |
1669 |
|
} |
1670 |
|
handle = (void *)ingz; |
1671 |
|
frtype = FR_LIBZ; |
1672 |
|
} |
1673 |
|
else |
1674 |
|
#endif |
1675 |
|
|
1676 |
|
/* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */ |
1677 |
|
|
1678 |
|
#ifdef SUPPORT_LIBBZ2 |
1679 |
|
if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0) |
1680 |
|
{ |
1681 |
|
inbz2 = BZ2_bzopen(pathname, "rb"); |
1682 |
|
handle = (void *)inbz2; |
1683 |
|
frtype = FR_LIBBZ2; |
1684 |
|
} |
1685 |
|
else |
1686 |
|
#endif |
1687 |
|
|
1688 |
|
/* Otherwise use plain fopen(). The label is so that we can come back here if |
1689 |
|
an attempt to read a .bz2 file indicates that it really is a plain file. */ |
1690 |
|
|
1691 |
|
#ifdef SUPPORT_LIBBZ2 |
1692 |
|
PLAIN_FILE: |
1693 |
|
#endif |
1694 |
|
{ |
1695 |
|
in = fopen(pathname, "rb"); |
1696 |
|
handle = (void *)in; |
1697 |
|
frtype = FR_PLAIN; |
1698 |
|
} |
1699 |
|
|
1700 |
|
/* All the opening methods return errno when they fail. */ |
1701 |
|
|
1702 |
|
if (handle == NULL) |
1703 |
|
{ |
1704 |
|
if (!silent) |
1705 |
|
fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname, |
1706 |
|
strerror(errno)); |
1707 |
|
return 2; |
1708 |
|
} |
1709 |
|
|
1710 |
|
/* Now grep the file */ |
1711 |
|
|
1712 |
|
rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT || |
1713 |
|
(filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL); |
1714 |
|
|
1715 |
|
/* Close in an appropriate manner. */ |
1716 |
|
|
1717 |
|
#ifdef SUPPORT_LIBZ |
1718 |
|
if (frtype == FR_LIBZ) |
1719 |
|
gzclose(ingz); |
1720 |
|
else |
1721 |
|
#endif |
1722 |
|
|
1723 |
|
/* If it is a .bz2 file and the result is 3, it means that the first attempt to |
1724 |
|
read failed. If the error indicates that the file isn't in fact bzipped, try |
1725 |
|
again as a normal file. */ |
1726 |
|
|
1727 |
|
#ifdef SUPPORT_LIBBZ2 |
1728 |
|
if (frtype == FR_LIBBZ2) |
1729 |
|
{ |
1730 |
|
if (rc == 3) |
1731 |
|
{ |
1732 |
|
int errnum; |
1733 |
|
const char *err = BZ2_bzerror(inbz2, &errnum); |
1734 |
|
if (errnum == BZ_DATA_ERROR_MAGIC) |
1735 |
|
{ |
1736 |
|
BZ2_bzclose(inbz2); |
1737 |
|
goto PLAIN_FILE; |
1738 |
|
} |
1739 |
|
else if (!silent) |
1740 |
|
fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n", |
1741 |
|
pathname, err); |
1742 |
|
rc = 2; /* The normal "something went wrong" code */ |
1743 |
|
} |
1744 |
|
BZ2_bzclose(inbz2); |
1745 |
|
} |
1746 |
|
else |
1747 |
|
#endif |
1748 |
|
|
1749 |
|
/* Normal file close */ |
1750 |
|
|
1751 |
|
fclose(in); |
1752 |
|
|
1753 |
|
/* Pass back the yield from pcregrep(). */ |
1754 |
|
|
1755 |
|
return rc; |
1756 |
|
} |
1757 |
|
|
1758 |
|
|
1759 |
|
|
1760 |
|
|
1761 |
|
/************************************************* |
1762 |
|
* Usage function * |
1763 |
|
*************************************************/ |
1764 |
|
|
1765 |
|
static int |
1766 |
|
usage(int rc) |
1767 |
|
{ |
1768 |
|
option_item *op; |
1769 |
|
fprintf(stderr, "Usage: pcregrep [-"); |
1770 |
|
for (op = optionlist; op->one_char != 0; op++) |
1771 |
|
{ |
1772 |
|
if (op->one_char > 0) fprintf(stderr, "%c", op->one_char); |
1773 |
|
} |
1774 |
|
fprintf(stderr, "] [long options] [pattern] [files]\n"); |
1775 |
|
fprintf(stderr, "Type `pcregrep --help' for more information and the long " |
1776 |
|
"options.\n"); |
1777 |
|
return rc; |
1778 |
|
} |
1779 |
|
|
1780 |
|
|
1781 |
|
|
1782 |
|
|
1783 |
|
/************************************************* |
1784 |
|
* Help function * |
1785 |
|
*************************************************/ |
1786 |
|
|
1787 |
|
static void |
1788 |
help(void) |
help(void) |
1789 |
{ |
{ |
1790 |
option_item *op; |
option_item *op; |
1791 |
|
|
1792 |
printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n"); |
printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n"); |
1793 |
printf("Search for PATTERN in each FILE or standard input.\n"); |
printf("Search for PATTERN in each FILE or standard input.\n"); |
1794 |
printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n"); |
printf("PATTERN must be present if neither -e nor -f is used.\n"); |
1795 |
|
printf("\"-\" can be used as a file name to mean STDIN.\n"); |
1796 |
|
|
1797 |
|
#ifdef SUPPORT_LIBZ |
1798 |
|
printf("Files whose names end in .gz are read using zlib.\n"); |
1799 |
|
#endif |
1800 |
|
|
1801 |
|
#ifdef SUPPORT_LIBBZ2 |
1802 |
|
printf("Files whose names end in .bz2 are read using bzlib2.\n"); |
1803 |
|
#endif |
1804 |
|
|
1805 |
|
#if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2 |
1806 |
|
printf("Other files and the standard input are read as plain files.\n\n"); |
1807 |
|
#else |
1808 |
|
printf("All files are read as plain files, without any interpretation.\n\n"); |
1809 |
|
#endif |
1810 |
|
|
1811 |
|
printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n"); |
1812 |
printf("Options:\n"); |
printf("Options:\n"); |
1813 |
|
|
1814 |
for (op = optionlist; op->one_char != 0; op++) |
for (op = optionlist; op->one_char != 0; op++) |
1815 |
{ |
{ |
1816 |
int n; |
int n; |
1817 |
char s[4]; |
char s[4]; |
1818 |
|
|
1819 |
|
/* Two options were accidentally implemented and documented with underscores |
1820 |
|
instead of hyphens in their names, something that was not noticed for quite a |
1821 |
|
few releases. When fixing this, I left the underscored versions in the list |
1822 |
|
in case people were using them. However, we don't want to display them in the |
1823 |
|
help data. There are no other options that contain underscores, and we do not |
1824 |
|
expect ever to implement such options. Therefore, just omit any option that |
1825 |
|
contains an underscore. */ |
1826 |
|
|
1827 |
|
if (strchr(op->long_name, '_') != NULL) continue; |
1828 |
|
|
1829 |
if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " "); |
if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " "); |
1830 |
printf(" %s --%s%n", s, op->long_name, &n); |
n = 31 - printf(" %s --%s", s, op->long_name); |
|
n = 30 - n; |
|
1831 |
if (n < 1) n = 1; |
if (n < 1) n = 1; |
1832 |
printf("%.*s%s\n", n, " ", op->help_text); |
printf("%.*s%s\n", n, " ", op->help_text); |
1833 |
} |
} |
1834 |
|
|
1835 |
printf("\n -f<filename> or --file=<filename>\n"); |
printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n"); |
1836 |
printf(" Read patterns from <filename> instead of using a command line option.\n"); |
printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE); |
1837 |
printf(" Trailing white space is removed; blanks lines are ignored.\n"); |
printf("When reading patterns from a file instead of using a command line option,\n"); |
1838 |
printf(" There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT); |
printf("trailing white space is removed and blank lines are ignored.\n"); |
1839 |
|
printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n", |
1840 |
|
MAX_PATTERN_COUNT, PATBUFSIZE); |
1841 |
|
|
1842 |
printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n"); |
printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n"); |
1843 |
printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n"); |
printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n"); |
1844 |
} |
} |
1845 |
|
|
1847 |
|
|
1848 |
|
|
1849 |
/************************************************* |
/************************************************* |
1850 |
* Handle an option * |
* Handle a single-letter, no data option * |
1851 |
*************************************************/ |
*************************************************/ |
1852 |
|
|
1853 |
static int |
static int |
1855 |
{ |
{ |
1856 |
switch(letter) |
switch(letter) |
1857 |
{ |
{ |
1858 |
case -1: help(); exit(0); |
case N_FOFFSETS: file_offsets = TRUE; break; |
1859 |
|
case N_HELP: help(); pcregrep_exit(0); |
1860 |
|
case N_LOFFSETS: line_offsets = number = TRUE; break; |
1861 |
|
case N_LBUFFER: line_buffered = TRUE; break; |
1862 |
case 'c': count_only = TRUE; break; |
case 'c': count_only = TRUE; break; |
1863 |
case 'h': filenames = FALSE; break; |
case 'F': process_options |= PO_FIXED_STRINGS; break; |
1864 |
|
case 'H': filenames = FN_FORCE; break; |
1865 |
|
case 'h': filenames = FN_NONE; break; |
1866 |
case 'i': options |= PCRE_CASELESS; break; |
case 'i': options |= PCRE_CASELESS; break; |
1867 |
case 'l': filenames_only = TRUE; |
case 'j': study_options |= PCRE_STUDY_JIT_COMPILE; break; |
1868 |
|
case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break; |
1869 |
|
case 'L': filenames = FN_NOMATCH_ONLY; break; |
1870 |
|
case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break; |
1871 |
case 'n': number = TRUE; break; |
case 'n': number = TRUE; break; |
1872 |
case 'r': recurse = TRUE; break; |
case 'o': only_matching = 0; break; |
1873 |
|
case 'q': quiet = TRUE; break; |
1874 |
|
case 'r': dee_action = dee_RECURSE; break; |
1875 |
case 's': silent = TRUE; break; |
case 's': silent = TRUE; break; |
1876 |
|
case 'u': options |= PCRE_UTF8; utf8 = TRUE; break; |
1877 |
case 'v': invert = TRUE; break; |
case 'v': invert = TRUE; break; |
1878 |
case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break; |
case 'w': process_options |= PO_WORD_MATCH; break; |
1879 |
|
case 'x': process_options |= PO_LINE_MATCH; break; |
1880 |
|
|
1881 |
case 'V': |
case 'V': |
1882 |
fprintf(stderr, "pcregrep version %s using ", VERSION); |
fprintf(stderr, "pcregrep version %s\n", pcre_version()); |
1883 |
fprintf(stderr, "PCRE version %s\n", pcre_version()); |
pcregrep_exit(0); |
|
exit(0); |
|
1884 |
break; |
break; |
1885 |
|
|
1886 |
default: |
default: |
1887 |
fprintf(stderr, "pcregrep: Unknown option -%c\n", letter); |
fprintf(stderr, "pcregrep: Unknown option -%c\n", letter); |
1888 |
exit(usage(2)); |
pcregrep_exit(usage(2)); |
1889 |
|
} |
1890 |
|
|
1891 |
|
return options; |
1892 |
|
} |
1893 |
|
|
1894 |
|
|
1895 |
|
|
1896 |
|
|
1897 |
|
/************************************************* |
1898 |
|
* Construct printed ordinal * |
1899 |
|
*************************************************/ |
1900 |
|
|
1901 |
|
/* This turns a number into "1st", "3rd", etc. */ |
1902 |
|
|
1903 |
|
static char *
ordin(int n)
{
/* Returns the number followed by its English ordinal ending, e.g. "1st",
"22nd", "3rd", "11th". The result lives in a static buffer that is overwritten
by the next call. */

/* Room for any int: at most three decimal digits per byte, plus a sign, the
two-letter ending and the terminating zero. */

static char buffer[sizeof(int) * 3 + 4];
const char *ending = "th";
int last_two = n % 100;

if (last_two < 0) last_two = -last_two;

/* Numbers ending in 11, 12 or 13 take "th" despite their final digit. */

if (last_two < 11 || last_two > 13)
  {
  switch (last_two % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d", n);
strcpy(buffer + strlen(buffer), ending);
return buffer;
}
1919 |
|
|
1920 |
|
|
1921 |
|
|
1922 |
|
/************************************************* |
1923 |
|
* Compile a single pattern * |
1924 |
|
*************************************************/ |
1925 |
|
|
1926 |
|
/* When the -F option has been used, this is called for each substring. |
1927 |
|
Otherwise it's called for each supplied pattern. |
1928 |
|
|
1929 |
|
Arguments: |
1930 |
|
pattern the pattern string |
1931 |
|
options the PCRE options |
1932 |
|
filename the file name, or NULL for a command-line pattern |
1933 |
|
count 0 if this is the only command line pattern, or |
1934 |
|
number of the command line pattern, or |
1935 |
|
linenumber for a pattern from a file |
1936 |
|
|
1937 |
|
Returns: TRUE on success, FALSE after an error |
1938 |
|
*/ |
1939 |
|
|
1940 |
|
static BOOL |
1941 |
|
compile_single_pattern(char *pattern, int options, char *filename, int count) |
1942 |
|
{ |
1943 |
|
char buffer[PATBUFSIZE]; |
1944 |
|
const char *error; |
1945 |
|
int errptr; |
1946 |
|
|
1947 |
|
if (pattern_count >= MAX_PATTERN_COUNT) |
1948 |
|
{ |
1949 |
|
fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n", |
1950 |
|
(filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT); |
1951 |
|
return FALSE; |
1952 |
|
} |
1953 |
|
|
1954 |
|
sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern, |
1955 |
|
suffix[process_options]); |
1956 |
|
pattern_list[pattern_count] = |
1957 |
|
pcre_compile(buffer, options, &error, &errptr, pcretables); |
1958 |
|
if (pattern_list[pattern_count] != NULL) |
1959 |
|
{ |
1960 |
|
pattern_count++; |
1961 |
|
return TRUE; |
1962 |
|
} |
1963 |
|
|
1964 |
|
/* Handle compile errors */ |
1965 |
|
|
1966 |
|
errptr -= (int)strlen(prefix[process_options]); |
1967 |
|
if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern); |
1968 |
|
|
1969 |
|
if (filename == NULL) |
1970 |
|
{ |
1971 |
|
if (count == 0) |
1972 |
|
fprintf(stderr, "pcregrep: Error in command-line regex " |
1973 |
|
"at offset %d: %s\n", errptr, error); |
1974 |
|
else |
1975 |
|
fprintf(stderr, "pcregrep: Error in %s command-line regex " |
1976 |
|
"at offset %d: %s\n", ordin(count), errptr, error); |
1977 |
|
} |
1978 |
|
else |
1979 |
|
{ |
1980 |
|
fprintf(stderr, "pcregrep: Error in regex in line %d of %s " |
1981 |
|
"at offset %d: %s\n", count, filename, errptr, error); |
1982 |
} |
} |
1983 |
|
|
1984 |
return options; |
return FALSE; |
1985 |
} |
} |
1986 |
|
|
1987 |
|
|
1988 |
|
|
1989 |
|
/************************************************* |
1990 |
|
* Compile one supplied pattern * |
1991 |
|
*************************************************/ |
1992 |
|
|
1993 |
|
/* When the -F option has been used, each string may be a list of strings, |
1994 |
|
separated by line breaks. They will be matched literally. |
1995 |
|
|
1996 |
|
Arguments: |
1997 |
|
pattern the pattern string |
1998 |
|
options the PCRE options |
1999 |
|
filename the file name, or NULL for a command-line pattern |
2000 |
|
count 0 if this is the only command line pattern, or |
2001 |
|
number of the command line pattern, or |
2002 |
|
linenumber for a pattern from a file |
2003 |
|
|
2004 |
|
Returns: TRUE on success, FALSE after an error |
2005 |
|
*/ |
2006 |
|
|
2007 |
|
static BOOL |
2008 |
|
compile_pattern(char *pattern, int options, char *filename, int count) |
2009 |
|
{ |
2010 |
|
if ((process_options & PO_FIXED_STRINGS) != 0) |
2011 |
|
{ |
2012 |
|
char *eop = pattern + strlen(pattern); |
2013 |
|
char buffer[PATBUFSIZE]; |
2014 |
|
for(;;) |
2015 |
|
{ |
2016 |
|
int ellength; |
2017 |
|
char *p = end_of_line(pattern, eop, &ellength); |
2018 |
|
if (ellength == 0) |
2019 |
|
return compile_single_pattern(pattern, options, filename, count); |
2020 |
|
sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern); |
2021 |
|
pattern = p; |
2022 |
|
if (!compile_single_pattern(buffer, options, filename, count)) |
2023 |
|
return FALSE; |
2024 |
|
} |
2025 |
|
} |
2026 |
|
else return compile_single_pattern(pattern, options, filename, count); |
2027 |
|
} |
2028 |
|
|
2029 |
|
|
2030 |
|
|
2031 |
/************************************************* |
/************************************************* |
2032 |
* Main program * |
* Main program * |
2033 |
*************************************************/ |
*************************************************/ |
2034 |
|
|
2035 |
|
/* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */ |
2036 |
|
|
2037 |
int |
int |
2038 |
main(int argc, char **argv) |
main(int argc, char **argv) |
2039 |
{ |
{ |
2040 |
int i, j; |
int i, j; |
2041 |
int rc = 1; |
int rc = 1; |
2042 |
int options = 0; |
int pcre_options = 0; |
2043 |
|
int cmd_pattern_count = 0; |
2044 |
|
int hint_count = 0; |
2045 |
int errptr; |
int errptr; |
|
const char *error; |
|
2046 |
BOOL only_one_at_top; |
BOOL only_one_at_top; |
2047 |
|
char *patterns[MAX_PATTERN_COUNT]; |
2048 |
|
const char *locale_from = "--locale"; |
2049 |
|
const char *error; |
2050 |
|
|
2051 |
|
/* Set the default line ending value from the default in the PCRE library; |
2052 |
|
"lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf". |
2053 |
|
Note that the return values from pcre_config(), though derived from the ASCII |
2054 |
|
codes, are the same in EBCDIC environments, so we must use the actual values |
2055 |
|
rather than escapes such as '\r'. */ |
2056 |
|
|
2057 |
|
(void)pcre_config(PCRE_CONFIG_NEWLINE, &i); |
2058 |
|
switch(i) |
2059 |
|
{ |
2060 |
|
default: newline = (char *)"lf"; break; |
2061 |
|
case 13: newline = (char *)"cr"; break; |
2062 |
|
case (13 << 8) | 10: newline = (char *)"crlf"; break; |
2063 |
|
case -1: newline = (char *)"any"; break; |
2064 |
|
case -2: newline = (char *)"anycrlf"; break; |
2065 |
|
} |
2066 |
|
|
2067 |
/* Process the options */ |
/* Process the options */ |
2068 |
|
|
2069 |
for (i = 1; i < argc; i++) |
for (i = 1; i < argc; i++) |
2070 |
{ |
{ |
2071 |
|
option_item *op = NULL; |
2072 |
|
char *option_data = (char *)""; /* default to keep compiler happy */ |
2073 |
|
BOOL longop; |
2074 |
|
BOOL longopwasequals = FALSE; |
2075 |
|
|
2076 |
if (argv[i][0] != '-') break; |
if (argv[i][0] != '-') break; |
2077 |
|
|
2078 |
/* Long name options */ |
/* If we hit an argument that is just "-", it may be a reference to STDIN, |
2079 |
|
but only if we have previously had -e or -f to define the patterns. */ |
2080 |
|
|
2081 |
|
if (argv[i][1] == 0) |
2082 |
|
{ |
2083 |
|
if (pattern_filename != NULL || pattern_count > 0) break; |
2084 |
|
else pcregrep_exit(usage(2)); |
2085 |
|
} |
2086 |
|
|
2087 |
|
/* Handle a long name option, or -- to terminate the options */ |
2088 |
|
|
2089 |
if (argv[i][1] == '-') |
if (argv[i][1] == '-') |
2090 |
{ |
{ |
2091 |
option_item *op; |
char *arg = argv[i] + 2; |
2092 |
|
char *argequals = strchr(arg, '='); |
2093 |
|
|
2094 |
if (strncmp(argv[i]+2, "file=", 5) == 0) |
if (*arg == 0) /* -- terminates options */ |
2095 |
{ |
{ |
2096 |
pattern_filename = argv[i] + 7; |
i++; |
2097 |
continue; |
break; /* out of the options-handling loop */ |
2098 |
} |
} |
2099 |
|
|
2100 |
|
longop = TRUE; |
2101 |
|
|
2102 |
|
/* Some long options have data that follows after =, for example file=name. |
2103 |
|
Some options have variations in the long name spelling: specifically, we |
2104 |
|
allow "regexp" because GNU grep allows it, though I personally go along |
2105 |
|
with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p". |
2106 |
|
These options are entered in the table as "regex(p)". Options can be in |
2107 |
|
both these categories. */ |
2108 |
|
|
2109 |
for (op = optionlist; op->one_char != 0; op++) |
for (op = optionlist; op->one_char != 0; op++) |
2110 |
{ |
{ |
2111 |
if (strcmp(argv[i]+2, op->long_name) == 0) |
char *opbra = strchr(op->long_name, '('); |
2112 |
|
char *equals = strchr(op->long_name, '='); |
2113 |
|
|
2114 |
|
/* Handle options with only one spelling of the name */ |
2115 |
|
|
2116 |
|
if (opbra == NULL) /* Does not contain '(' */ |
2117 |
|
{ |
2118 |
|
if (equals == NULL) /* Not thing=data case */ |
2119 |
|
{ |
2120 |
|
if (strcmp(arg, op->long_name) == 0) break; |
2121 |
|
} |
2122 |
|
else /* Special case xxx=data */ |
2123 |
|
{ |
2124 |
|
int oplen = (int)(equals - op->long_name); |
2125 |
|
int arglen = (argequals == NULL)? |
2126 |
|
(int)strlen(arg) : (int)(argequals - arg); |
2127 |
|
if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0) |
2128 |
|
{ |
2129 |
|
option_data = arg + arglen; |
2130 |
|
if (*option_data == '=') |
2131 |
|
{ |
2132 |
|
option_data++; |
2133 |
|
longopwasequals = TRUE; |
2134 |
|
} |
2135 |
|
break; |
2136 |
|
} |
2137 |
|
} |
2138 |
|
} |
2139 |
|
|
2140 |
|
/* Handle options with an alternate spelling of the name */ |
2141 |
|
|
2142 |
|
else |
2143 |
{ |
{ |
2144 |
options = handle_option(op->one_char, options); |
char buff1[24]; |
2145 |
break; |
char buff2[24]; |
2146 |
|
|
2147 |
|
int baselen = (int)(opbra - op->long_name); |
2148 |
|
int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1); |
2149 |
|
int arglen = (argequals == NULL || equals == NULL)? |
2150 |
|
(int)strlen(arg) : (int)(argequals - arg); |
2151 |
|
|
2152 |
|
sprintf(buff1, "%.*s", baselen, op->long_name); |
2153 |
|
sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1); |
2154 |
|
|
2155 |
|
if (strncmp(arg, buff1, arglen) == 0 || |
2156 |
|
strncmp(arg, buff2, arglen) == 0) |
2157 |
|
{ |
2158 |
|
if (equals != NULL && argequals != NULL) |
2159 |
|
{ |
2160 |
|
option_data = argequals; |
2161 |
|
if (*option_data == '=') |
2162 |
|
{ |
2163 |
|
option_data++; |
2164 |
|
longopwasequals = TRUE; |
2165 |
|
} |
2166 |
|
} |
2167 |
|
break; |
2168 |
|
} |
2169 |
} |
} |
2170 |
} |
} |
2171 |
|
|
2172 |
if (op->one_char == 0) |
if (op->one_char == 0) |
2173 |
{ |
{ |
2174 |
fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]); |
fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]); |
2175 |
exit(usage(2)); |
pcregrep_exit(usage(2)); |
2176 |
} |
} |
2177 |
} |
} |
2178 |
|
|
2179 |
/* One-char options */ |
/* Jeffrey Friedl's debugging harness uses these additional options which |
2180 |
|
are not in the right form for putting in the option table because they use |
2181 |
|
only one hyphen, yet are more than one character long. By putting them |
2182 |
|
separately here, they will not get displayed as part of the help() output, |
2183 |
|
but I don't think Jeffrey will care about that. */ |
2184 |
|
|
2185 |
|
#ifdef JFRIEDL_DEBUG |
2186 |
|
else if (strcmp(argv[i], "-pre") == 0) { |
2187 |
|
jfriedl_prefix = argv[++i]; |
2188 |
|
continue; |
2189 |
|
} else if (strcmp(argv[i], "-post") == 0) { |
2190 |
|
jfriedl_postfix = argv[++i]; |
2191 |
|
continue; |
2192 |
|
} else if (strcmp(argv[i], "-XT") == 0) { |
2193 |
|
sscanf(argv[++i], "%d", &jfriedl_XT); |
2194 |
|
continue; |
2195 |
|
} else if (strcmp(argv[i], "-XR") == 0) { |
2196 |
|
sscanf(argv[++i], "%d", &jfriedl_XR); |
2197 |
|
continue; |
2198 |
|
} |
2199 |
|
#endif |
2200 |
|
|
2201 |
|
|
2202 |
|
/* One-char options; many that have no data may be in a single argument; we |
2203 |
|
continue till we hit the last one or one that needs data. */ |
2204 |
|
|
2205 |
else |
else |
2206 |
{ |
{ |
2207 |
char *s = argv[i] + 1; |
char *s = argv[i] + 1; |
2208 |
|
longop = FALSE; |
2209 |
while (*s != 0) |
while (*s != 0) |
2210 |
{ |
{ |
2211 |
if (*s == 'f') |
for (op = optionlist; op->one_char != 0; op++) |
2212 |
{ |
{ |
2213 |
pattern_filename = s + 1; |
if (*s == op->one_char) break; |
2214 |
if (pattern_filename[0] == 0) |
} |
2215 |
{ |
if (op->one_char == 0) |
2216 |
if (i >= argc - 1) |
{ |
2217 |
{ |
fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n", |
2218 |
fprintf(stderr, "pcregrep: File name missing after -f\n"); |
*s, argv[i]); |
2219 |
exit(usage(2)); |
pcregrep_exit(usage(2)); |
2220 |
} |
} |
2221 |
pattern_filename = argv[++i]; |
|
2222 |
} |
/* Check for a single-character option that has data: OP_OP_NUMBER |
2223 |
break; |
is used for one that either has a numerical number or defaults, i.e. the |
2224 |
|
data is optional. If a digit follows, there is data; if not, carry on |
2225 |
|
with other single-character options in the same string. */ |
2226 |
|
|
2227 |
|
option_data = s+1; |
2228 |
|
if (op->type == OP_OP_NUMBER) |
2229 |
|
{ |
2230 |
|
if (isdigit((unsigned char)s[1])) break; |
2231 |
|
} |
2232 |
|
else /* Check for end or a dataless option */ |
2233 |
|
{ |
2234 |
|
if (op->type != OP_NODATA || s[1] == 0) break; |
2235 |
} |
} |
2236 |
else options = handle_option(*s++, options); |
|
2237 |
|
/* Handle a single-character option with no data, then loop for the |
2238 |
|
next character in the string. */ |
2239 |
|
|
2240 |
|
pcre_options = handle_option(*s++, pcre_options); |
2241 |
} |
} |
2242 |
} |
} |
|
} |
|
2243 |
|
|
2244 |
pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *)); |
/* At this point we should have op pointing to a matched option. If the type |
2245 |
hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *)); |
is NO_DATA, it means that there is no data, and the option might set |
2246 |
|
something in the PCRE options. */ |
2247 |
|
|
2248 |
if (pattern_list == NULL || hints_list == NULL) |
if (op->type == OP_NODATA) |
2249 |
{ |
{ |
2250 |
fprintf(stderr, "pcregrep: malloc failed\n"); |
pcre_options = handle_option(op->one_char, pcre_options); |
2251 |
return 2; |
continue; |
2252 |
} |
} |
2253 |
|
|
2254 |
/* Compile the regular expression(s). */ |
/* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that |
2255 |
|
either has a value or defaults to something. It cannot have data in a |
2256 |
|
separate item. At the moment, the only such options are "colo(u)r", |
2257 |
|
"only-matching", and Jeffrey Friedl's special -S debugging option. */ |
2258 |
|
|
2259 |
if (pattern_filename != NULL) |
if (*option_data == 0 && |
2260 |
{ |
(op->type == OP_OP_STRING || op->type == OP_OP_NUMBER)) |
|
FILE *f = fopen(pattern_filename, "r"); |
|
|
char buffer[BUFSIZ]; |
|
|
if (f == NULL) |
|
2261 |
{ |
{ |
2262 |
fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename, |
switch (op->one_char) |
2263 |
strerror(errno)); |
{ |
2264 |
return 2; |
case N_COLOUR: |
2265 |
|
colour_option = (char *)"auto"; |
2266 |
|
break; |
2267 |
|
|
2268 |
|
case 'o': |
2269 |
|
only_matching = 0; |
2270 |
|
break; |
2271 |
|
|
2272 |
|
#ifdef JFRIEDL_DEBUG |
2273 |
|
case 'S': |
2274 |
|
S_arg = 0; |
2275 |
|
break; |
2276 |
|
#endif |
2277 |
|
} |
2278 |
|
continue; |
2279 |
} |
} |
2280 |
while (fgets(buffer, sizeof(buffer), f) != NULL) |
|
2281 |
|
/* Otherwise, find the data string for the option. */ |
2282 |
|
|
2283 |
|
if (*option_data == 0) |
2284 |
{ |
{ |
2285 |
char *s = buffer + (int)strlen(buffer); |
if (i >= argc - 1 || longopwasequals) |
2286 |
if (pattern_count >= MAX_PATTERN_COUNT) |
{ |
2287 |
|
fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]); |
2288 |
|
pcregrep_exit(usage(2)); |
2289 |
|
} |
2290 |
|
option_data = argv[++i]; |
2291 |
|
} |
2292 |
|
|
2293 |
|
/* If the option type is OP_PATLIST, it's the -e option, which can be called |
2294 |
|
multiple times to create a list of patterns. */ |
2295 |
|
|
2296 |
|
if (op->type == OP_PATLIST) |
2297 |
|
{ |
2298 |
|
if (cmd_pattern_count >= MAX_PATTERN_COUNT) |
2299 |
{ |
{ |
2300 |
fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n", |
fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n", |
2301 |
MAX_PATTERN_COUNT); |
MAX_PATTERN_COUNT); |
2302 |
return 2; |
return 2; |
2303 |
} |
} |
2304 |
while (s > buffer && isspace((unsigned char)(s[-1]))) s--; |
patterns[cmd_pattern_count++] = option_data; |
2305 |
if (s == buffer) continue; |
} |
2306 |
*s = 0; |
|
2307 |
pattern_list[pattern_count] = pcre_compile(buffer, options, &error, |
/* Otherwise, deal with single string or numeric data values. */ |
2308 |
&errptr, NULL); |
|
2309 |
if (pattern_list[pattern_count++] == NULL) |
else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER && |
2310 |
|
op->type != OP_OP_NUMBER) |
2311 |
|
{ |
2312 |
|
*((char **)op->dataptr) = option_data; |
2313 |
|
} |
2314 |
|
|
2315 |
|
/* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used |
2316 |
|
only for unpicking arguments, so just keep it simple. */ |
2317 |
|
|
2318 |
|
else |
2319 |
|
{ |
2320 |
|
unsigned long int n = 0; |
2321 |
|
char *endptr = option_data; |
2322 |
|
while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++; |
2323 |
|
while (isdigit((unsigned char)(*endptr))) |
2324 |
|
n = n * 10 + (int)(*endptr++ - '0'); |
2325 |
|
if (toupper(*endptr) == 'K') |
2326 |
{ |
{ |
2327 |
fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n", |
n *= 1024; |
2328 |
pattern_count, errptr, error); |
endptr++; |
|
return 2; |
|
2329 |
} |
} |
2330 |
|
else if (toupper(*endptr) == 'M') |
2331 |
|
{ |
2332 |
|
n *= 1024*1024; |
2333 |
|
endptr++; |
2334 |
|
} |
2335 |
|
if (*endptr != 0) |
2336 |
|
{ |
2337 |
|
if (longop) |
2338 |
|
{ |
2339 |
|
char *equals = strchr(op->long_name, '='); |
2340 |
|
int nlen = (equals == NULL)? (int)strlen(op->long_name) : |
2341 |
|
(int)(equals - op->long_name); |
2342 |
|
fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n", |
2343 |
|
option_data, nlen, op->long_name); |
2344 |
|
} |
2345 |
|
else |
2346 |
|
fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n", |
2347 |
|
option_data, op->one_char); |
2348 |
|
pcregrep_exit(usage(2)); |
2349 |
|
} |
2350 |
|
if (op->type == OP_LONGNUMBER) |
2351 |
|
*((unsigned long int *)op->dataptr) = n; |
2352 |
|
else |
2353 |
|
*((int *)op->dataptr) = n; |
2354 |
|
} |
2355 |
|
} |
2356 |
|
|
2357 |
|
/* Options have been decoded. If -C was used, its value is used as a default |
2358 |
|
for -A and -B. */ |
2359 |
|
|
2360 |
|
if (both_context > 0) |
2361 |
|
{ |
2362 |
|
if (after_context == 0) after_context = both_context; |
2363 |
|
if (before_context == 0) before_context = both_context; |
2364 |
|
} |
2365 |
|
|
2366 |
|
/* Only one of --only-matching, --file-offsets, or --line-offsets is permitted. |
2367 |
|
However, the latter two set only_matching. */ |
2368 |
|
|
2369 |
|
if ((only_matching >= 0 && (file_offsets || line_offsets)) || |
2370 |
|
(file_offsets && line_offsets)) |
2371 |
|
{ |
2372 |
|
fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets " |
2373 |
|
"and/or --line-offsets\n"); |
2374 |
|
pcregrep_exit(usage(2)); |
2375 |
|
} |
2376 |
|
|
2377 |
|
if (file_offsets || line_offsets) only_matching = 0; |
2378 |
|
|
2379 |
|
/* If a locale has not been provided as an option, see if the LC_CTYPE or |
2380 |
|
LC_ALL environment variable is set, and if so, use it. */ |
2381 |
|
|
2382 |
|
if (locale == NULL) |
2383 |
|
{ |
2384 |
|
locale = getenv("LC_ALL"); |
2385 |
|
locale_from = "LCC_ALL"; |
2386 |
|
} |
2387 |
|
|
2388 |
|
if (locale == NULL) |
2389 |
|
{ |
2390 |
|
locale = getenv("LC_CTYPE"); |
2391 |
|
locale_from = "LC_CTYPE"; |
2392 |
|
} |
2393 |
|
|
2394 |
|
/* If a locale has been provided, set it, and generate the tables the PCRE |
2395 |
|
needs. Otherwise, pcretables==NULL, which causes the use of default tables. */ |
2396 |
|
|
2397 |
|
if (locale != NULL) |
2398 |
|
{ |
2399 |
|
if (setlocale(LC_CTYPE, locale) == NULL) |
2400 |
|
{ |
2401 |
|
fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n", |
2402 |
|
locale, locale_from); |
2403 |
|
return 2; |
2404 |
|
} |
2405 |
|
pcretables = pcre_maketables(); |
2406 |
|
} |
2407 |
|
|
2408 |
|
/* Sort out colouring */ |
2409 |
|
|
2410 |
|
if (colour_option != NULL && strcmp(colour_option, "never") != 0) |
2411 |
|
{ |
2412 |
|
if (strcmp(colour_option, "always") == 0) do_colour = TRUE; |
2413 |
|
else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty(); |
2414 |
|
else |
2415 |
|
{ |
2416 |
|
fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n", |
2417 |
|
colour_option); |
2418 |
|
return 2; |
2419 |
|
} |
2420 |
|
if (do_colour) |
2421 |
|
{ |
2422 |
|
char *cs = getenv("PCREGREP_COLOUR"); |
2423 |
|
if (cs == NULL) cs = getenv("PCREGREP_COLOR"); |
2424 |
|
if (cs != NULL) colour_string = cs; |
2425 |
} |
} |
|
fclose(f); |
|
2426 |
} |
} |
2427 |
|
|
2428 |
/* If no file name, a single regex must be given inline */ |
/* Interpret the newline type; the default settings are Unix-like. */ |
2429 |
|
|
2430 |
|
if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0) |
2431 |
|
{ |
2432 |
|
pcre_options |= PCRE_NEWLINE_CR; |
2433 |
|
endlinetype = EL_CR; |
2434 |
|
} |
2435 |
|
else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0) |
2436 |
|
{ |
2437 |
|
pcre_options |= PCRE_NEWLINE_LF; |
2438 |
|
endlinetype = EL_LF; |
2439 |
|
} |
2440 |
|
else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0) |
2441 |
|
{ |
2442 |
|
pcre_options |= PCRE_NEWLINE_CRLF; |
2443 |
|
endlinetype = EL_CRLF; |
2444 |
|
} |
2445 |
|
else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0) |
2446 |
|
{ |
2447 |
|
pcre_options |= PCRE_NEWLINE_ANY; |
2448 |
|
endlinetype = EL_ANY; |
2449 |
|
} |
2450 |
|
else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0) |
2451 |
|
{ |
2452 |
|
pcre_options |= PCRE_NEWLINE_ANYCRLF; |
2453 |
|
endlinetype = EL_ANYCRLF; |
2454 |
|
} |
2455 |
else |
else |
2456 |
{ |
{ |
2457 |
if (i >= argc) return usage(0); |
fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline); |
2458 |
pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL); |
return 2; |
2459 |
if (pattern_list[0] == NULL) |
} |
2460 |
|
|
2461 |
|
/* Interpret the text values for -d and -D */ |
2462 |
|
|
2463 |
|
if (dee_option != NULL) |
2464 |
|
{ |
2465 |
|
if (strcmp(dee_option, "read") == 0) dee_action = dee_READ; |
2466 |
|
else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE; |
2467 |
|
else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP; |
2468 |
|
else |
2469 |
|
{ |
2470 |
|
fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option); |
2471 |
|
return 2; |
2472 |
|
} |
2473 |
|
} |
2474 |
|
|
2475 |
|
if (DEE_option != NULL) |
2476 |
|
{ |
2477 |
|
if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ; |
2478 |
|
else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP; |
2479 |
|
else |
2480 |
{ |
{ |
2481 |
fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr, |
fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option); |
|
error); |
|
2482 |
return 2; |
return 2; |
2483 |
} |
} |
|
pattern_count++; |
|
2484 |
} |
} |
2485 |
|
|
2486 |
/* Study the regular expressions, as we will be running them many times */ |
/* Check the values for Jeffrey Friedl's debugging options. */ |
2487 |
|
|
2488 |
|
#ifdef JFRIEDL_DEBUG |
2489 |
|
if (S_arg > 9) |
2490 |
|
{ |
2491 |
|
fprintf(stderr, "pcregrep: bad value for -S option\n"); |
2492 |
|
return 2; |
2493 |
|
} |
2494 |
|
if (jfriedl_XT != 0 || jfriedl_XR != 0) |
2495 |
|
{ |
2496 |
|
if (jfriedl_XT == 0) jfriedl_XT = 1; |
2497 |
|
if (jfriedl_XR == 0) jfriedl_XR = 1; |
2498 |
|
} |
2499 |
|
#endif |
2500 |
|
|
2501 |
|
/* Get memory for the main buffer, and to store the pattern and hints lists. */ |
2502 |
|
|
2503 |
|
bufsize = 3*bufthird; |
2504 |
|
main_buffer = (char *)malloc(bufsize); |
2505 |
|
pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *)); |
2506 |
|
hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *)); |
2507 |
|
|
2508 |
|
if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL) |
2509 |
|
{ |
2510 |
|
fprintf(stderr, "pcregrep: malloc failed\n"); |
2511 |
|
goto EXIT2; |
2512 |
|
} |
2513 |
|
|
2514 |
|
/* If no patterns were provided by -e, and there is no file provided by -f, |
2515 |
|
the first argument is the one and only pattern, and it must exist. */ |
2516 |
|
|
2517 |
|
if (cmd_pattern_count == 0 && pattern_filename == NULL) |
2518 |
|
{ |
2519 |
|
if (i >= argc) return usage(2); |
2520 |
|
patterns[cmd_pattern_count++] = argv[i++]; |
2521 |
|
} |
2522 |
|
|
2523 |
|
/* Compile the patterns that were provided on the command line, either by |
2524 |
|
multiple uses of -e or as a single unkeyed pattern. */ |
2525 |
|
|
2526 |
|
for (j = 0; j < cmd_pattern_count; j++) |
2527 |
|
{ |
2528 |
|
if (!compile_pattern(patterns[j], pcre_options, NULL, |
2529 |
|
(j == 0 && cmd_pattern_count == 1)? 0 : j + 1)) |
2530 |
|
goto EXIT2; |
2531 |
|
} |
2532 |
|
|
2533 |
|
/* Compile the regular expressions that are provided in a file. */ |
2534 |
|
|
2535 |
|
if (pattern_filename != NULL) |
2536 |
|
{ |
2537 |
|
int linenumber = 0; |
2538 |
|
FILE *f; |
2539 |
|
char *filename; |
2540 |
|
char buffer[PATBUFSIZE]; |
2541 |
|
|
2542 |
|
if (strcmp(pattern_filename, "-") == 0) |
2543 |
|
{ |
2544 |
|
f = stdin; |
2545 |
|
filename = stdin_name; |
2546 |
|
} |
2547 |
|
else |
2548 |
|
{ |
2549 |
|
f = fopen(pattern_filename, "r"); |
2550 |
|
if (f == NULL) |
2551 |
|
{ |
2552 |
|
fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename, |
2553 |
|
strerror(errno)); |
2554 |
|
goto EXIT2; |
2555 |
|
} |
2556 |
|
filename = pattern_filename; |
2557 |
|
} |
2558 |
|
|
2559 |
|
while (fgets(buffer, PATBUFSIZE, f) != NULL) |
2560 |
|
{ |
2561 |
|
char *s = buffer + (int)strlen(buffer); |
2562 |
|
while (s > buffer && isspace((unsigned char)(s[-1]))) s--; |
2563 |
|
*s = 0; |
2564 |
|
linenumber++; |
2565 |
|
if (buffer[0] == 0) continue; /* Skip blank lines */ |
2566 |
|
if (!compile_pattern(buffer, pcre_options, filename, linenumber)) |
2567 |
|
goto EXIT2; |
2568 |
|
} |
2569 |
|
|
2570 |
|
if (f != stdin) fclose(f); |
2571 |
|
} |
2572 |
|
|
2573 |
|
/* Study the regular expressions, as we will be running them many times */ |
2574 |
|
|
2575 |
for (j = 0; j < pattern_count; j++) |
for (j = 0; j < pattern_count; j++) |
2576 |
{ |
{ |
2577 |
hints_list[j] = pcre_study(pattern_list[j], 0, &error); |
hints_list[j] = pcre_study(pattern_list[j], study_options, &error); |
2578 |
if (error != NULL) |
if (error != NULL) |
2579 |
{ |
{ |
2580 |
char s[16]; |
char s[16]; |
2581 |
if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j); |
if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j); |
2582 |
fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error); |
fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error); |
2583 |
return 2; |
goto EXIT2; |
2584 |
|
} |
2585 |
|
hint_count++; |
2586 |
|
} |
2587 |
|
|
2588 |
|
/* If --match-limit or --recursion-limit was set, put the value(s) into the |
2589 |
|
pcre_extra block for each pattern. */ |
2590 |
|
|
2591 |
|
if (match_limit > 0 || match_limit_recursion > 0) |
2592 |
|
{ |
2593 |
|
for (j = 0; j < pattern_count; j++) |
2594 |
|
{ |
2595 |
|
if (hints_list[j] == NULL) |
2596 |
|
{ |
2597 |
|
hints_list[j] = malloc(sizeof(pcre_extra)); |
2598 |
|
if (hints_list[j] == NULL) |
2599 |
|
{ |
2600 |
|
fprintf(stderr, "pcregrep: malloc failed\n"); |
2601 |
|
pcregrep_exit(2); |
2602 |
|
} |
2603 |
|
} |
2604 |
|
if (match_limit > 0) |
2605 |
|
{ |
2606 |
|
hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT; |
2607 |
|
hints_list[j]->match_limit = match_limit; |
2608 |
|
} |
2609 |
|
if (match_limit_recursion > 0) |
2610 |
|
{ |
2611 |
|
hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; |
2612 |
|
hints_list[j]->match_limit_recursion = match_limit_recursion; |
2613 |
|
} |
2614 |
|
} |
2615 |
|
} |
2616 |
|
|
2617 |
|
/* If there are include or exclude patterns, compile them. */ |
2618 |
|
|
2619 |
|
if (exclude_pattern != NULL) |
2620 |
|
{ |
2621 |
|
exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, |
2622 |
|
pcretables); |
2623 |
|
if (exclude_compiled == NULL) |
2624 |
|
{ |
2625 |
|
fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n", |
2626 |
|
errptr, error); |
2627 |
|
goto EXIT2; |
2628 |
|
} |
2629 |
|
} |
2630 |
|
|
2631 |
|
if (include_pattern != NULL) |
2632 |
|
{ |
2633 |
|
include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, |
2634 |
|
pcretables); |
2635 |
|
if (include_compiled == NULL) |
2636 |
|
{ |
2637 |
|
fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n", |
2638 |
|
errptr, error); |
2639 |
|
goto EXIT2; |
2640 |
|
} |
2641 |
|
} |
2642 |
|
|
2643 |
|
if (exclude_dir_pattern != NULL) |
2644 |
|
{ |
2645 |
|
exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr, |
2646 |
|
pcretables); |
2647 |
|
if (exclude_dir_compiled == NULL) |
2648 |
|
{ |
2649 |
|
fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n", |
2650 |
|
errptr, error); |
2651 |
|
goto EXIT2; |
2652 |
|
} |
2653 |
|
} |
2654 |
|
|
2655 |
|
if (include_dir_pattern != NULL) |
2656 |
|
{ |
2657 |
|
include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr, |
2658 |
|
pcretables); |
2659 |
|
if (include_dir_compiled == NULL) |
2660 |
|
{ |
2661 |
|
fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n", |
2662 |
|
errptr, error); |
2663 |
|
goto EXIT2; |
2664 |
} |
} |
2665 |
} |
} |
2666 |
|
|
2667 |
/* If there are no further arguments, do the business on stdin and exit */ |
/* If there are no further arguments, do the business on stdin and exit. */ |
2668 |
|
|
2669 |
if (i >= argc) return pcregrep(stdin, NULL); |
if (i >= argc) |
2670 |
|
{ |
2671 |
|
rc = pcregrep(stdin, FR_PLAIN, stdin_name, |
2672 |
|
(filenames > FN_DEFAULT)? stdin_name : NULL); |
2673 |
|
goto EXIT; |
2674 |
|
} |
2675 |
|
|
2676 |
/* Otherwise, work through the remaining arguments as files or directories. |
/* Otherwise, work through the remaining arguments as files or directories. |
2677 |
Pass in the fact that there is only one argument at top level - this suppresses |
Pass in the fact that there is only one argument at top level - this suppresses |
2678 |
the file name if the argument is not a directory. */ |
the file name if the argument is not a directory and filenames are not |
2679 |
|
otherwise forced. */ |
2680 |
|
|
2681 |
only_one_at_top = (i == argc - 1); |
only_one_at_top = i == argc - 1; /* Catch initial value of i */ |
|
if (filenames_only) filenames = TRUE; |
|
2682 |
|
|
2683 |
for (; i < argc; i++) |
for (; i < argc; i++) |
2684 |
{ |
{ |
2685 |
int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top); |
int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE, |
2686 |
if (frc == 0 && rc == 1) rc = 0; |
only_one_at_top); |
2687 |
|
if (frc > 1) rc = frc; |
2688 |
|
else if (frc == 0 && rc == 1) rc = 0; |
2689 |
} |
} |
2690 |
|
|
2691 |
return rc; |
EXIT: |
2692 |
|
if (main_buffer != NULL) free(main_buffer); |
2693 |
|
if (pattern_list != NULL) |
2694 |
|
{ |
2695 |
|
for (i = 0; i < pattern_count; i++) free(pattern_list[i]); |
2696 |
|
free(pattern_list); |
2697 |
|
} |
2698 |
|
if (hints_list != NULL) |
2699 |
|
{ |
2700 |
|
for (i = 0; i < hint_count; i++) |
2701 |
|
{ |
2702 |
|
if (hints_list[i] != NULL) pcre_free_study(hints_list[i]); |
2703 |
|
} |
2704 |
|
free(hints_list); |
2705 |
|
} |
2706 |
|
pcregrep_exit(rc); |
2707 |
|
|
2708 |
|
EXIT2: |
2709 |
|
rc = 2; |
2710 |
|
goto EXIT; |
2711 |
} |
} |
2712 |
|
|
2713 |
/* End */ |
/* End of pcregrep */ |