3 |
*************************************************/ |
*************************************************/ |
4 |
|
|
5 |
/* This is a grep program that uses the PCRE regular expression library to do |
/* This is a grep program that uses the PCRE regular expression library to do |
6 |
its pattern matching. */ |
its pattern matching. On a Unix system it can recurse into directories. */ |
7 |
|
|
8 |
|
#include <ctype.h> |
9 |
#include <stdio.h> |
#include <stdio.h> |
10 |
#include <string.h> |
#include <string.h> |
11 |
#include <stdlib.h> |
#include <stdlib.h> |
18 |
|
|
19 |
typedef int BOOL; |
typedef int BOOL; |
20 |
|
|
21 |
|
#define VERSION "2.0 01-Aug-2001" |
22 |
|
#define MAX_PATTERN_COUNT 100 |
23 |
|
|
24 |
|
|
25 |
/************************************************* |
/************************************************* |
26 |
* Global variables * |
* Global variables * |
27 |
*************************************************/ |
*************************************************/ |
28 |
|
|
29 |
static pcre *pattern; |
static char *pattern_filename = NULL; |
30 |
static pcre_extra *hints; |
static int pattern_count = 0; |
31 |
|
static pcre **pattern_list; |
32 |
|
static pcre_extra **hints_list; |
33 |
|
|
34 |
static BOOL count_only = FALSE; |
static BOOL count_only = FALSE; |
35 |
|
static BOOL filenames = TRUE; |
36 |
static BOOL filenames_only = FALSE; |
static BOOL filenames_only = FALSE; |
37 |
static BOOL invert = FALSE; |
static BOOL invert = FALSE; |
38 |
static BOOL number = FALSE; |
static BOOL number = FALSE; |
39 |
|
static BOOL recurse = FALSE; |
40 |
static BOOL silent = FALSE; |
static BOOL silent = FALSE; |
41 |
static BOOL whole_lines = FALSE; |
static BOOL whole_lines = FALSE; |
42 |
|
|
43 |
|
/* Structure for options and list of them */ |
44 |
|
|
45 |
|
/* One entry in the option table.

  one_char   the single-letter form of the option; a negative value marks an
             option that has only a long name, and 0 terminates the table
  long_name  the name used after "--"
  help_text  the description printed by the help function

The two strings only ever point at string literals, so they are declared
const to stop them being written through. */

typedef struct option_item {
  int one_char;
  const char *long_name;
  const char *help_text;
} option_item;
50 |
|
|
51 |
|
static option_item optionlist[] = { |
52 |
|
{ -1, "help", "display this help and exit" }, |
53 |
|
{ 'c', "count", "print only a count of matching lines per FILE" }, |
54 |
|
{ 'h', "no-filename", "suppress the prefixing filename on output" }, |
55 |
|
{ 'i', "ignore-case", "ignore case distinctions" }, |
56 |
|
{ 'l', "files-with-matches", "print only FILE names containing matches" }, |
57 |
|
{ 'n', "line-number", "print line number with output lines" }, |
58 |
|
{ 'r', "recursive", "recursively scan sub-directories" }, |
59 |
|
{ 's', "no-messages", "suppress error messages" }, |
60 |
|
{ 'V', "version", "print version information and exit" }, |
61 |
|
{ 'v', "invert-match", "select non-matching lines" }, |
62 |
|
{ 'x', "line-regex", "force PATTERN to match only whole lines" }, |
63 |
|
{ 'x', "line-regexp", "force PATTERN to match only whole lines" }, |
64 |
|
{ 0, NULL, NULL } |
65 |
|
}; |
66 |
|
|
67 |
|
|
68 |
|
/************************************************* |
69 |
|
* Functions for directory scanning * |
70 |
|
*************************************************/ |
71 |
|
|
72 |
|
/* These functions are defined so that they can be made system specific, |
73 |
|
although at present the only ones are for Unix, and for "no directory recursion |
74 |
|
support". */ |
75 |
|
|
76 |
|
|
77 |
|
/************* Directory scanning in Unix ***********/ |
78 |
|
|
79 |
|
#if IS_UNIX
#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>

typedef DIR directory_type;

/* Test whether a path names a directory.

Argument:  filename   the path to test
Returns:   '/' (the character used to join a directory and an entry name)
           if stat() succeeds and reports a directory; otherwise 0 */

int
isdirectory(char *filename)
{
  struct stat statbuf;
  if (stat(filename, &statbuf) < 0)
    return 0;        /* In the expectation that opening as a file will fail */
  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
}

/* Open a directory for scanning.

Argument:  filename   the directory path
Returns:   whatever opendir() returns (NULL on failure, with errno set) */

directory_type *
opendirectory(char *filename)
{
  return opendir(filename);
}

/* Fetch the next entry name from an open directory, skipping "." and "..".

Argument:  dir   a handle obtained from opendirectory()
Returns:   the entry's d_name, or NULL when readdir() has no more entries.
           The string lives inside the dirent returned by readdir(), so copy
           it before the next call if it has to be kept. */

char *
readdirectory(directory_type *dir)
{
  for (;;)
    {
    struct dirent *dent = readdir(dir);
    if (dent == NULL) return NULL;
    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
      return dent->d_name;
    }
  return NULL;   /* Keep compiler happy; never executed */
}

/* Close a directory handle obtained from opendirectory(). */

void
closedirectory(directory_type *dir)
{
  closedir(dir);
}


#else


/************* Directory scanning when we can't do it ***********/

/* The type is void. isdirectory() always answers "not a directory", so the
other functions are never reached by a caller that tests it first; they still
return a defined value (NULL) instead of falling off the end of a non-void
function. */

typedef void directory_type;

int isdirectory(char *filename) { (void)filename; return 0; }
directory_type *opendirectory(char *filename) { (void)filename; return NULL; }
char *readdirectory(directory_type *dir) { (void)dir; return NULL; }
void closedirectory(directory_type *dir) { (void)dir; }

#endif
136 |
|
|
137 |
|
|
138 |
|
|
139 |
#if ! HAVE_STRERROR |
#if ! HAVE_STRERROR |
173 |
|
|
174 |
while (fgets(buffer, sizeof(buffer), in) != NULL) |
while (fgets(buffer, sizeof(buffer), in) != NULL) |
175 |
{ |
{ |
176 |
BOOL match; |
BOOL match = FALSE; |
177 |
|
int i; |
178 |
int length = (int)strlen(buffer); |
int length = (int)strlen(buffer); |
179 |
if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0; |
if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0; |
180 |
linenumber++; |
linenumber++; |
181 |
|
|
182 |
match = pcre_exec(pattern, hints, buffer, length, 0, 0, offsets, 99) >= 0; |
for (i = 0; !match && i < pattern_count; i++) |
183 |
if (match && whole_lines && offsets[1] != length) match = FALSE; |
{ |
184 |
|
match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0, |
185 |
|
offsets, 99) >= 0; |
186 |
|
if (match && whole_lines && offsets[1] != length) match = FALSE; |
187 |
|
} |
188 |
|
|
189 |
if (match != invert) |
if (match != invert) |
190 |
{ |
{ |
222 |
|
|
223 |
|
|
224 |
/************************************************* |
/************************************************* |
225 |
|
* Grep a file or recurse into a directory * |
226 |
|
*************************************************/ |
227 |
|
|
228 |
|
static int |
229 |
|
grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames, |
230 |
|
BOOL only_one_at_top) |
231 |
|
{ |
232 |
|
int rc = 1; |
233 |
|
int sep; |
234 |
|
FILE *in; |
235 |
|
|
236 |
|
/* If the file is a directory and we are recursing, scan each file within it. |
237 |
|
The scanning code is localized so it can be made system-specific. */ |
238 |
|
|
239 |
|
if ((sep = isdirectory(filename)) != 0 && recurse) |
240 |
|
{ |
241 |
|
char buffer[1024]; |
242 |
|
char *nextfile; |
243 |
|
directory_type *dir = opendirectory(filename); |
244 |
|
|
245 |
|
if (dir == NULL) |
246 |
|
{ |
247 |
|
fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename, |
248 |
|
strerror(errno)); |
249 |
|
return 2; |
250 |
|
} |
251 |
|
|
252 |
|
while ((nextfile = readdirectory(dir)) != NULL) |
253 |
|
{ |
254 |
|
int frc; |
255 |
|
sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile); |
256 |
|
frc = grep_or_recurse(buffer, recurse, TRUE, FALSE); |
257 |
|
if (frc == 0 && rc == 1) rc = 0; |
258 |
|
} |
259 |
|
|
260 |
|
closedirectory(dir); |
261 |
|
return rc; |
262 |
|
} |
263 |
|
|
264 |
|
/* If the file is not a directory, or we are not recursing, scan it. If this is |
265 |
|
the first and only argument at top level, we don't show the file name. |
266 |
|
Otherwise, control is via the show_filenames variable. */ |
267 |
|
|
268 |
|
in = fopen(filename, "r"); |
269 |
|
if (in == NULL) |
270 |
|
{ |
271 |
|
fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno)); |
272 |
|
return 2; |
273 |
|
} |
274 |
|
|
275 |
|
rc = pcregrep(in, (show_filenames && !only_one_at_top)? filename : NULL); |
276 |
|
fclose(in); |
277 |
|
return rc; |
278 |
|
} |
279 |
|
|
280 |
|
|
281 |
|
|
282 |
|
|
283 |
|
/************************************************* |
284 |
* Usage function * |
* Usage function * |
285 |
*************************************************/ |
*************************************************/ |
286 |
|
|
287 |
/* Write the short usage summary to stderr.

Argument:  rc   the value to hand back
Returns:   rc, unchanged, so that a caller can write "return usage(2)" or
           "exit(usage(2))" */

static int
usage(int rc)
{
  fputs("Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n",
    stderr);
  fputs("Type `pcregrep --help' for more information.\n", stderr);
  return rc;
}
294 |
|
|
296 |
|
|
297 |
|
|
298 |
/************************************************* |
/************************************************* |
299 |
|
* Help function * |
300 |
|
*************************************************/ |
301 |
|
|
302 |
|
static void |
303 |
|
help(void) |
304 |
|
{ |
305 |
|
option_item *op; |
306 |
|
|
307 |
|
printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n"); |
308 |
|
printf("Search for PATTERN in each FILE or standard input.\n"); |
309 |
|
printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n"); |
310 |
|
|
311 |
|
printf("Options:\n"); |
312 |
|
|
313 |
|
for (op = optionlist; op->one_char != 0; op++) |
314 |
|
{ |
315 |
|
int n; |
316 |
|
char s[4]; |
317 |
|
if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " "); |
318 |
|
printf(" %s --%s%n", s, op->long_name, &n); |
319 |
|
n = 30 - n; |
320 |
|
if (n < 1) n = 1; |
321 |
|
printf("%.*s%s\n", n, " ", op->help_text); |
322 |
|
} |
323 |
|
|
324 |
|
printf("\n -f<filename> or --file=<filename>\n"); |
325 |
|
printf(" Read patterns from <filename> instead of using a command line option.\n"); |
326 |
|
printf(" Trailing white space is removed; blanks lines are ignored.\n"); |
327 |
|
printf(" There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT); |
328 |
|
|
329 |
|
printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n"); |
330 |
|
printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n"); |
331 |
|
} |
332 |
|
|
333 |
|
|
334 |
|
|
335 |
|
|
336 |
|
/************************************************* |
337 |
|
* Handle an option * |
338 |
|
*************************************************/ |
339 |
|
|
340 |
|
static int |
341 |
|
handle_option(int letter, int options) |
342 |
|
{ |
343 |
|
switch(letter) |
344 |
|
{ |
345 |
|
case -1: help(); exit(0); |
346 |
|
case 'c': count_only = TRUE; break; |
347 |
|
case 'h': filenames = FALSE; break; |
348 |
|
case 'i': options |= PCRE_CASELESS; break; |
349 |
|
case 'l': filenames_only = TRUE; |
350 |
|
case 'n': number = TRUE; break; |
351 |
|
case 'r': recurse = TRUE; break; |
352 |
|
case 's': silent = TRUE; break; |
353 |
|
case 'v': invert = TRUE; break; |
354 |
|
case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break; |
355 |
|
|
356 |
|
case 'V': |
357 |
|
fprintf(stderr, "pcregrep version %s using ", VERSION); |
358 |
|
fprintf(stderr, "PCRE version %s\n", pcre_version()); |
359 |
|
exit(0); |
360 |
|
break; |
361 |
|
|
362 |
|
default: |
363 |
|
fprintf(stderr, "pcregrep: Unknown option -%c\n", letter); |
364 |
|
exit(usage(2)); |
365 |
|
} |
366 |
|
|
367 |
|
return options; |
368 |
|
} |
369 |
|
|
370 |
|
|
371 |
|
|
372 |
|
|
373 |
|
/************************************************* |
374 |
* Main program * |
* Main program * |
375 |
*************************************************/ |
*************************************************/ |
376 |
|
|
377 |
int |
int |
378 |
main(int argc, char **argv) |
main(int argc, char **argv) |
379 |
{ |
{ |
380 |
int i; |
int i, j; |
381 |
int rc = 1; |
int rc = 1; |
382 |
int options = 0; |
int options = 0; |
383 |
int errptr; |
int errptr; |
384 |
const char *error; |
const char *error; |
385 |
BOOL filenames = TRUE; |
BOOL only_one_at_top; |
386 |
|
|
387 |
/* Process the options */ |
/* Process the options */ |
388 |
|
|
389 |
for (i = 1; i < argc; i++) |
for (i = 1; i < argc; i++) |
390 |
{ |
{ |
|
char *s; |
|
391 |
if (argv[i][0] != '-') break; |
if (argv[i][0] != '-') break; |
392 |
s = argv[i] + 1; |
|
393 |
while (*s != 0) |
/* Long name options */ |
394 |
|
|
395 |
|
if (argv[i][1] == '-') |
396 |
{ |
{ |
397 |
switch (*s++) |
option_item *op; |
398 |
|
|
399 |
|
if (strncmp(argv[i]+2, "file=", 5) == 0) |
400 |
|
{ |
401 |
|
pattern_filename = argv[i] + 7; |
402 |
|
continue; |
403 |
|
} |
404 |
|
|
405 |
|
for (op = optionlist; op->one_char != 0; op++) |
406 |
{ |
{ |
407 |
case 'c': count_only = TRUE; break; |
if (strcmp(argv[i]+2, op->long_name) == 0) |
408 |
case 'h': filenames = FALSE; break; |
{ |
409 |
case 'i': options |= PCRE_CASELESS; break; |
options = handle_option(op->one_char, options); |
410 |
case 'l': filenames_only = TRUE; |
break; |
411 |
case 'n': number = TRUE; break; |
} |
412 |
case 's': silent = TRUE; break; |
} |
413 |
case 'v': invert = TRUE; break; |
if (op->one_char == 0) |
414 |
case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break; |
{ |
415 |
|
fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]); |
416 |
|
exit(usage(2)); |
417 |
|
} |
418 |
|
} |
419 |
|
|
420 |
case 'V': |
/* One-char options */ |
|
fprintf(stderr, "PCRE version %s\n", pcre_version()); |
|
|
break; |
|
421 |
|
|
422 |
default: |
else |
423 |
fprintf(stderr, "pcregrep: unknown option %c\n", s[-1]); |
{ |
424 |
return usage(2); |
char *s = argv[i] + 1; |
425 |
|
while (*s != 0) |
426 |
|
{ |
427 |
|
if (*s == 'f') |
428 |
|
{ |
429 |
|
pattern_filename = s + 1; |
430 |
|
if (pattern_filename[0] == 0) |
431 |
|
{ |
432 |
|
if (i >= argc - 1) |
433 |
|
{ |
434 |
|
fprintf(stderr, "pcregrep: File name missing after -f\n"); |
435 |
|
exit(usage(2)); |
436 |
|
} |
437 |
|
pattern_filename = argv[++i]; |
438 |
|
} |
439 |
|
break; |
440 |
|
} |
441 |
|
else options = handle_option(*s++, options); |
442 |
} |
} |
443 |
} |
} |
444 |
} |
} |
445 |
|
|
446 |
/* There must be at least a regexp argument */ |
pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *)); |
447 |
|
hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *)); |
448 |
|
|
449 |
if (i >= argc) return usage(0); |
if (pattern_list == NULL || hints_list == NULL) |
450 |
|
{ |
451 |
|
fprintf(stderr, "pcregrep: malloc failed\n"); |
452 |
|
return 2; |
453 |
|
} |
454 |
|
|
455 |
/* Compile the regular expression. */ |
/* Compile the regular expression(s). */ |
456 |
|
|
457 |
pattern = pcre_compile(argv[i++], options, &error, &errptr, NULL); |
if (pattern_filename != NULL) |
|
if (pattern == NULL) |
|
458 |
{ |
{ |
459 |
fprintf(stderr, "pcregrep: error in regex at offset %d: %s\n", errptr, error); |
FILE *f = fopen(pattern_filename, "r"); |
460 |
return 2; |
char buffer[BUFSIZ]; |
461 |
|
if (f == NULL) |
462 |
|
{ |
463 |
|
fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename, |
464 |
|
strerror(errno)); |
465 |
|
return 2; |
466 |
|
} |
467 |
|
while (fgets(buffer, sizeof(buffer), f) != NULL) |
468 |
|
{ |
469 |
|
char *s = buffer + (int)strlen(buffer); |
470 |
|
if (pattern_count >= MAX_PATTERN_COUNT) |
471 |
|
{ |
472 |
|
fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n", |
473 |
|
MAX_PATTERN_COUNT); |
474 |
|
return 2; |
475 |
|
} |
476 |
|
while (s > buffer && isspace((unsigned char)(s[-1]))) s--; |
477 |
|
if (s == buffer) continue; |
478 |
|
*s = 0; |
479 |
|
pattern_list[pattern_count] = pcre_compile(buffer, options, &error, |
480 |
|
&errptr, NULL); |
481 |
|
if (pattern_list[pattern_count++] == NULL) |
482 |
|
{ |
483 |
|
fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n", |
484 |
|
pattern_count, errptr, error); |
485 |
|
return 2; |
486 |
|
} |
487 |
|
} |
488 |
|
fclose(f); |
489 |
} |
} |
490 |
|
|
491 |
/* Study the regular expression, as we will be running it many times */ |
/* If no file name, a single regex must be given inline */ |
492 |
|
|
493 |
hints = pcre_study(pattern, 0, &error); |
else |
|
if (error != NULL) |
|
494 |
{ |
{ |
495 |
fprintf(stderr, "pcregrep: error while studing regex: %s\n", error); |
if (i >= argc) return usage(0); |
496 |
return 2; |
pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL); |
497 |
|
if (pattern_list[0] == NULL) |
498 |
|
{ |
499 |
|
fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr, |
500 |
|
error); |
501 |
|
return 2; |
502 |
|
} |
503 |
|
pattern_count++; |
504 |
|
} |
505 |
|
|
506 |
|
/* Study the regular expressions, as we will be running them many times */ |
507 |
|
|
508 |
|
for (j = 0; j < pattern_count; j++) |
509 |
|
{ |
510 |
|
hints_list[j] = pcre_study(pattern_list[j], 0, &error); |
511 |
|
if (error != NULL) |
512 |
|
{ |
513 |
|
char s[16]; |
514 |
|
if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j); |
515 |
|
fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error); |
516 |
|
return 2; |
517 |
|
} |
518 |
} |
} |
519 |
|
|
520 |
/* If there are no further arguments, do the business on stdin and exit */ |
/* If there are no further arguments, do the business on stdin and exit */ |
521 |
|
|
522 |
if (i >= argc) return pcregrep(stdin, NULL); |
if (i >= argc) return pcregrep(stdin, NULL); |
523 |
|
|
524 |
/* Otherwise, work through the remaining arguments as files. If there is only |
/* Otherwise, work through the remaining arguments as files or directories. |
525 |
one, don't give its name on the output. */ |
Pass in the fact that there is only one argument at top level - this suppresses |
526 |
|
the file name if the argument is not a directory. */ |
527 |
|
|
528 |
if (i == argc - 1) filenames = FALSE; |
only_one_at_top = (i == argc - 1); |
529 |
if (filenames_only) filenames = TRUE; |
if (filenames_only) filenames = TRUE; |
530 |
|
|
531 |
for (; i < argc; i++) |
for (; i < argc; i++) |
532 |
{ |
{ |
533 |
FILE *in = fopen(argv[i], "r"); |
int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top); |
534 |
if (in == NULL) |
if (frc == 0 && rc == 1) rc = 0; |
|
{ |
|
|
fprintf(stderr, "%s: failed to open: %s\n", argv[i], strerror(errno)); |
|
|
rc = 2; |
|
|
} |
|
|
else |
|
|
{ |
|
|
int frc = pcregrep(in, filenames? argv[i] : NULL); |
|
|
if (frc == 0 && rc == 1) rc = 0; |
|
|
fclose(in); |
|
|
} |
|
535 |
} |
} |
536 |
|
|
537 |
return rc; |
return rc; |