/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 377 - (show annotations)
Sun Mar 1 12:07:19 2009 UTC (6 years, 5 months ago) by ph10
File MIME type: text/plain
File size: 67322 byte(s)
Error occurred while calculating annotation data.
Use colour when requested with -o in pcregrep.
1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2009 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
/* Home-grown Boolean type and its two values, used throughout this file. */
68 #define FALSE 0
69 #define TRUE 1
70
71 typedef int BOOL;
72
/* NOTE(review): presumably the upper limit on the number of entries in
pattern_list/hints_list; the code that uses it is outside this part of the
file - confirm. */
73 #define MAX_PATTERN_COUNT 100
74
/* MBUFTHIRD is one third of the size of the main buffer: pcregrep() declares
its buffer as 3*MBUFTHIRD bytes. It is BUFSIZ when that is larger than 8192,
and 8192 otherwise. */
75 #if BUFSIZ > 8192
76 #define MBUFTHIRD BUFSIZ
77 #else
78 #define MBUFTHIRD 8192
79 #endif
80
81 /* Values for the "filenames" variable, which specifies options for file name
82 output. The order is important; it is assumed that a file name is wanted for
83 all values greater than FN_DEFAULT. */
84
85 enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
86
87 /* File reading styles */
/* These are the values of the frtype argument of pcregrep(). */
88
89 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
90
91 /* Actions for the -d and -D options */
/* The lower-case names belong to -d (directories) and are held in dee_action;
the upper-case names belong to -D (devices) and are held in DEE_action. */
92
93 enum { dee_READ, dee_SKIP, dee_RECURSE };
94 enum { DEE_READ, DEE_SKIP };
95
96 /* Actions for special processing options (flag bits) */
/* These bits are combined in process_options; they correspond to the -w, -x,
and -F options respectively. */
97
98 #define PO_WORD_MATCH 0x0001
99 #define PO_LINE_MATCH 0x0002
100 #define PO_FIXED_STRINGS 0x0004
101
102 /* Line ending types */
/* The current choice is held in endlinetype and selects the scanning code in
end_of_line() and previous_line(). */
103
104 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
105
106
107
108 /*************************************************
109 * Global variables *
110 *************************************************/
111
112 /* Jeffrey Friedl has some debugging requirements that are not part of the
113 regular code. */
114
115 #ifdef JFRIEDL_DEBUG
116 static int S_arg = -1;
117 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
118 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
119 static const char *jfriedl_prefix = "";
120 static const char *jfriedl_postfix = "";
121 #endif
122
/* One of the EL_xxx values; it selects the line-ending convention used by
end_of_line() and previous_line(). */
123 static int endlinetype;
124
/* String-valued settings. Apart from colour_string, each of these is the
dataptr target of an entry in optionlist. colour_string is the text that
pcregrep() inserts into the escape sequence that switches colour on. */
125 static char *colour_string = (char *)"1;31";
126 static char *colour_option = NULL;
127 static char *dee_option = NULL;
128 static char *DEE_option = NULL;
129 static char *newline = NULL;
130 static char *pattern_filename = NULL;
131 static char *stdin_name = (char *)"(standard input)";
132 static char *locale = NULL;
133
/* NOTE(review): presumably character tables built for the chosen locale and
passed to pcre_compile(); the code that sets this is not in this part of the
file - confirm. */
134 static const unsigned char *pcretables = NULL;
135
/* The compiled patterns and their study data. pcregrep() passes
pattern_list[i] and hints_list[i] together to pcre_exec() for each i below
pattern_count. */
136 static int pattern_count = 0;
137 static pcre **pattern_list = NULL;
138 static pcre_extra **hints_list = NULL;
139
/* Pattern strings from the --include, --exclude, --include_dir and
--exclude_dir options (see optionlist). */
140 static char *include_pattern = NULL;
141 static char *exclude_pattern = NULL;
142 static char *include_dir_pattern = NULL;
143 static char *exclude_dir_pattern = NULL;
144
/* NOTE(review): presumably the compiled forms of the four pattern strings
above; they are compiled outside this part of the file - confirm. */
145 static pcre *include_compiled = NULL;
146 static pcre *exclude_compiled = NULL;
147 static pcre *include_dir_compiled = NULL;
148 static pcre *exclude_dir_compiled = NULL;
149
/* Numeric settings. The three context counts are the dataptr targets of the
-A, -B and -C options; filenames holds one of the FN_xxx values and
process_options holds the PO_xxx bits. */
150 static int after_context = 0;
151 static int before_context = 0;
152 static int both_context = 0;
153 static int dee_action = dee_READ;
154 static int DEE_action = DEE_READ;
155 static int error_count = 0;
156 static int filenames = FN_DEFAULT;
157 static int process_options = 0;
158
/* Boolean switches. hyphenpending is run-time state rather than an option: it
is set by do_after_lines() and makes pcregrep() output a "--" separator before
the next group of lines. */
159 static BOOL count_only = FALSE;
160 static BOOL do_colour = FALSE;
161 static BOOL file_offsets = FALSE;
162 static BOOL hyphenpending = FALSE;
163 static BOOL invert = FALSE;
164 static BOOL line_offsets = FALSE;
165 static BOOL multiline = FALSE;
166 static BOOL number = FALSE;
167 static BOOL only_matching = FALSE;
168 static BOOL quiet = FALSE;
169 static BOOL silent = FALSE;
170 static BOOL utf8 = FALSE;
171
172 /* Structure for options and list of them */
173
/* The kinds of option, stored in the type field of option_item. In the table
below, OP_NODATA entries have no data pointer, the STRING and NUMBER kinds
point at a char * or int variable, and the OP_OP_ kinds are used for --colour
and the debugging -S option. */
174 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
175 OP_PATLIST };
176
/* One entry in the option table:
  type        one of the OP_xxx values above
  one_char    the single-letter form, or a negative N_xxx value if none
  dataptr     address of the variable that receives the value, or NULL
  long_name   the long form, with "=something" appended if it takes data
  help_text   one-line description of the option */
177 typedef struct option_item {
178 int type;
179 int one_char;
180 void *dataptr;
181 const char *long_name;
182 const char *help_text;
183 } option_item;
184
185 /* Options without a single-letter equivalent get a negative value. This can be
186 used to identify them. */
187
188 #define N_COLOUR (-1)
189 #define N_EXCLUDE (-2)
190 #define N_EXCLUDE_DIR (-3)
191 #define N_HELP (-4)
192 #define N_INCLUDE (-5)
193 #define N_INCLUDE_DIR (-6)
194 #define N_LABEL (-7)
195 #define N_LOCALE (-8)
196 #define N_NULL (-9)
197 #define N_LOFFSETS (-10)
198 #define N_FOFFSETS (-11)
199
/* The option table. Both spellings "color" and "colour" share N_COLOUR and
the same variable. The table ends with an entry whose one_char is zero and
whose name pointers are NULL. */
200 static option_item optionlist[] = {
201 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
202 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
203 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
204 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
205 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
206 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
207 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
208 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
209 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
210 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
211 { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
212 { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
213 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
214 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
215 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
216 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
217 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
218 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
219 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
220 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
221 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
222 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
223 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
224 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
225 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
226 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
227 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
228 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
229 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
230 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
231 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
232 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
233 #ifdef JFRIEDL_DEBUG
234 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
235 #endif
236 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
237 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
238 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
239 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
240 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
241 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
242 { OP_NODATA, 0, NULL, NULL, NULL }
243 };
244
245 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
246 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
247 that the combination of -w and -x has the same effect as -x on its own, so we
248 can treat them as the same. */
249
/* Each table has eight entries, one for every combination of the three
PO_xxx bits. NOTE(review): presumably both are indexed by the value of
process_options; the indexing code is outside this part of the file -
confirm. */
250 static const char *prefix[] = {
251 "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
252
253 static const char *suffix[] = {
254 "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
255
256 /* UTF-8 tables - used when the newline setting is "any" or "anycrlf". */
257
/* utf8_table3 is indexed by the number of additional bytes in a character and
gives the mask that extracts the data bits from its first byte. */
258 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
259
/* utf8_table4 is indexed by the bottom six bits of a first byte whose value is
0xc0 or more, and gives the number of additional bytes in the character. */
260 const char utf8_table4[] = {
261 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
262 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
263 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
264 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
265
266
267
268 /*************************************************
269 * OS-specific functions *
270 *************************************************/
271
272 /* These functions are defined so that they can be made system specific,
273 although at present the only ones are for Unix, Win32, and for "no support". */
274
275
276 /************* Directory scanning in Unix ***********/
277
278 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
279 #include <sys/types.h>
280 #include <sys/stat.h>
281 #include <dirent.h>
282
/* In the Unix version a directory scan is just a <dirent.h> DIR stream. */
283 typedef DIR directory_type;
284
/* Test whether a path names a directory (Unix version).

Argument:
  filename    the path to test

Returns:      '/' (which is non-zero) if stat() succeeds and reports a
              directory; 0 otherwise, including when stat() fails
*/

static int
isdirectory(char *filename)
{
  struct stat statbuf;

  if (stat(filename, &statbuf) < 0)
    return 0;        /* In the expectation that opening as a file will fail */

  /* S_ISDIR() is the POSIX test macro; it does not rely on the XSI-only
  S_IFMT and S_IFDIR mask names being visible. */

  return S_ISDIR(statbuf.st_mode)? '/' : 0;
}
293
294 static directory_type *
295 opendirectory(char *filename)
296 {
297 return opendir(filename);
298 }
299
300 static char *
301 readdirectory(directory_type *dir)
302 {
303 for (;;)
304 {
305 struct dirent *dent = readdir(dir);
306 if (dent == NULL) return NULL;
307 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
308 return dent->d_name;
309 }
310 /* Control never reaches here */
311 }
312
313 static void
314 closedirectory(directory_type *dir)
315 {
316 closedir(dir);
317 }
318
319
320 /************* Test for regular file in Unix **********/
321
/* Test whether a path names a regular file (Unix version).

Argument:
  filename    the path to test

Returns:      non-zero if stat() reports a regular file, and also if stat()
              fails; 0 for anything else that exists
*/

static int
isregfile(char *filename)
{
  struct stat statbuf;

  if (stat(filename, &statbuf) < 0)
    return 1;        /* In the expectation that opening as a file will fail */

  /* S_ISREG() is the POSIX test macro; it does not rely on the XSI-only
  S_IFMT and S_IFREG mask names being visible. */

  return S_ISREG(statbuf.st_mode) != 0;
}
330
331
332 /************* Test stdout for being a terminal in Unix **********/
333
334 static BOOL
335 is_stdout_tty(void)
336 {
337 return isatty(fileno(stdout));
338 }
339
340
341 /************* Directory scanning in Win32 ***********/
342
343 /* I (Philip Hazel) have no means of testing this code. It was contributed by
344 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
345 when it did not exist. David Byron added a patch that moved the #include of
346 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
347 */
348
349 #elif HAVE_WINDOWS_H
350
351 #ifndef STRICT
352 # define STRICT
353 #endif
354 #ifndef WIN32_LEAN_AND_MEAN
355 # define WIN32_LEAN_AND_MEAN
356 #endif
357
358 #include <windows.h>
359
360 #ifndef INVALID_FILE_ATTRIBUTES
361 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
362 #endif
363
/* State of one directory scan in the Win32 version: the search handle from
FindFirstFile(), a flag that is TRUE while the entry that FindFirstFile()
fetched has not yet been handed out by readdirectory(), and the data for the
current entry. */
364 typedef struct directory_type
365 {
366 HANDLE handle;
367 BOOL first;
368 WIN32_FIND_DATA data;
369 } directory_type;
370
371 int
372 isdirectory(char *filename)
373 {
374 DWORD attr = GetFileAttributes(filename);
375 if (attr == INVALID_FILE_ATTRIBUTES)
376 return 0;
377 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
378 }
379
380 directory_type *
381 opendirectory(char *filename)
382 {
383 size_t len;
384 char *pattern;
385 directory_type *dir;
386 DWORD err;
387 len = strlen(filename);
388 pattern = (char *) malloc(len + 3);
389 dir = (directory_type *) malloc(sizeof(*dir));
390 if ((pattern == NULL) || (dir == NULL))
391 {
392 fprintf(stderr, "pcregrep: malloc failed\n");
393 exit(2);
394 }
395 memcpy(pattern, filename, len);
396 memcpy(&(pattern[len]), "\\*", 3);
397 dir->handle = FindFirstFile(pattern, &(dir->data));
398 if (dir->handle != INVALID_HANDLE_VALUE)
399 {
400 free(pattern);
401 dir->first = TRUE;
402 return dir;
403 }
404 err = GetLastError();
405 free(pattern);
406 free(dir);
407 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
408 return NULL;
409 }
410
411 char *
412 readdirectory(directory_type *dir)
413 {
414 for (;;)
415 {
416 if (!dir->first)
417 {
418 if (!FindNextFile(dir->handle, &(dir->data)))
419 return NULL;
420 }
421 else
422 {
423 dir->first = FALSE;
424 }
425 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
426 return dir->data.cFileName;
427 }
428 #ifndef _MSC_VER
429 return NULL; /* Keep compiler happy; never executed */
430 #endif
431 }
432
433 void
434 closedirectory(directory_type *dir)
435 {
436 FindClose(dir->handle);
437 free(dir);
438 }
439
440
441 /************* Test for regular file in Win32 **********/
442
443 /* I don't know how to do this, or if it can be done; assume all paths are
444 regular if they are not directories. */
445
/* Test for a regular file (Win32 version). Anything that isdirectory() does
not report as a directory is taken to be a regular file. The function is
static, like its Unix counterpart, because it is used only within this file. */

static int
isregfile(char *filename)
{
  return !isdirectory(filename);
}
450
451
452 /************* Test stdout for being a terminal in Win32 **********/
453
454 /* I don't know how to do this; assume never */
455
456 static BOOL
457 is_stdout_tty(void)
458 {
459 return FALSE;
460 }
461
462
463 /************* Directory scanning when we can't do it ***********/
464
465 /* The type is void, and apart from isdirectory(), the functions do nothing. */
466
467 #else
468
/* Versions for systems with no directory support. The directory type is void,
nothing is ever reported as a directory, and the scanning functions do
nothing. They are static, like the Unix versions, because they are used only
within this file. */

typedef void directory_type;

/* Never reports a directory. */

static int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* Always fails to open. */

static directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return (directory_type *)0;
}

/* Never yields an entry. */

static char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}

/* Nothing to release. */

static void
closedirectory(directory_type *dir)
{
  (void)dir;
}
475
476
477 /************* Test for regular when we can't do it **********/
478
479 /* Assume all files are regular. */
480
/* With no way of testing, every path is taken to be a regular file. The
function is static, like the Unix version, because it is used only within this
file. */

static int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
482
483
484 /************* Test stdout for being a terminal when we can't do it **********/
485
486 static BOOL
487 is_stdout_tty(void)
488 {
489 return FALSE;
490 }
491
492
493 #endif
494
495
496
497 #ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* A few old systems (SunOS4 is one) have no strerror() in the C library, but
they do supply the sys_nerr count and the sys_errlist table, from which the
same service can be provided.

Argument:
  n           an error number

Returns:      sys_errlist[n] if n is at least zero and less than sys_nerr;
              otherwise a fixed "unknown error number" text
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr) return sys_errlist[n];
  return "unknown error number";
}
515 #endif /* HAVE_STRERROR */
516
517
518
519 /*************************************************
520 * Find end of line *
521 *************************************************/
522
523 /* The length of the endline sequence that is found is set via lenptr. This may
524 be zero at the very end of the file if there is no line-ending sequence there.
525
526 Arguments:
527 p current position in line
528 endptr end of available data
529 lenptr where to put the length of the eol sequence
530
531 Returns: pointer to the last byte of the line
532 */
533
534 static char *
535 end_of_line(char *p, char *endptr, int *lenptr)
536 {
537 switch(endlinetype)
538 {
539 default: /* Just in case */
540 case EL_LF:
541 while (p < endptr && *p != '\n') p++;
542 if (p < endptr)
543 {
544 *lenptr = 1;
545 return p + 1;
546 }
547 *lenptr = 0;
548 return endptr;
549
550 case EL_CR:
551 while (p < endptr && *p != '\r') p++;
552 if (p < endptr)
553 {
554 *lenptr = 1;
555 return p + 1;
556 }
557 *lenptr = 0;
558 return endptr;
559
560 case EL_CRLF:
561 for (;;)
562 {
563 while (p < endptr && *p != '\r') p++;
564 if (++p >= endptr)
565 {
566 *lenptr = 0;
567 return endptr;
568 }
569 if (*p == '\n')
570 {
571 *lenptr = 2;
572 return p + 1;
573 }
574 }
575 break;
576
577 case EL_ANYCRLF:
578 while (p < endptr)
579 {
580 int extra = 0;
581 register int c = *((unsigned char *)p);
582
583 if (utf8 && c >= 0xc0)
584 {
585 int gcii, gcss;
586 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
587 gcss = 6*extra;
588 c = (c & utf8_table3[extra]) << gcss;
589 for (gcii = 1; gcii <= extra; gcii++)
590 {
591 gcss -= 6;
592 c |= (p[gcii] & 0x3f) << gcss;
593 }
594 }
595
596 p += 1 + extra;
597
598 switch (c)
599 {
600 case 0x0a: /* LF */
601 *lenptr = 1;
602 return p;
603
604 case 0x0d: /* CR */
605 if (p < endptr && *p == 0x0a)
606 {
607 *lenptr = 2;
608 p++;
609 }
610 else *lenptr = 1;
611 return p;
612
613 default:
614 break;
615 }
616 } /* End of loop for ANYCRLF case */
617
618 *lenptr = 0; /* Must have hit the end */
619 return endptr;
620
621 case EL_ANY:
622 while (p < endptr)
623 {
624 int extra = 0;
625 register int c = *((unsigned char *)p);
626
627 if (utf8 && c >= 0xc0)
628 {
629 int gcii, gcss;
630 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
631 gcss = 6*extra;
632 c = (c & utf8_table3[extra]) << gcss;
633 for (gcii = 1; gcii <= extra; gcii++)
634 {
635 gcss -= 6;
636 c |= (p[gcii] & 0x3f) << gcss;
637 }
638 }
639
640 p += 1 + extra;
641
642 switch (c)
643 {
644 case 0x0a: /* LF */
645 case 0x0b: /* VT */
646 case 0x0c: /* FF */
647 *lenptr = 1;
648 return p;
649
650 case 0x0d: /* CR */
651 if (p < endptr && *p == 0x0a)
652 {
653 *lenptr = 2;
654 p++;
655 }
656 else *lenptr = 1;
657 return p;
658
659 case 0x85: /* NEL */
660 *lenptr = utf8? 2 : 1;
661 return p;
662
663 case 0x2028: /* LS */
664 case 0x2029: /* PS */
665 *lenptr = 3;
666 return p;
667
668 default:
669 break;
670 }
671 } /* End of loop for ANY case */
672
673 *lenptr = 0; /* Must have hit the end */
674 return endptr;
675 } /* End of overall switch */
676 }
677
678
679
680 /*************************************************
681 * Find start of previous line *
682 *************************************************/
683
684 /* This is called when looking back for before lines to print.
685
686 Arguments:
687 p start of the subsequent line
688 startptr start of available data
689
690 Returns: pointer to the start of the previous line
691 */
692
693 static char *
694 previous_line(char *p, char *startptr)
695 {
696 switch(endlinetype)
697 {
698 default: /* Just in case */
699 case EL_LF:
700 p--;
701 while (p > startptr && p[-1] != '\n') p--;
702 return p;
703
704 case EL_CR:
705 p--;
706 while (p > startptr && p[-1] != '\n') p--;
707 return p;
708
709 case EL_CRLF:
710 for (;;)
711 {
712 p -= 2;
713 while (p > startptr && p[-1] != '\n') p--;
714 if (p <= startptr + 1 || p[-2] == '\r') return p;
715 }
716 return p; /* But control should never get here */
717
718 case EL_ANY:
719 case EL_ANYCRLF:
720 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
721 if (utf8) while ((*p & 0xc0) == 0x80) p--;
722
723 while (p > startptr)
724 {
725 register int c;
726 char *pp = p - 1;
727
728 if (utf8)
729 {
730 int extra = 0;
731 while ((*pp & 0xc0) == 0x80) pp--;
732 c = *((unsigned char *)pp);
733 if (c >= 0xc0)
734 {
735 int gcii, gcss;
736 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
737 gcss = 6*extra;
738 c = (c & utf8_table3[extra]) << gcss;
739 for (gcii = 1; gcii <= extra; gcii++)
740 {
741 gcss -= 6;
742 c |= (pp[gcii] & 0x3f) << gcss;
743 }
744 }
745 }
746 else c = *((unsigned char *)pp);
747
748 if (endlinetype == EL_ANYCRLF) switch (c)
749 {
750 case 0x0a: /* LF */
751 case 0x0d: /* CR */
752 return p;
753
754 default:
755 break;
756 }
757
758 else switch (c)
759 {
760 case 0x0a: /* LF */
761 case 0x0b: /* VT */
762 case 0x0c: /* FF */
763 case 0x0d: /* CR */
764 case 0x85: /* NEL */
765 case 0x2028: /* LS */
766 case 0x2029: /* PS */
767 return p;
768
769 default:
770 break;
771 }
772
773 p = pp; /* Back one character */
774 } /* End of loop for ANY case */
775
776 return startptr; /* Hit start of data */
777 } /* End of overall switch */
778 }
779
780
781
782
783
784 /*************************************************
785 * Print the previous "after" lines *
786 *************************************************/
787
788 /* This is called if we are about to lose said lines because of buffer filling,
789 and at the end of the file. The data in the line is written using fwrite() so
790 that a binary zero does not terminate it.
791
792 Arguments:
793 lastmatchnumber the number of the last matching line, plus one
794 lastmatchrestart where we restarted after the last match
795 endptr end of available data
796 printname filename for printing
797
798 Returns: nothing
799 */
800
801 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
802 char *endptr, char *printname)
803 {
804 if (after_context > 0 && lastmatchnumber > 0)
805 {
806 int count = 0;
807 while (lastmatchrestart < endptr && count++ < after_context)
808 {
809 int ellength;
810 char *pp = lastmatchrestart;
811 if (printname != NULL) fprintf(stdout, "%s-", printname);
812 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
813 pp = end_of_line(pp, endptr, &ellength);
814 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
815 lastmatchrestart = pp;
816 }
817 hyphenpending = TRUE;
818 }
819 }
820
821
822
823 /*************************************************
824 * Grep an individual file *
825 *************************************************/
826
827 /* This is called from grep_or_recurse() below. It uses a buffer that is three
828 times the value of MBUFTHIRD. The matching point is never allowed to stray into
829 the top third of the buffer, thus keeping more of the file available for
830 context printing or for multiline scanning. For large files, the pointer will
831 be in the middle third most of the time, so the bottom third is available for
832 "before" context printing.
833
834 Arguments:
835 handle the fopened FILE stream for a normal file
836 the gzFile pointer when reading is via libz
837 the BZFILE pointer when reading is via libbz2
838 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
839 printname the file name if it is to be printed for each match
840 or NULL if the file name is not to be printed
841 it cannot be NULL if filenames[_nomatch]_only is set
842
843 Returns: 0 if there was at least one match
844 1 otherwise (no matches)
845 2 if there is a read error on a .bz2 file
846 */
847
848 static int
849 pcregrep(void *handle, int frtype, char *printname)
850 {
851 int rc = 1;
852 int linenumber = 1;
853 int lastmatchnumber = 0;
854 int count = 0;
855 int filepos = 0;
856 int offsets[99];
857 char *lastmatchrestart = NULL;
858 char buffer[3*MBUFTHIRD];
859 char *ptr = buffer;
860 char *endptr;
861 size_t bufflength;
862 BOOL endhyphenpending = FALSE;
863 FILE *in = NULL; /* Ensure initialized */
864
865 #ifdef SUPPORT_LIBZ
866 gzFile ingz = NULL;
867 #endif
868
869 #ifdef SUPPORT_LIBBZ2
870 BZFILE *inbz2 = NULL;
871 #endif
872
873
874 /* Do the first read into the start of the buffer and set up the pointer to end
875 of what we have. In the case of libz, a non-zipped .gz file will be read as a
876 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
877 fail. */
878
879 #ifdef SUPPORT_LIBZ
880 if (frtype == FR_LIBZ)
881 {
882 ingz = (gzFile)handle;
883 bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
884 }
885 else
886 #endif
887
888 #ifdef SUPPORT_LIBBZ2
889 if (frtype == FR_LIBBZ2)
890 {
891 inbz2 = (BZFILE *)handle;
892 bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
893 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
894 } /* without the cast it is unsigned. */
895 else
896 #endif
897
898 {
899 in = (FILE *)handle;
900 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
901 }
902
903 endptr = buffer + bufflength;
904
905 /* Loop while the current pointer is not at the end of the file. For large
906 files, endptr will be at the end of the buffer when we are in the middle of the
907 file, but ptr will never get there, because as soon as it gets over 2/3 of the
908 way, the buffer is shifted left and re-filled. */
909
910 while (ptr < endptr)
911 {
912 int i, endlinelength;
913 int mrc = 0;
914 BOOL match = FALSE;
915 char *matchptr = ptr;
916 char *t = ptr;
917 size_t length, linelength;
918
919 /* At this point, ptr is at the start of a line. We need to find the length
920 of the subject string to pass to pcre_exec(). In multiline mode, it is the
921 length remainder of the data in the buffer. Otherwise, it is the length of
922 the next line. After matching, we always advance by the length of the next
923 line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
924 that any match is constrained to be in the first line. */
925
926 t = end_of_line(t, endptr, &endlinelength);
927 linelength = t - ptr - endlinelength;
928 length = multiline? (size_t)(endptr - ptr) : linelength;
929
930 /* Extra processing for Jeffrey Friedl's debugging. */
931
932 #ifdef JFRIEDL_DEBUG
933 if (jfriedl_XT || jfriedl_XR)
934 {
935 #include <sys/time.h>
936 #include <time.h>
937 struct timeval start_time, end_time;
938 struct timezone dummy;
939
940 if (jfriedl_XT)
941 {
942 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
943 const char *orig = ptr;
944 ptr = malloc(newlen + 1);
945 if (!ptr) {
946 printf("out of memory");
947 exit(2);
948 }
949 endptr = ptr;
950 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
951 for (i = 0; i < jfriedl_XT; i++) {
952 strncpy(endptr, orig, length);
953 endptr += length;
954 }
955 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
956 length = newlen;
957 }
958
959 if (gettimeofday(&start_time, &dummy) != 0)
960 perror("bad gettimeofday");
961
962
963 for (i = 0; i < jfriedl_XR; i++)
964 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
965
966 if (gettimeofday(&end_time, &dummy) != 0)
967 perror("bad gettimeofday");
968
969 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
970 -
971 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
972
973 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
974 return 0;
975 }
976 #endif
977
978 /* We come back here after a match when the -o option (only_matching) is set,
979 in order to find any further matches in the same line. */
980
981 ONLY_MATCHING_RESTART:
982
983 /* Run through all the patterns until one matches. Note that we don't include
984 the final newline in the subject string. */
985
986 for (i = 0; i < pattern_count; i++)
987 {
988 mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
989 offsets, 99);
990 if (mrc >= 0) { match = TRUE; break; }
991 if (mrc != PCRE_ERROR_NOMATCH)
992 {
993 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
994 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
995 fprintf(stderr, "this line:\n");
996 fwrite(matchptr, 1, linelength, stderr); /* In case binary zero included */
997 fprintf(stderr, "\n");
998 if (error_count == 0 &&
999 (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
1000 {
1001 fprintf(stderr, "pcregrep: error %d means that a resource limit "
1002 "was exceeded\n", mrc);
1003 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
1004 }
1005 if (error_count++ > 20)
1006 {
1007 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
1008 exit(2);
1009 }
1010 match = invert; /* No more matching; don't show the line again */
1011 break;
1012 }
1013 }
1014
1015 /* If it's a match or a not-match (as required), do what's wanted. */
1016
1017 if (match != invert)
1018 {
1019 BOOL hyphenprinted = FALSE;
1020
1021 /* We've failed if we want a file that doesn't have any matches. */
1022
1023 if (filenames == FN_NOMATCH_ONLY) return 1;
1024
1025 /* Just count if just counting is wanted. */
1026
1027 if (count_only) count++;
1028
1029 /* If all we want is a file name, there is no need to scan any more lines
1030 in the file. */
1031
1032 else if (filenames == FN_ONLY)
1033 {
1034 fprintf(stdout, "%s\n", printname);
1035 return 0;
1036 }
1037
1038 /* Likewise, if all we want is a yes/no answer. */
1039
1040 else if (quiet) return 0;
1041
1042 /* The --only-matching option prints just the substring that matched, and
1043 the --file-offsets and --line-offsets options output offsets for the
1044 matching substring (they both force --only-matching). None of these options
1045 prints any context. Afterwards, adjust the start and length, and then jump
1046 back to look for further matches in the same line. If we are in invert
1047 mode, however, nothing is printed - this could be still useful because the
1048 return code is set. */
1049
1050 else if (only_matching)
1051 {
1052 if (!invert)
1053 {
1054 if (printname != NULL) fprintf(stdout, "%s:", printname);
1055 if (number) fprintf(stdout, "%d:", linenumber);
1056 if (line_offsets)
1057 fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1058 offsets[1] - offsets[0]);
1059 else if (file_offsets)
1060 fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1061 offsets[1] - offsets[0]);
1062 else
1063 {
1064 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1065 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1066 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1067 }
1068 fprintf(stdout, "\n");
1069 matchptr += offsets[1];
1070 length -= offsets[1];
1071 match = FALSE;
1072 goto ONLY_MATCHING_RESTART;
1073 }
1074 }
1075
1076 /* This is the default case when none of the above options is set. We print
1077 the matching lines(s), possibly preceded and/or followed by other lines of
1078 context. */
1079
1080 else
1081 {
1082 /* See if there is a requirement to print some "after" lines from a
1083 previous match. We never print any overlaps. */
1084
1085 if (after_context > 0 && lastmatchnumber > 0)
1086 {
1087 int ellength;
1088 int linecount = 0;
1089 char *p = lastmatchrestart;
1090
1091 while (p < ptr && linecount < after_context)
1092 {
1093 p = end_of_line(p, ptr, &ellength);
1094 linecount++;
1095 }
1096
1097 /* It is important to advance lastmatchrestart during this printing so
1098 that it interacts correctly with any "before" printing below. Print
1099 each line's data using fwrite() in case there are binary zeroes. */
1100
1101 while (lastmatchrestart < p)
1102 {
1103 char *pp = lastmatchrestart;
1104 if (printname != NULL) fprintf(stdout, "%s-", printname);
1105 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1106 pp = end_of_line(pp, endptr, &ellength);
1107 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1108 lastmatchrestart = pp;
1109 }
1110 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1111 }
1112
1113 /* If there were non-contiguous lines printed above, insert hyphens. */
1114
1115 if (hyphenpending)
1116 {
1117 fprintf(stdout, "--\n");
1118 hyphenpending = FALSE;
1119 hyphenprinted = TRUE;
1120 }
1121
1122 /* See if there is a requirement to print some "before" lines for this
1123 match. Again, don't print overlaps. */
1124
1125 if (before_context > 0)
1126 {
1127 int linecount = 0;
1128 char *p = ptr;
1129
1130 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1131 linecount < before_context)
1132 {
1133 linecount++;
1134 p = previous_line(p, buffer);
1135 }
1136
1137 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1138 fprintf(stdout, "--\n");
1139
1140 while (p < ptr)
1141 {
1142 int ellength;
1143 char *pp = p;
1144 if (printname != NULL) fprintf(stdout, "%s-", printname);
1145 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1146 pp = end_of_line(pp, endptr, &ellength);
1147 fwrite(p, 1, pp - p, stdout);
1148 p = pp;
1149 }
1150 }
1151
1152 /* Now print the matching line(s); ensure we set hyphenpending at the end
1153 of the file if any context lines are being output. */
1154
1155 if (after_context > 0 || before_context > 0)
1156 endhyphenpending = TRUE;
1157
1158 if (printname != NULL) fprintf(stdout, "%s:", printname);
1159 if (number) fprintf(stdout, "%d:", linenumber);
1160
1161 /* In multiline mode, we want to print to the end of the line in which
1162 the end of the matched string is found, so we adjust linelength and the
1163 line number appropriately, but only when there actually was a match
1164 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1165 the match will always be before the first newline sequence. */
1166
1167 if (multiline)
1168 {
1169 int ellength;
1170 char *endmatch = ptr;
1171 if (!invert)
1172 {
1173 endmatch += offsets[1];
1174 t = ptr;
1175 while (t < endmatch)
1176 {
1177 t = end_of_line(t, endptr, &ellength);
1178 if (t <= endmatch) linenumber++; else break;
1179 }
1180 }
1181 endmatch = end_of_line(endmatch, endptr, &ellength);
1182 linelength = endmatch - ptr - ellength;
1183 }
1184
1185 /*** NOTE: Use only fwrite() to output the data line, so that binary
1186 zeroes are treated as just another data character. */
1187
1188 /* This extra option, for Jeffrey Friedl's debugging requirements,
1189 replaces the matched string, or a specific captured string if it exists,
1190 with X. When this happens, colouring is ignored. */
1191
1192 #ifdef JFRIEDL_DEBUG
1193 if (S_arg >= 0 && S_arg < mrc)
1194 {
1195 int first = S_arg * 2;
1196 int last = first + 1;
1197 fwrite(ptr, 1, offsets[first], stdout);
1198 fprintf(stdout, "X");
1199 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1200 }
1201 else
1202 #endif
1203
1204 /* We have to split the line(s) up if colouring. */
1205
1206 if (do_colour)
1207 {
1208 fwrite(ptr, 1, offsets[0], stdout);
1209 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1210 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1211 fprintf(stdout, "%c[00m", 0x1b);
1212 fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1213 stdout);
1214 }
1215 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1216 }
1217
1218 /* End of doing what has to be done for a match */
1219
1220 rc = 0; /* Had some success */
1221
1222 /* Remember where the last match happened for after_context. We remember
1223 where we are about to restart, and that line's number. */
1224
1225 lastmatchrestart = ptr + linelength + endlinelength;
1226 lastmatchnumber = linenumber + 1;
1227 }
1228
1229 /* For a match in multiline inverted mode (which of course did not cause
1230 anything to be printed), we have to move on to the end of the match before
1231 proceeding. */
1232
1233 if (multiline && invert && match)
1234 {
1235 int ellength;
1236 char *endmatch = ptr + offsets[1];
1237 t = ptr;
1238 while (t < endmatch)
1239 {
1240 t = end_of_line(t, endptr, &ellength);
1241 if (t <= endmatch) linenumber++; else break;
1242 }
1243 endmatch = end_of_line(endmatch, endptr, &ellength);
1244 linelength = endmatch - ptr - ellength;
1245 }
1246
1247 /* Advance to after the newline and increment the line number. The file
1248 offset to the current line is maintained in filepos. */
1249
1250 ptr += linelength + endlinelength;
1251 filepos += linelength + endlinelength;
1252 linenumber++;
1253
1254 /* If we haven't yet reached the end of the file (the buffer is full), and
1255 the current point is in the top 1/3 of the buffer, slide the buffer down by
1256 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1257 about to be lost, print them. */
1258
1259 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1260 {
1261 if (after_context > 0 &&
1262 lastmatchnumber > 0 &&
1263 lastmatchrestart < buffer + MBUFTHIRD)
1264 {
1265 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1266 lastmatchnumber = 0;
1267 }
1268
1269 /* Now do the shuffle */
1270
1271 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1272 ptr -= MBUFTHIRD;
1273
1274 #ifdef SUPPORT_LIBZ
1275 if (frtype == FR_LIBZ)
1276 bufflength = 2*MBUFTHIRD +
1277 gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1278 else
1279 #endif
1280
1281 #ifdef SUPPORT_LIBBZ2
1282 if (frtype == FR_LIBBZ2)
1283 bufflength = 2*MBUFTHIRD +
1284 BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1285 else
1286 #endif
1287
1288 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1289
1290 endptr = buffer + bufflength;
1291
1292 /* Adjust any last match point */
1293
1294 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1295 }
1296 } /* Loop through the whole file */
1297
1298 /* End of file; print final "after" lines if wanted; do_after_lines sets
1299 hyphenpending if it prints something. */
1300
1301 if (!only_matching && !count_only)
1302 {
1303 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1304 hyphenpending |= endhyphenpending;
1305 }
1306
1307 /* Print the file name if we are looking for those without matches and there
1308 were none. If we found a match, we won't have got this far. */
1309
1310 if (filenames == FN_NOMATCH_ONLY)
1311 {
1312 fprintf(stdout, "%s\n", printname);
1313 return 0;
1314 }
1315
1316 /* Print the match count if wanted */
1317
1318 if (count_only)
1319 {
1320 if (printname != NULL) fprintf(stdout, "%s:", printname);
1321 fprintf(stdout, "%d\n", count);
1322 }
1323
1324 return rc;
1325 }
1326
1327
1328
1329 /*************************************************
1330 * Grep a file or recurse into a directory *
1331 *************************************************/
1332
1333 /* Given a path name, if it's a directory, scan all the files if we are
1334 recursing; if it's a file, grep it.
1335
1336 Arguments:
1337 pathname the path to investigate
1338 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1339 only_one_at_top TRUE if the path is the only one at toplevel
1340
1341 Returns: 0 if there was at least one match
1342 1 if there were no matches
1343 2 there was some kind of error
1344
1345 However, file opening failures are suppressed if "silent" is set.
1346 */
1347
1348 static int
1349 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1350 {
1351 int rc = 1;
1352 int sep;
1353 int frtype;
1354 int pathlen;
1355 void *handle;
1356 FILE *in = NULL; /* Ensure initialized */
1357
1358 #ifdef SUPPORT_LIBZ
1359 gzFile ingz = NULL;
1360 #endif
1361
1362 #ifdef SUPPORT_LIBBZ2
1363 BZFILE *inbz2 = NULL;
1364 #endif
1365
1366 /* If the file name is "-" we scan stdin */
1367
1368 if (strcmp(pathname, "-") == 0)
1369 {
1370 return pcregrep(stdin, FR_PLAIN,
1371 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1372 stdin_name : NULL);
1373 }
1374
1375 /* If the file is a directory, skip if skipping or if we are recursing, scan
1376 each file and directory within it, subject to any include or exclude patterns
1377 that were set. The scanning code is localized so it can be made
1378 system-specific. */
1379
1380 if ((sep = isdirectory(pathname)) != 0)
1381 {
1382 if (dee_action == dee_SKIP) return 1;
1383 if (dee_action == dee_RECURSE)
1384 {
1385 char buffer[1024];
1386 char *nextfile;
1387 directory_type *dir = opendirectory(pathname);
1388
1389 if (dir == NULL)
1390 {
1391 if (!silent)
1392 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1393 strerror(errno));
1394 return 2;
1395 }
1396
1397 while ((nextfile = readdirectory(dir)) != NULL)
1398 {
1399 int frc, nflen;
1400 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1401 nflen = strlen(nextfile);
1402
1403 if (isdirectory(buffer))
1404 {
1405 if (exclude_dir_compiled != NULL &&
1406 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1407 continue;
1408
1409 if (include_dir_compiled != NULL &&
1410 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1411 continue;
1412 }
1413 else
1414 {
1415 if (exclude_compiled != NULL &&
1416 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1417 continue;
1418
1419 if (include_compiled != NULL &&
1420 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1421 continue;
1422 }
1423
1424 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1425 if (frc > 1) rc = frc;
1426 else if (frc == 0 && rc == 1) rc = 0;
1427 }
1428
1429 closedirectory(dir);
1430 return rc;
1431 }
1432 }
1433
1434 /* If the file is not a directory and not a regular file, skip it if that's
1435 been requested. */
1436
1437 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1438
1439 /* Control reaches here if we have a regular file, or if we have a directory
1440 and recursion or skipping was not requested, or if we have anything else and
1441 skipping was not requested. The scan proceeds. If this is the first and only
1442 argument at top level, we don't show the file name, unless we are only showing
1443 the file name, or the filename was forced (-H). */
1444
1445 pathlen = strlen(pathname);
1446
1447 /* Open using zlib if it is supported and the file name ends with .gz. */
1448
1449 #ifdef SUPPORT_LIBZ
1450 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1451 {
1452 ingz = gzopen(pathname, "rb");
1453 if (ingz == NULL)
1454 {
1455 if (!silent)
1456 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1457 strerror(errno));
1458 return 2;
1459 }
1460 handle = (void *)ingz;
1461 frtype = FR_LIBZ;
1462 }
1463 else
1464 #endif
1465
1466 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1467
1468 #ifdef SUPPORT_LIBBZ2
1469 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1470 {
1471 inbz2 = BZ2_bzopen(pathname, "rb");
1472 handle = (void *)inbz2;
1473 frtype = FR_LIBBZ2;
1474 }
1475 else
1476 #endif
1477
1478 /* Otherwise use plain fopen(). The label is so that we can come back here if
1479 an attempt to read a .bz2 file indicates that it really is a plain file. */
1480
1481 #ifdef SUPPORT_LIBBZ2
1482 PLAIN_FILE:
1483 #endif
1484 {
1485 in = fopen(pathname, "r");
1486 handle = (void *)in;
1487 frtype = FR_PLAIN;
1488 }
1489
1490 /* All the opening methods return errno when they fail. */
1491
1492 if (handle == NULL)
1493 {
1494 if (!silent)
1495 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1496 strerror(errno));
1497 return 2;
1498 }
1499
1500 /* Now grep the file */
1501
1502 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1503 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1504
1505 /* Close in an appropriate manner. */
1506
1507 #ifdef SUPPORT_LIBZ
1508 if (frtype == FR_LIBZ)
1509 gzclose(ingz);
1510 else
1511 #endif
1512
1513 /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1514 read failed. If the error indicates that the file isn't in fact bzipped, try
1515 again as a normal file. */
1516
1517 #ifdef SUPPORT_LIBBZ2
1518 if (frtype == FR_LIBBZ2)
1519 {
1520 if (rc == 2)
1521 {
1522 int errnum;
1523 const char *err = BZ2_bzerror(inbz2, &errnum);
1524 if (errnum == BZ_DATA_ERROR_MAGIC)
1525 {
1526 BZ2_bzclose(inbz2);
1527 goto PLAIN_FILE;
1528 }
1529 else if (!silent)
1530 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1531 pathname, err);
1532 }
1533 BZ2_bzclose(inbz2);
1534 }
1535 else
1536 #endif
1537
1538 /* Normal file close */
1539
1540 fclose(in);
1541
1542 /* Pass back the yield from pcregrep(). */
1543
1544 return rc;
1545 }
1546
1547
1548
1549
1550 /*************************************************
1551 * Usage function *
1552 *************************************************/
1553
1554 static int
1555 usage(int rc)
1556 {
1557 option_item *op;
1558 fprintf(stderr, "Usage: pcregrep [-");
1559 for (op = optionlist; op->one_char != 0; op++)
1560 {
1561 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1562 }
1563 fprintf(stderr, "] [long options] [pattern] [files]\n");
1564 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1565 "options.\n");
1566 return rc;
1567 }
1568
1569
1570
1571
1572 /*************************************************
1573 * Help function *
1574 *************************************************/
1575
1576 static void
1577 help(void)
1578 {
1579 option_item *op;
1580
1581 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1582 printf("Search for PATTERN in each FILE or standard input.\n");
1583 printf("PATTERN must be present if neither -e nor -f is used.\n");
1584 printf("\"-\" can be used as a file name to mean STDIN.\n");
1585
1586 #ifdef SUPPORT_LIBZ
1587 printf("Files whose names end in .gz are read using zlib.\n");
1588 #endif
1589
1590 #ifdef SUPPORT_LIBBZ2
1591 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1592 #endif
1593
1594 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1595 printf("Other files and the standard input are read as plain files.\n\n");
1596 #else
1597 printf("All files are read as plain files, without any interpretation.\n\n");
1598 #endif
1599
1600 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1601 printf("Options:\n");
1602
1603 for (op = optionlist; op->one_char != 0; op++)
1604 {
1605 int n;
1606 char s[4];
1607 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1608 n = 30 - printf(" %s --%s", s, op->long_name);
1609 if (n < 1) n = 1;
1610 printf("%.*s%s\n", n, " ", op->help_text);
1611 }
1612
1613 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1614 printf("trailing white space is removed and blank lines are ignored.\n");
1615 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1616
1617 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1618 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1619 }
1620
1621
1622
1623
1624 /*************************************************
1625 * Handle a single-letter, no data option *
1626 *************************************************/
1627
1628 static int
1629 handle_option(int letter, int options)
1630 {
1631 switch(letter)
1632 {
1633 case N_FOFFSETS: file_offsets = TRUE; break;
1634 case N_HELP: help(); exit(0);
1635 case N_LOFFSETS: line_offsets = number = TRUE; break;
1636 case 'c': count_only = TRUE; break;
1637 case 'F': process_options |= PO_FIXED_STRINGS; break;
1638 case 'H': filenames = FN_FORCE; break;
1639 case 'h': filenames = FN_NONE; break;
1640 case 'i': options |= PCRE_CASELESS; break;
1641 case 'l': filenames = FN_ONLY; break;
1642 case 'L': filenames = FN_NOMATCH_ONLY; break;
1643 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1644 case 'n': number = TRUE; break;
1645 case 'o': only_matching = TRUE; break;
1646 case 'q': quiet = TRUE; break;
1647 case 'r': dee_action = dee_RECURSE; break;
1648 case 's': silent = TRUE; break;
1649 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1650 case 'v': invert = TRUE; break;
1651 case 'w': process_options |= PO_WORD_MATCH; break;
1652 case 'x': process_options |= PO_LINE_MATCH; break;
1653
1654 case 'V':
1655 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1656 exit(0);
1657 break;
1658
1659 default:
1660 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1661 exit(usage(2));
1662 }
1663
1664 return options;
1665 }
1666
1667
1668
1669
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", "11th", etc.

Argument:   n    the number
Returns:    pointer to a static buffer holding the text; the buffer is
              overwritten by the next call

The ending is chosen from the last digit, except that numbers ending in 11, 12,
or 13 take "th". Zero and negative numbers also get "th". */

static char *
ordin(int n)
{
static char buffer[24];      /* Room for a 64-bit int, sign, ending, and zero */
const char *ending = "th";
int digits = sprintf(buffer, "%d", n);
int lasttwo = n % 100;

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (n%10)
    {
    case 1:  ending = "st"; break;
    case 2:  ending = "nd"; break;
    case 3:  ending = "rd"; break;
    default: break;
    }
  }

strcpy(buffer + digits, ending);
return buffer;
}
1692
1693
1694
1695 /*************************************************
1696 * Compile a single pattern *
1697 *************************************************/
1698
1699 /* When the -F option has been used, this is called for each substring.
1700 Otherwise it's called for each supplied pattern.
1701
1702 Arguments:
1703 pattern the pattern string
1704 options the PCRE options
1705 filename the file name, or NULL for a command-line pattern
1706 count 0 if this is the only command line pattern, or
1707 number of the command line pattern, or
1708 linenumber for a pattern from a file
1709
1710 Returns: TRUE on success, FALSE after an error
1711 */
1712
1713 static BOOL
1714 compile_single_pattern(char *pattern, int options, char *filename, int count)
1715 {
1716 char buffer[MBUFTHIRD + 16];
1717 const char *error;
1718 int errptr;
1719
1720 if (pattern_count >= MAX_PATTERN_COUNT)
1721 {
1722 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1723 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1724 return FALSE;
1725 }
1726
1727 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1728 suffix[process_options]);
1729 pattern_list[pattern_count] =
1730 pcre_compile(buffer, options, &error, &errptr, pcretables);
1731 if (pattern_list[pattern_count] != NULL)
1732 {
1733 pattern_count++;
1734 return TRUE;
1735 }
1736
1737 /* Handle compile errors */
1738
1739 errptr -= (int)strlen(prefix[process_options]);
1740 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1741
1742 if (filename == NULL)
1743 {
1744 if (count == 0)
1745 fprintf(stderr, "pcregrep: Error in command-line regex "
1746 "at offset %d: %s\n", errptr, error);
1747 else
1748 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1749 "at offset %d: %s\n", ordin(count), errptr, error);
1750 }
1751 else
1752 {
1753 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1754 "at offset %d: %s\n", count, filename, errptr, error);
1755 }
1756
1757 return FALSE;
1758 }
1759
1760
1761
1762 /*************************************************
1763 * Compile one supplied pattern *
1764 *************************************************/
1765
1766 /* When the -F option has been used, each string may be a list of strings,
1767 separated by line breaks. They will be matched literally.
1768
1769 Arguments:
1770 pattern the pattern string
1771 options the PCRE options
1772 filename the file name, or NULL for a command-line pattern
1773 count 0 if this is the only command line pattern, or
1774 number of the command line pattern, or
1775 linenumber for a pattern from a file
1776
1777 Returns: TRUE on success, FALSE after an error
1778 */
1779
1780 static BOOL
1781 compile_pattern(char *pattern, int options, char *filename, int count)
1782 {
1783 if ((process_options & PO_FIXED_STRINGS) != 0)
1784 {
1785 char *eop = pattern + strlen(pattern);
1786 char buffer[MBUFTHIRD];
1787 for(;;)
1788 {
1789 int ellength;
1790 char *p = end_of_line(pattern, eop, &ellength);
1791 if (ellength == 0)
1792 return compile_single_pattern(pattern, options, filename, count);
1793 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1794 pattern = p;
1795 if (!compile_single_pattern(buffer, options, filename, count))
1796 return FALSE;
1797 }
1798 }
1799 else return compile_single_pattern(pattern, options, filename, count);
1800 }
1801
1802
1803
1804 /*************************************************
1805 * Main program *
1806 *************************************************/
1807
1808 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1809
1810 int
1811 main(int argc, char **argv)
1812 {
1813 int i, j;
1814 int rc = 1;
1815 int pcre_options = 0;
1816 int cmd_pattern_count = 0;
1817 int hint_count = 0;
1818 int errptr;
1819 BOOL only_one_at_top;
1820 char *patterns[MAX_PATTERN_COUNT];
1821 const char *locale_from = "--locale";
1822 const char *error;
1823
1824 /* Set the default line ending value from the default in the PCRE library;
1825 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1826 */
1827
1828 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1829 switch(i)
1830 {
1831 default: newline = (char *)"lf"; break;
1832 case '\r': newline = (char *)"cr"; break;
1833 case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1834 case -1: newline = (char *)"any"; break;
1835 case -2: newline = (char *)"anycrlf"; break;
1836 }
1837
1838 /* Process the options */
1839
1840 for (i = 1; i < argc; i++)
1841 {
1842 option_item *op = NULL;
1843 char *option_data = (char *)""; /* default to keep compiler happy */
1844 BOOL longop;
1845 BOOL longopwasequals = FALSE;
1846
1847 if (argv[i][0] != '-') break;
1848
1849 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1850 but only if we have previously had -e or -f to define the patterns. */
1851
1852 if (argv[i][1] == 0)
1853 {
1854 if (pattern_filename != NULL || pattern_count > 0) break;
1855 else exit(usage(2));
1856 }
1857
1858 /* Handle a long name option, or -- to terminate the options */
1859
1860 if (argv[i][1] == '-')
1861 {
1862 char *arg = argv[i] + 2;
1863 char *argequals = strchr(arg, '=');
1864
1865 if (*arg == 0) /* -- terminates options */
1866 {
1867 i++;
1868 break; /* out of the options-handling loop */
1869 }
1870
1871 longop = TRUE;
1872
1873 /* Some long options have data that follows after =, for example file=name.
1874 Some options have variations in the long name spelling: specifically, we
1875 allow "regexp" because GNU grep allows it, though I personally go along
1876 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1877 These options are entered in the table as "regex(p)". No option is in both
1878 these categories, fortunately. */
1879
1880 for (op = optionlist; op->one_char != 0; op++)
1881 {
1882 char *opbra = strchr(op->long_name, '(');
1883 char *equals = strchr(op->long_name, '=');
1884 if (opbra == NULL) /* Not a (p) case */
1885 {
1886 if (equals == NULL) /* Not thing=data case */
1887 {
1888 if (strcmp(arg, op->long_name) == 0) break;
1889 }
1890 else /* Special case xxx=data */
1891 {
1892 int oplen = equals - op->long_name;
1893 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1894 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1895 {
1896 option_data = arg + arglen;
1897 if (*option_data == '=')
1898 {
1899 option_data++;
1900 longopwasequals = TRUE;
1901 }
1902 break;
1903 }
1904 }
1905 }
1906 else /* Special case xxxx(p) */
1907 {
1908 char buff1[24];
1909 char buff2[24];
1910 int baselen = opbra - op->long_name;
1911 sprintf(buff1, "%.*s", baselen, op->long_name);
1912 sprintf(buff2, "%s%.*s", buff1,
1913 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1914 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1915 break;
1916 }
1917 }
1918
1919 if (op->one_char == 0)
1920 {
1921 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1922 exit(usage(2));
1923 }
1924 }
1925
1926
1927 /* Jeffrey Friedl's debugging harness uses these additional options which
1928 are not in the right form for putting in the option table because they use
1929 only one hyphen, yet are more than one character long. By putting them
1930 separately here, they will not get displayed as part of the help() output,
1931 but I don't think Jeffrey will care about that. */
1932
1933 #ifdef JFRIEDL_DEBUG
1934 else if (strcmp(argv[i], "-pre") == 0) {
1935 jfriedl_prefix = argv[++i];
1936 continue;
1937 } else if (strcmp(argv[i], "-post") == 0) {
1938 jfriedl_postfix = argv[++i];
1939 continue;
1940 } else if (strcmp(argv[i], "-XT") == 0) {
1941 sscanf(argv[++i], "%d", &jfriedl_XT);
1942 continue;
1943 } else if (strcmp(argv[i], "-XR") == 0) {
1944 sscanf(argv[++i], "%d", &jfriedl_XR);
1945 continue;
1946 }
1947 #endif
1948
1949
1950 /* One-char options; many that have no data may be in a single argument; we
1951 continue till we hit the last one or one that needs data. */
1952
1953 else
1954 {
1955 char *s = argv[i] + 1;
1956 longop = FALSE;
1957 while (*s != 0)
1958 {
1959 for (op = optionlist; op->one_char != 0; op++)
1960 { if (*s == op->one_char) break; }
1961 if (op->one_char == 0)
1962 {
1963 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1964 *s, argv[i]);
1965 exit(usage(2));
1966 }
1967 if (op->type != OP_NODATA || s[1] == 0)
1968 {
1969 option_data = s+1;
1970 break;
1971 }
1972 pcre_options = handle_option(*s++, pcre_options);
1973 }
1974 }
1975
1976 /* At this point we should have op pointing to a matched option. If the type
1977 is NO_DATA, it means that there is no data, and the option might set
1978 something in the PCRE options. */
1979
1980 if (op->type == OP_NODATA)
1981 {
1982 pcre_options = handle_option(op->one_char, pcre_options);
1983 continue;
1984 }
1985
1986 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1987 either has a value or defaults to something. It cannot have data in a
1988 separate item. At the moment, the only such options are "colo(u)r" and
1989 Jeffrey Friedl's special -S debugging option. */
1990
1991 if (*option_data == 0 &&
1992 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1993 {
1994 switch (op->one_char)
1995 {
1996 case N_COLOUR:
1997 colour_option = (char *)"auto";
1998 break;
1999 #ifdef JFRIEDL_DEBUG
2000 case 'S':
2001 S_arg = 0;
2002 break;
2003 #endif
2004 }
2005 continue;
2006 }
2007
2008 /* Otherwise, find the data string for the option. */
2009
2010 if (*option_data == 0)
2011 {
2012 if (i >= argc - 1 || longopwasequals)
2013 {
2014 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2015 exit(usage(2));
2016 }
2017 option_data = argv[++i];
2018 }
2019
2020 /* If the option type is OP_PATLIST, it's the -e option, which can be called
2021 multiple times to create a list of patterns. */
2022
2023 if (op->type == OP_PATLIST)
2024 {
2025 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2026 {
2027 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2028 MAX_PATTERN_COUNT);
2029 return 2;
2030 }
2031 patterns[cmd_pattern_count++] = option_data;
2032 }
2033
2034 /* Otherwise, deal with single string or numeric data values. */
2035
2036 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2037 {
2038 *((char **)op->dataptr) = option_data;
2039 }
2040 else
2041 {
2042 char *endptr;
2043 int n = strtoul(option_data, &endptr, 10);
2044 if (*endptr != 0)
2045 {
2046 if (longop)
2047 {
2048 char *equals = strchr(op->long_name, '=');
2049 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2050 equals - op->long_name;
2051 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2052 option_data, nlen, op->long_name);
2053 }
2054 else
2055 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2056 option_data, op->one_char);
2057 exit(usage(2));
2058 }
2059 *((int *)op->dataptr) = n;
2060 }
2061 }
2062
2063 /* Options have been decoded. If -C was used, its value is used as a default
2064 for -A and -B. */
2065
2066 if (both_context > 0)
2067 {
2068 if (after_context == 0) after_context = both_context;
2069 if (before_context == 0) before_context = both_context;
2070 }
2071
2072 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2073 However, the latter two set the only_matching flag. */
2074
2075 if ((only_matching && (file_offsets || line_offsets)) ||
2076 (file_offsets && line_offsets))
2077 {
2078 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2079 "and/or --line-offsets\n");
2080 exit(usage(2));
2081 }
2082
2083 if (file_offsets || line_offsets) only_matching = TRUE;
2084
2085 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2086 LC_ALL environment variable is set, and if so, use it. */
2087
2088 if (locale == NULL)
2089 {
2090 locale = getenv("LC_ALL");
2091 locale_from = "LCC_ALL";
2092 }
2093
2094 if (locale == NULL)
2095 {
2096 locale = getenv("LC_CTYPE");
2097 locale_from = "LC_CTYPE";
2098 }
2099
2100 /* If a locale has been provided, set it, and generate the tables the PCRE
2101 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2102
2103 if (locale != NULL)
2104 {
2105 if (setlocale(LC_CTYPE, locale) == NULL)
2106 {
2107 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2108 locale, locale_from);
2109 return 2;
2110 }
2111 pcretables = pcre_maketables();
2112 }
2113
2114 /* Sort out colouring */
2115
2116 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2117 {
2118 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2119 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2120 else
2121 {
2122 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2123 colour_option);
2124 return 2;
2125 }
2126 if (do_colour)
2127 {
2128 char *cs = getenv("PCREGREP_COLOUR");
2129 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2130 if (cs != NULL) colour_string = cs;
2131 }
2132 }
2133
2134 /* Interpret the newline type; the default settings are Unix-like. */
2135
2136 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2137 {
2138 pcre_options |= PCRE_NEWLINE_CR;
2139 endlinetype = EL_CR;
2140 }
2141 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2142 {
2143 pcre_options |= PCRE_NEWLINE_LF;
2144 endlinetype = EL_LF;
2145 }
2146 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2147 {
2148 pcre_options |= PCRE_NEWLINE_CRLF;
2149 endlinetype = EL_CRLF;
2150 }
2151 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2152 {
2153 pcre_options |= PCRE_NEWLINE_ANY;
2154 endlinetype = EL_ANY;
2155 }
2156 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2157 {
2158 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2159 endlinetype = EL_ANYCRLF;
2160 }
2161 else
2162 {
2163 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2164 return 2;
2165 }
2166
2167 /* Interpret the text values for -d and -D */
2168
2169 if (dee_option != NULL)
2170 {
2171 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2172 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2173 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2174 else
2175 {
2176 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2177 return 2;
2178 }
2179 }
2180
2181 if (DEE_option != NULL)
2182 {
2183 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2184 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2185 else
2186 {
2187 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2188 return 2;
2189 }
2190 }
2191
2192 /* Check the values for Jeffrey Friedl's debugging options. */
2193
2194 #ifdef JFRIEDL_DEBUG
2195 if (S_arg > 9)
2196 {
2197 fprintf(stderr, "pcregrep: bad value for -S option\n");
2198 return 2;
2199 }
2200 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2201 {
2202 if (jfriedl_XT == 0) jfriedl_XT = 1;
2203 if (jfriedl_XR == 0) jfriedl_XR = 1;
2204 }
2205 #endif
2206
2207 /* Get memory to store the pattern and hints lists. */
2208
2209 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2210 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2211
2212 if (pattern_list == NULL || hints_list == NULL)
2213 {
2214 fprintf(stderr, "pcregrep: malloc failed\n");
2215 goto EXIT2;
2216 }
2217
2218 /* If no patterns were provided by -e, and there is no file provided by -f,
2219 the first argument is the one and only pattern, and it must exist. */
2220
2221 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2222 {
2223 if (i >= argc) return usage(2);
2224 patterns[cmd_pattern_count++] = argv[i++];
2225 }
2226
2227 /* Compile the patterns that were provided on the command line, either by
2228 multiple uses of -e or as a single unkeyed pattern. */
2229
2230 for (j = 0; j < cmd_pattern_count; j++)
2231 {
2232 if (!compile_pattern(patterns[j], pcre_options, NULL,
2233 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2234 goto EXIT2;
2235 }
2236
2237 /* Compile the regular expressions that are provided in a file. */
2238
2239 if (pattern_filename != NULL)
2240 {
2241 int linenumber = 0;
2242 FILE *f;
2243 char *filename;
2244 char buffer[MBUFTHIRD];
2245
2246 if (strcmp(pattern_filename, "-") == 0)
2247 {
2248 f = stdin;
2249 filename = stdin_name;
2250 }
2251 else
2252 {
2253 f = fopen(pattern_filename, "r");
2254 if (f == NULL)
2255 {
2256 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2257 strerror(errno));
2258 goto EXIT2;
2259 }
2260 filename = pattern_filename;
2261 }
2262
2263 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2264 {
2265 char *s = buffer + (int)strlen(buffer);
2266 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2267 *s = 0;
2268 linenumber++;
2269 if (buffer[0] == 0) continue; /* Skip blank lines */
2270 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2271 goto EXIT2;
2272 }
2273
2274 if (f != stdin) fclose(f);
2275 }
2276
2277 /* Study the regular expressions, as we will be running them many times */
2278
2279 for (j = 0; j < pattern_count; j++)
2280 {
2281 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2282 if (error != NULL)
2283 {
2284 char s[16];
2285 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2286 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2287 goto EXIT2;
2288 }
2289 hint_count++;
2290 }
2291
2292 /* If there are include or exclude patterns, compile them. */
2293
2294 if (exclude_pattern != NULL)
2295 {
2296 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2297 pcretables);
2298 if (exclude_compiled == NULL)
2299 {
2300 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2301 errptr, error);
2302 goto EXIT2;
2303 }
2304 }
2305
2306 if (include_pattern != NULL)
2307 {
2308 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2309 pcretables);
2310 if (include_compiled == NULL)
2311 {
2312 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2313 errptr, error);
2314 goto EXIT2;
2315 }
2316 }
2317
2318 if (exclude_dir_pattern != NULL)
2319 {
2320 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2321 pcretables);
2322 if (exclude_dir_compiled == NULL)
2323 {
2324 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2325 errptr, error);
2326 goto EXIT2;
2327 }
2328 }
2329
2330 if (include_dir_pattern != NULL)
2331 {
2332 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2333 pcretables);
2334 if (include_dir_compiled == NULL)
2335 {
2336 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2337 errptr, error);
2338 goto EXIT2;
2339 }
2340 }
2341
2342 /* If there are no further arguments, do the business on stdin and exit. */
2343
2344 if (i >= argc)
2345 {
2346 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2347 goto EXIT;
2348 }
2349
2350 /* Otherwise, work through the remaining arguments as files or directories.
2351 Pass in the fact that there is only one argument at top level - this suppresses
2352 the file name if the argument is not a directory and filenames are not
2353 otherwise forced. */
2354
2355 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2356
2357 for (; i < argc; i++)
2358 {
2359 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2360 only_one_at_top);
2361 if (frc > 1) rc = frc;
2362 else if (frc == 0 && rc == 1) rc = 0;
2363 }
2364
2365 EXIT:
2366 if (pattern_list != NULL)
2367 {
2368 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2369 free(pattern_list);
2370 }
2371 if (hints_list != NULL)
2372 {
2373 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2374 free(hints_list);
2375 }
2376 return rc;
2377
2378 EXIT2:
2379 rc = 2;
2380 goto EXIT;
2381 }
2382
2383 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5